From c78dba777669df16803f48459ea20651902157a0 Mon Sep 17 00:00:00 2001 From: Sweaterdog Date: Mon, 27 Jan 2025 16:15:12 -0800 Subject: [PATCH] Update huggingface.js Added Deepseek-R1 support, such as the qwen2.5 32b distill --- src/models/huggingface.js | 95 ++++++++++++++++++++++++++++++--------- 1 file changed, 75 insertions(+), 20 deletions(-) diff --git a/src/models/huggingface.js b/src/models/huggingface.js index 56f9d55..e4e1432 100644 --- a/src/models/huggingface.js +++ b/src/models/huggingface.js @@ -1,44 +1,99 @@ -import {toSinglePrompt} from '../utils/text.js'; -import {getKey} from '../utils/keys.js'; -import {HfInference} from "@huggingface/inference"; +// huggingface.js +import { toSinglePrompt } from '../utils/text.js'; +import { getKey } from '../utils/keys.js'; +import { HfInference } from "@huggingface/inference"; export class HuggingFace { constructor(model_name, url) { - this.model_name = model_name.replace('huggingface/',''); + // Remove 'huggingface/' prefix if present + this.model_name = model_name.replace('huggingface/', ''); this.url = url; + // Hugging Face Inference doesn't currently allow custom base URLs if (this.url) { console.warn("Hugging Face doesn't support custom urls!"); } + // Initialize the HfInference instance this.huggingface = new HfInference(getKey('HUGGINGFACE_API_KEY')); } + /** + * Main method to handle chat requests. 
+ */ async sendRequest(turns, systemMessage) { const stop_seq = '***'; - const prompt = toSinglePrompt(turns, null, stop_seq); - let model_name = this.model_name || 'meta-llama/Meta-Llama-3-8B'; + // Convert the user's turns and systemMessage into a single prompt string + const prompt = toSinglePrompt(turns, null, stop_seq); + // Fallback model if none was provided + const model_name = this.model_name || 'meta-llama/Meta-Llama-3-8B'; + + // Combine system message with the prompt const input = systemMessage + "\n" + prompt; + + // We'll collect the streaming response in this variable let res = ''; - try { - console.log('Awaiting Hugging Face API response...'); - for await (const chunk of this.huggingface.chatCompletionStream({ - model: model_name, - messages: [{ role: "user", content: input }] - })) { - res += (chunk.choices[0]?.delta?.content || ""); + console.log('Messages:', [{ role: "system", content: systemMessage }, ...turns]); + + // We'll do up to 5 attempts if the model is "DeepSeek-R1" and tags are mismatched + const maxAttempts = 5; + let attempt = 0; + let finalRes = null; + + while (attempt < maxAttempts) { + attempt++; + console.log(`Awaiting Hugging Face API response... 
(model: ${model_name}, attempt: ${attempt})`);
+
+            res = '';
+            try {
+                // ChatCompletionStream returns an async iterator that we consume chunk by chunk
+                for await (const chunk of this.huggingface.chatCompletionStream({
+                    model: model_name,
+                    messages: [{ role: "user", content: input }]
+                })) {
+                    // Each chunk may or may not have delta content
+                    res += (chunk.choices[0]?.delta?.content || "");
+                }
+            } catch (err) {
+                console.log(err);
+                res = 'My brain disconnected, try again.';
+                // Exit the loop, as we only want to retry for <think> block mismatches, not other errors
+                break;
             }
-        } catch (err) {
-            console.log(err);
-            res = 'My brain disconnected, try again.';
+
+            // If the model name includes "DeepSeek-R1", then handle <think> blocks
+            if (this.model_name && this.model_name.toLowerCase().includes("deepseek-r1")) {
+                const hasOpenTag = res.includes("<think>");
+                const hasCloseTag = res.includes("</think>");
+
+                // If there's a partial mismatch, attempt to regenerate the entire response
+                if ((hasOpenTag && !hasCloseTag) || (!hasOpenTag && hasCloseTag)) {
+                    console.warn("Partial <think> block detected. Re-generating...");
+                    continue;
+                }
+
+                // If both tags appear, remove them (and everything in between)
+                if (hasOpenTag && hasCloseTag) {
+                    res = res.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
+                }
+            }
+
+            // We made it here with either a valid <think> or no-think scenario
+            finalRes = res;
+            break; // Stop retrying
+        }
+
+        // If after max attempts we couldn't get a matched <think> or valid response
+        if (finalRes == null) {
+            console.warn("Could not get a valid <think> block or normal response after max attempts.");
+            finalRes = 'Response incomplete, please try again.';
         }
         console.log('Received.');
-        console.log(res);
-        return res;
+        // Return the final (possibly trimmed) response
+        return finalRes;
     }
-
     async embed(text) {
         throw new Error('Embeddings are not supported by HuggingFace.');
     }
-} \ No newline at end of file
+}