Mirror of https://github.com/kolbytn/mindcraft.git (synced 2025-07-18 14:05:17 +02:00)
Update huggingface.js
Added DeepSeek-R1 support, e.g. the Qwen2.5 32B distill.
This commit is contained in:
parent 2b3ca165e8
commit c78dba7776
1 changed file with 75 additions and 20 deletions
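
For orientation, a minimal usage sketch of the updated wrapper. This is hypothetical and not part of the commit: the model id, turn shapes, and import path are assumptions based on the conventions visible in the updated file below.

// Hypothetical usage of the updated HuggingFace wrapper (not part of this commit).
// Assumes the 'huggingface/' prefix convention and {role, content} turn objects
// used by the code below; the model id is only an example of an R1 distill.
// Requires HUGGINGFACE_API_KEY to be available via getKey().
import { HuggingFace } from './huggingface.js';

const model = new HuggingFace('huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B');
const turns = [{ role: 'user', content: 'What should I gather first?' }];
const reply = await model.sendRequest(turns, 'You are a helpful Minecraft assistant.');
// Any <think>...</think> reasoning is stripped before the reply is returned.
console.log(reply);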
@@ -1,44 +1,99 @@

The updated huggingface.js in full:

// huggingface.js
import { toSinglePrompt } from '../utils/text.js';
import { getKey } from '../utils/keys.js';
import { HfInference } from "@huggingface/inference";

export class HuggingFace {
    constructor(model_name, url) {
        // Remove 'huggingface/' prefix if present
        this.model_name = model_name.replace('huggingface/', '');
        this.url = url;

        // Hugging Face Inference doesn't currently allow custom base URLs
        if (this.url) {
            console.warn("Hugging Face doesn't support custom urls!");
        }

        // Initialize the HfInference instance
        this.huggingface = new HfInference(getKey('HUGGINGFACE_API_KEY'));
    }

    /**
     * Main method to handle chat requests.
     */
    async sendRequest(turns, systemMessage) {
        const stop_seq = '***';
        // Convert the user's turns and systemMessage into a single prompt string
        const prompt = toSinglePrompt(turns, null, stop_seq);
        // Fallback model if none was provided
        const model_name = this.model_name || 'meta-llama/Meta-Llama-3-8B';

        // Combine system message with the prompt
        const input = systemMessage + "\n" + prompt;

        // We'll collect the streaming response in this variable
        let res = '';

        console.log('Messages:', [{ role: "system", content: systemMessage }, ...turns]);

        // We'll do up to 5 attempts if the model is "DeepSeek-R1" and <think> tags are mismatched
        const maxAttempts = 5;
        let attempt = 0;
        let finalRes = null;

        while (attempt < maxAttempts) {
            attempt++;
            console.log(`Awaiting Hugging Face API response... (model: ${model_name}, attempt: ${attempt})`);

            res = '';
            try {
                // chatCompletionStream returns an async iterator that we consume chunk by chunk
                for await (const chunk of this.huggingface.chatCompletionStream({
                    model: model_name,
                    messages: [{ role: "user", content: input }]
                })) {
                    // Each chunk may or may not have delta content
                    res += (chunk.choices[0]?.delta?.content || "");
                }
            } catch (err) {
                console.log(err);
                res = 'My brain disconnected, try again.';
                // Exit the loop, as we only want to retry for <think> block mismatches, not other errors
                break;
            }

            // If the model name includes "DeepSeek-R1", then handle <think> blocks
            if (this.model_name && this.model_name.toLowerCase().includes("deepseek-r1")) {
                const hasOpenTag = res.includes("<think>");
                const hasCloseTag = res.includes("</think>");

                // If there's a partial mismatch, attempt to regenerate the entire response
                if ((hasOpenTag && !hasCloseTag) || (!hasOpenTag && hasCloseTag)) {
                    console.warn("Partial <think> block detected. Re-generating...");
                    continue;
                }

                // If both tags appear, remove them (and everything in between)
                if (hasOpenTag && hasCloseTag) {
                    res = res.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
                }
            }

            // We made it here with either a valid or no-think scenario
            finalRes = res;
            break; // Stop retrying
        }

        // If after max attempts we couldn't get a matched <think> or valid response
        if (finalRes == null) {
            console.warn("Could not get a valid <think> block or normal response after max attempts.");
            finalRes = 'Response incomplete, please try again.';
        }
        console.log('Received.');
        // Return the final (possibly trimmed) response
        return finalRes;
    }

    async embed(text) {
        throw new Error('Embeddings are not supported by HuggingFace.');
    }
}
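
To make the retry condition in sendRequest concrete, here is a small standalone sketch of the <think>-block handling. The stripThink helper is invented for illustration and is not part of the commit; it reuses the same tag checks and regex as the code above.

// Standalone sketch of the <think>-block handling added for DeepSeek-R1 models.
// stripThink is a hypothetical helper mirroring the checks inside sendRequest.
function stripThink(res) {
    const hasOpenTag = res.includes("<think>");
    const hasCloseTag = res.includes("</think>");

    // A lone opening or closing tag means the reasoning block was cut off
    // mid-stream, so the caller should regenerate the response.
    if (hasOpenTag !== hasCloseTag) {
        return { ok: false, text: res };
    }

    // Matched tags (or none): drop the reasoning block, keep the visible reply.
    return { ok: true, text: res.replace(/<think>[\s\S]*?<\/think>/g, '').trim() };
}

console.log(stripThink("<think>plan the route</think>Heading to the village."));
// -> { ok: true, text: 'Heading to the village.' }
console.log(stripThink("<think>truncated reasoning...").ok);
// -> false, so sendRequest would regenerate (up to 5 attempts)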