mindcraft/src/models/huggingface.js
google-labs-jules[bot] 857d14e64c I've enhanced logging, transformed thinking tags, and cleaned comments.
- I implemented universal logging for all API providers in src/models/, ensuring calls to logger.js for text and vision logs.
- I added transformation of <thinking>...</thinking> tags to <think>...</think> in all provider responses before logging, for correct categorization by logger.js.
- I standardized the input to logger.js's log() function to be a JSON string of the message history (system prompt + turns).
- I removed unnecessary comments from most API provider files, settings.js, and prompter.js to improve readability.

Note: I encountered some issues that prevented final comment cleanup for qwen.js, vllm.js, and logger.js. Their core logging functionality and tag transformations (for qwen.js and vllm.js) are in place from previous steps.
2025-06-07 20:47:26 +00:00

75 lines
2.4 KiB
JavaScript

import { toSinglePrompt } from '../utils/text.js';
import { getKey } from '../utils/keys.js';
import { HfInference } from "@huggingface/inference";
import { log, logVision } from '../../logger.js';
/**
 * Chat-model wrapper around the Hugging Face Inference client.
 *
 * Flattens the conversation into a single user message, streams the
 * completion, removes complete `<think>...</think>` blocks from the reply and
 * records the exchange through `log()`.
 */
export class HuggingFace {
    /**
     * @param {string} model_name - Model identifier; a `huggingface/` prefix is removed.
     * @param {string} [url] - Ignored. Custom endpoints are not supported; a warning is printed if set.
     * @param {object} [params] - Extra options spread into every `chatCompletionStream` request.
     */
    constructor(model_name, url, params) {
        this.model_name = model_name.replace('huggingface/', '');
        this.url = url;
        this.params = params;
        if (this.url) {
            console.warn("Hugging Face doesn't support custom urls!");
        }
        this.huggingface = new HfInference(getKey('HUGGINGFACE_API_KEY'));
    }

    /**
     * Sends the conversation to the model and returns its textual reply.
     *
     * A reply that opens a `<think>` block without closing it triggers a new
     * request, up to five attempts in total. If the request itself throws, a
     * fixed apology string is returned instead of propagating the error.
     *
     * @param {Array<object>} turns - Conversation turns; passed to `toSinglePrompt` and logged as-is.
     * @param {string} systemMessage - System prompt, placed before the flattened turns.
     * @returns {Promise<string>} The cleaned model reply, or a fallback message on failure.
     */
    async sendRequest(turns, systemMessage) {
        const stop_seq = '***';
        const prompt = toSinglePrompt(turns, null, stop_seq);
        const model_name = this.model_name || 'meta-llama/Meta-Llama-3-8B';
        // The logger receives the structured history rather than the flattened prompt.
        const logInputMessages = [{ role: 'system', content: systemMessage }, ...turns];
        const input = systemMessage + "\n" + prompt;
        const maxAttempts = 5;
        let attempt = 0;
        let finalRes = null;

        while (attempt < maxAttempts) {
            attempt++;
            console.log(`Awaiting Hugging Face API response... (model: ${model_name}, attempt: ${attempt})`);
            let res = '';
            try {
                for await (const chunk of this.huggingface.chatCompletionStream({
                    model: model_name,
                    messages: [{ role: "user", content: input }],
                    ...(this.params || {})
                })) {
                    res += (chunk.choices[0]?.delta?.content || "");
                }
            } catch (err) {
                console.log(err);
                // Retries are reserved for partial <think> blocks, so stop here.
                // Assign finalRes so this message is what the caller receives
                // instead of the "max attempts" fallback below.
                finalRes = 'My brain disconnected, try again.';
                break;
            }

            const hasOpenTag = res.includes("<think>");
            const hasCloseTag = res.includes("</think>");

            if (hasOpenTag && !hasCloseTag) {
                console.warn("Partial <think> block detected. Re-generating...");
                // On the last attempt fall through and return what we have.
                if (attempt < maxAttempts) continue;
            }

            if (hasOpenTag && hasCloseTag) {
                res = res.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
            }

            finalRes = res;
            break;
        }

        // Defensive guard: only reachable if the loop never produced a result.
        if (finalRes == null) {
            console.warn("Could not get a valid response after max attempts.");
            finalRes = 'I thought too hard, sorry, try again.';
        }
        console.log('Received.');

        // Normalise <thinking> tags to <think> before the reply is logged.
        finalRes = finalRes.replace(/<thinking>/g, '<think>').replace(/<\/thinking>/g, '</think>');

        log(JSON.stringify(logInputMessages), finalRes);
        return finalRes;
    }

    /**
     * Embeddings are not available through this provider.
     *
     * @param {string} text - Unused.
     * @returns {Promise<never>} Always rejects.
     * @throws {Error} Always, with an explanatory message.
     */
    async embed(text) {
        throw new Error('Embeddings are not supported by HuggingFace.');
    }
}