Mirror of https://github.com/kolbytn/mindcraft.git (synced 2025-08-02 21:35:31 +02:00)
![google-labs-jules[bot]](/assets/img/avatar_default.png)
- I implemented universal logging for all API providers in src/models/, ensuring calls to logger.js for text and vision logs. - I added transformation of <thinking>...</thinking> tags to <think>...</think> in all provider responses before logging, for correct categorization by logger.js. - I standardized the input to logger.js's log() function to be a JSON string of the message history (system prompt + turns). - I removed unnecessary comments from most API provider files, settings.js, and prompter.js to improve readability. Note: I encountered some issues that prevented final comment cleanup for qwen.js, vllm.js, and logger.js. Their core logging functionality and tag transformations (for qwen.js and vllm.js) are in place from previous steps.
73 lines
2.8 KiB
JavaScript
(file: 73 lines, 2.8 KiB, JavaScript)
import OpenAIApi from 'openai';
|
|
import { getKey } from '../utils/keys.js';
|
|
import { log, logVision } from '../../logger.js';
|
|
|
|
/**
 * Model provider for the glhf.chat OpenAI-compatible chat API.
 *
 * Responses are normalized before logging: partial reasoning tags are
 * repaired, `<|separator|>` markers are stripped, and `<thinking>` tags
 * are rewritten to `<think>` so logger.js categorizes them correctly.
 */
export class GLHF {
    /**
     * @param {string} model_name - Model id (falls back to a default in sendRequest when falsy).
     * @param {string} [url] - Optional base URL override for the API endpoint.
     * @throws {Error} If GHLF_API_KEY is not present in keys.json.
     */
    constructor(model_name, url) {
        this.model_name = model_name;
        // NOTE: the key name 'GHLF_API_KEY' (letters transposed) matches the
        // existing keys.json convention for this provider — do not "fix" it
        // without migrating user config.
        const apiKey = getKey('GHLF_API_KEY');
        if (!apiKey) {
            throw new Error('API key not found. Please check keys.json and ensure GHLF_API_KEY is defined.');
        }
        this.openai = new OpenAIApi({
            apiKey,
            baseURL: url || "https://glhf.chat/api/openai/v1"
        });
    }

    /**
     * Send a chat completion request, retrying on truncated reasoning blocks
     * and shrinking the context window on length errors.
     *
     * @param {Array<{role: string, content: string}>} turns - Conversation history.
     * @param {string} systemMessage - System prompt prepended to the turns.
     * @param {string} [stop_seq='***'] - Stop sequence passed to the API.
     * @returns {Promise<string>} The model's reply, or a fallback message on failure.
     */
    async sendRequest(turns, systemMessage, stop_seq = '***') {
        const messages = [{ role: 'system', content: systemMessage }].concat(turns);
        const pack = {
            model: this.model_name || "hf:meta-llama/Llama-3.1-405B-Instruct",
            messages,
            stop: [stop_seq]
        };

        const maxAttempts = 5;
        let attempt = 0;
        let finalRes = null;

        while (attempt < maxAttempts) {
            attempt++;
            console.log(`Awaiting glhf.chat API response... (attempt: ${attempt})`);
            try {
                const completion = await this.openai.chat.completions.create(pack);
                if (completion.choices[0].finish_reason === 'length') {
                    // Route truncation through the catch block so the
                    // shorter-context retry logic below handles it.
                    throw new Error('Context length exceeded');
                }
                let res = completion.choices[0].message.content;
                // An opened-but-unclosed <think> block means the model's
                // reasoning was cut off: retry while attempts remain.
                if (res.includes("<think>") && !res.includes("</think>")) {
                    console.warn("Partial <think> block detected. Re-generating...");
                    if (attempt < maxAttempts) continue;
                    // FIX: on the final attempt, close the dangling tag instead
                    // of returning a response with an unterminated <think>.
                    res += "</think>";
                }
                // Mirror repair for the opposite case: closer without opener.
                if (res.includes("</think>") && !res.includes("<think>")) {
                    res = "<think>" + res;
                }
                finalRes = res.replace(/<\|separator\|>/g, '*no response*');
                break;
            } catch (err) {
                if ((err.message === 'Context length exceeded' || err.code === 'context_length_exceeded') && turns.length > 1) {
                    // Drop the oldest turn and retry with a shorter context.
                    console.log('Context length exceeded, trying again with shorter context.');
                    return await this.sendRequest(turns.slice(1), systemMessage, stop_seq);
                } else {
                    console.error(err);
                    finalRes = 'My brain disconnected, try again.';
                    break;
                }
            }
        }

        // All attempts consumed without producing a usable response.
        if (finalRes === null) {
            finalRes = "I thought too hard, sorry, try again";
        }

        // Normalize <thinking> tags to <think> so logger.js categorizes
        // reasoning content correctly.
        if (typeof finalRes === 'string') {
            finalRes = finalRes.replace(/<thinking>/g, '<think>').replace(/<\/thinking>/g, '</think>');
        }

        // logger.js expects the full message history as a JSON string.
        log(JSON.stringify(messages), finalRes);
        return finalRes;
    }

    /**
     * Embeddings are not available on glhf.chat.
     * @throws {Error} Always.
     */
    async embed(text) {
        throw new Error('Embeddings are not supported by glhf.');
    }
}
|