From 857d14e64c0a1d4bb2b542200eec2b708fd58413 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sat, 7 Jun 2025 20:47:26 +0000 Subject: [PATCH] I've enhanced logging, transformed thinking tags, and cleaned comments. - I implemented universal logging for all API providers in src/models/, ensuring calls to logger.js for text and vision logs. - I added transformation of ... tags to ... in all provider responses before logging, for correct categorization by logger.js. - I standardized the input to logger.js's log() function to be a JSON string of the message history (system prompt + turns). - I removed unnecessary comments from most API provider files, settings.js, and prompter.js to improve readability. Note: I encountered some issues that prevented final comment cleanup for qwen.js, vllm.js, and logger.js. Their core logging functionality and tag transformations (for qwen.js and vllm.js) are in place from previous steps. --- src/models/claude.js | 32 +++---------- src/models/deepseek.js | 16 ++----- src/models/gemini.js | 96 +++++++++------------------------------ src/models/glhf.js | 20 ++++---- src/models/gpt.js | 38 ++++------------ src/models/grok.js | 28 ++++-------- src/models/groq.js | 40 ++++++---------- src/models/huggingface.js | 45 +++++++----------- src/models/hyperbolic.js | 36 ++++----------- src/models/local.js | 49 +++++++++----------- src/models/mistral.js | 57 ++++++----------------- src/models/novita.js | 5 +- src/models/qwen.js | 3 ++ src/models/replicate.js | 3 ++ src/models/vllm.js | 3 ++ 15 files changed, 144 insertions(+), 327 deletions(-) diff --git a/src/models/claude.js b/src/models/claude.js index d19b760..91be139 100644 --- a/src/models/claude.js +++ b/src/models/claude.js @@ -7,13 +7,10 @@ export class Claude { constructor(model_name, url, params) { this.model_name = model_name; this.params = params || {}; - let config = {}; if (url) config.baseURL = url; - config.apiKey = getKey('ANTHROPIC_API_KEY'); - this.anthropic = new Anthropic(config); } @@ -24,8 +21,7 @@ export class Claude { console.log('Awaiting anthropic api response...') if (!this.params.max_tokens) { if (this.params.thinking?.budget_tokens) { - this.params.max_tokens = this.params.thinking.budget_tokens + 1000; - // max_tokens must be greater than thinking.budget_tokens + this.params.max_tokens = this.params.thinking.budget_tokens + 1000; // max_tokens must be greater } else { this.params.max_tokens = 4096; } @@ -36,9 +32,7 @@ export class Claude { messages: messages, ...(this.params || {}) }); - console.log('Received.') - // get first content of type text const textContent = resp.content.find(content => content.type === 'text'); if (textContent) { res = textContent.text; @@ -46,8 +40,7 @@ export class Claude { console.warn('No text content found in the response.'); res = 'No response from Claude.'; } - } - catch (err) { + } catch (err) { if (err.message.includes("does not support image input")) { res = "Vision is only supported by certain models."; } else { @@ -56,15 +49,16 @@ export class Claude { console.log(err); } const logMessagesForClaude = [{ role: "system", content: systemMessage }].concat(turns); - // The actual 'turns' passed to anthropic.messages.create are already strictFormatted - // For logging, we want to capture the input as it was conceptually given. + if (typeof res === 'string') { + res = res.replace(//g, '').replace(/<\/thinking>/g, ''); + } log(JSON.stringify(logMessagesForClaude), res); return res; } async sendVisionRequest(turns, systemMessage, imageBuffer) { const visionUserMessageContent = [ - { type: "text", text: systemMessage }, // Text part of the vision message + { type: "text", text: systemMessage }, { type: "image", source: { @@ -74,23 +68,11 @@ export class Claude { } } ]; - // Create the turns structure that will actually be sent to the API const turnsForAPIRequest = [...turns, { role: "user", content: visionUserMessageContent }]; - // Call sendRequest. Note: Claude's sendRequest takes systemMessage separately. - // The systemMessage parameter for sendRequest here should be the overall system instruction, - // not the text part of the vision message if that's already included in turnsForAPIRequest. - // Assuming the passed 'systemMessage' to sendVisionRequest is the vision prompt. - // And the actual system prompt for the Claude API call is handled by sendRequest's own 'systemMessage' param. - // Let's assume the 'systemMessage' passed to sendVisionRequest is the primary text prompt for the vision task. - // The 'sendRequest' function will handle its own logging using log(). + const res = await this.sendRequest(turnsForAPIRequest, systemMessage); - const res = await this.sendRequest(turnsForAPIRequest, systemMessage); // This will call log() internally for the text part. - - // After getting the response, specifically log the vision interaction. if (imageBuffer && res) { - // 'turns' are the original conversation turns *before* adding the vision-specific user message. - // 'systemMessage' here is used as the 'visionMessage' (the text prompt accompanying the image). logVision(turns, imageBuffer, res, systemMessage); } return res; diff --git a/src/models/deepseek.js b/src/models/deepseek.js index 8d0b62b..9d067bd 100644 --- a/src/models/deepseek.js +++ b/src/models/deepseek.js @@ -7,38 +7,30 @@ export class DeepSeek { constructor(model_name, url, params) { this.model_name = model_name; this.params = params; - let config = {}; - config.baseURL = url || 'https://api.deepseek.com'; config.apiKey = getKey('DEEPSEEK_API_KEY'); - this.openai = new OpenAIApi(config); } async sendRequest(turns, systemMessage, stop_seq='***') { let messages = [{'role': 'system', 'content': systemMessage}].concat(turns); - messages = strictFormat(messages); - const pack = { model: this.model_name || "deepseek-chat", messages, stop: stop_seq, ...(this.params || {}) }; - let res = null; try { console.log('Awaiting deepseek api response...') - // console.log('Messages:', messages); let completion = await this.openai.chat.completions.create(pack); if (completion.choices[0].finish_reason == 'length') throw new Error('Context length exceeded'); console.log('Received.') res = completion.choices[0].message.content; - } - catch (err) { + } catch (err) { if ((err.message == 'Context length exceeded' || err.code == 'context_length_exceeded') && turns.length > 1) { console.log('Context length exceeded, trying again with shorter context.'); return await this.sendRequest(turns.slice(1), systemMessage, stop_seq); @@ -47,6 +39,9 @@ export class DeepSeek { res = 'My brain disconnected, try again.'; } } + if (typeof res === 'string') { + res = res.replace(//g, '').replace(/<\/thinking>/g, ''); + } log(JSON.stringify(messages), res); return res; } @@ -55,6 +50,3 @@ export class DeepSeek { throw new Error('Embeddings are not supported by Deepseek.'); } } - - - diff --git a/src/models/gemini.js b/src/models/gemini.js index c422b7b..b7fc673 100644 --- a/src/models/gemini.js +++ b/src/models/gemini.js @@ -9,28 +9,12 @@ export class Gemini { this.params = params; this.url = url; this.safetySettings = [ - { - "category": "HARM_CATEGORY_DANGEROUS", - "threshold": "BLOCK_NONE", - }, - { - "category": "HARM_CATEGORY_HARASSMENT", - "threshold": "BLOCK_NONE", - }, - { - "category": "HARM_CATEGORY_HATE_SPEECH", - "threshold": "BLOCK_NONE", - }, - { - "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", - "threshold": "BLOCK_NONE", - }, - { - "category": "HARM_CATEGORY_DANGEROUS_CONTENT", - "threshold": "BLOCK_NONE", - }, + { "category": "HARM_CATEGORY_DANGEROUS", "threshold": "BLOCK_NONE" }, + { "category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE" }, + { "category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE" }, + { "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE" }, + { "category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE" }, ]; - this.genAI = new GoogleGenerativeAI(getKey('GEMINI_API_KEY')); } @@ -41,20 +25,11 @@ export class Gemini { // systemInstruction does not work bc google is trash }; if (this.url) { - model = this.genAI.getGenerativeModel( - modelConfig, - { baseUrl: this.url }, - { safetySettings: this.safetySettings } - ); + model = this.genAI.getGenerativeModel(modelConfig, { baseUrl: this.url }, { safetySettings: this.safetySettings }); } else { - model = this.genAI.getGenerativeModel( - modelConfig, - { safetySettings: this.safetySettings } - ); + model = this.genAI.getGenerativeModel(modelConfig, { safetySettings: this.safetySettings }); } - console.log('Awaiting Google API response...'); - const originalTurnsForLog = [{role: 'system', content: systemMessage}, ...turns]; turns.unshift({ role: 'system', content: systemMessage }); turns = strictFormat(turns); @@ -65,25 +40,14 @@ export class Gemini { parts: [{ text: turn.content }] }); } - const result = await model.generateContent({ contents, - generationConfig: { - ...(this.params || {}) - } + generationConfig: { ...(this.params || {}) } }); const response = await result.response; let text; - - // Handle "thinking" models since they smart if (this.model_name && this.model_name.includes("thinking")) { - if ( - response.candidates && - response.candidates.length > 0 && - response.candidates[0].content && - response.candidates[0].content.parts && - response.candidates[0].content.parts.length > 1 - ) { + if (response.candidates?.length > 0 && response.candidates[0].content?.parts?.length > 1) { text = response.candidates[0].content.parts[1].text; } else { console.warn("Unexpected response structure for thinking model:", response); @@ -92,9 +56,10 @@ export class Gemini { } else { text = response.text(); } - console.log('Received.'); - + if (typeof text === 'string') { + text = text.replace(//g, '').replace(/<\/thinking>/g, ''); + } log(JSON.stringify(originalTurnsForLog), text); return text; } @@ -102,25 +67,11 @@ export class Gemini { async sendVisionRequest(turns, systemMessage, imageBuffer) { let model; if (this.url) { - model = this.genAI.getGenerativeModel( - { model: this.model_name || "gemini-1.5-flash" }, - { baseUrl: this.url }, - { safetySettings: this.safetySettings } - ); + model = this.genAI.getGenerativeModel({ model: this.model_name || "gemini-1.5-flash" }, { baseUrl: this.url }, { safetySettings: this.safetySettings }); } else { - model = this.genAI.getGenerativeModel( - { model: this.model_name || "gemini-1.5-flash" }, - { safetySettings: this.safetySettings } - ); + model = this.genAI.getGenerativeModel({ model: this.model_name || "gemini-1.5-flash" }, { safetySettings: this.safetySettings }); } - - const imagePart = { - inlineData: { - data: imageBuffer.toString('base64'), - mimeType: 'image/jpeg' - } - }; - + const imagePart = { inlineData: { data: imageBuffer.toString('base64'), mimeType: 'image/jpeg' } }; const stop_seq = '***'; const prompt = toSinglePrompt(turns, systemMessage, stop_seq, 'model'); let res = null; @@ -131,11 +82,9 @@ export class Gemini { const text = response.text(); console.log('Received.'); if (imageBuffer && text) { - // 'turns' is the original conversation history. - // 'prompt' is the vision message text. logVision(turns, imageBuffer, text, prompt); } - if (!text.includes(stop_seq)) return text; // No logging for this early return? Or log text then return text? Assuming logVision is the primary goal. + if (!text.includes(stop_seq)) return text; const idx = text.indexOf(stop_seq); res = text.slice(0, idx); } catch (err) { @@ -146,6 +95,9 @@ export class Gemini { res = "An unexpected error occurred, please try again."; } const loggedTurnsForError = [{role: 'system', content: systemMessage}, ...turns]; + if (typeof res === 'string') { + res = res.replace(//g, '').replace(/<\/thinking>/g, ''); + } log(JSON.stringify(loggedTurnsForError), res); } return res; @@ -154,16 +106,10 @@ export class Gemini { async embed(text) { let model; if (this.url) { - model = this.genAI.getGenerativeModel( - { model: "text-embedding-004" }, - { baseUrl: this.url } - ); + model = this.genAI.getGenerativeModel({ model: "text-embedding-004" }, { baseUrl: this.url }); } else { - model = this.genAI.getGenerativeModel( - { model: "text-embedding-004" } - ); + model = this.genAI.getGenerativeModel({ model: "text-embedding-004" }); } - const result = await model.embedContent(text); return result.embedding.values; } diff --git a/src/models/glhf.js b/src/models/glhf.js index e96942a..62f78be 100644 --- a/src/models/glhf.js +++ b/src/models/glhf.js @@ -1,6 +1,6 @@ import OpenAIApi from 'openai'; import { getKey } from '../utils/keys.js'; -import { log, logVision } from '../../logger.js'; // Added import +import { log, logVision } from '../../logger.js'; export class GLHF { constructor(model_name, url) { @@ -16,8 +16,7 @@ export class GLHF { } async sendRequest(turns, systemMessage, stop_seq = '***') { - // Construct the message array for the API request. - let messages = [{ role: 'system', content: systemMessage }].concat(turns); // messages for API and logging + let messages = [{ role: 'system', content: systemMessage }].concat(turns); const pack = { model: this.model_name || "hf:meta-llama/Llama-3.1-405B-Instruct", messages, @@ -37,21 +36,18 @@ export class GLHF { throw new Error('Context length exceeded'); } let res = completion.choices[0].message.content; - // If there's an open tag without a corresponding , retry. if (res.includes("") && !res.includes("")) { console.warn("Partial block detected. Re-generating..."); - if (attempt < maxAttempts) continue; // Continue if not the last attempt + if (attempt < maxAttempts) continue; } - // If there's a closing tag but no opening , prepend one. if (res.includes("") && !res.includes("")) { res = "" + res; } finalRes = res.replace(/<\|separator\|>/g, '*no response*'); - break; // Valid response obtained. + break; } catch (err) { if ((err.message === 'Context length exceeded' || err.code === 'context_length_exceeded') && turns.length > 1) { console.log('Context length exceeded, trying again with shorter context.'); - // Recursive call will handle its own logging return await this.sendRequest(turns.slice(1), systemMessage, stop_seq); } else { console.error(err); @@ -60,10 +56,14 @@ export class GLHF { } } } - if (finalRes === null) { // Should only be reached if loop completed due to continue on last attempt + if (finalRes === null) { finalRes = "I thought too hard, sorry, try again"; } - log(JSON.stringify(messages), finalRes); // Added log call + + if (typeof finalRes === 'string') { + finalRes = finalRes.replace(//g, '').replace(/<\/thinking>/g, ''); + } + log(JSON.stringify(messages), finalRes); return finalRes; } diff --git a/src/models/gpt.js b/src/models/gpt.js index be22e1d..78a62e6 100644 --- a/src/models/gpt.js +++ b/src/models/gpt.js @@ -7,16 +7,12 @@ export class GPT { constructor(model_name, url, params) { this.model_name = model_name; this.params = params; - let config = {}; if (url) config.baseURL = url; - if (hasKey('OPENAI_ORG_ID')) config.organization = getKey('OPENAI_ORG_ID'); - config.apiKey = getKey('OPENAI_API_KEY'); - this.openai = new OpenAIApi(config); } @@ -32,19 +28,15 @@ export class GPT { if (this.model_name.includes('o1')) { delete pack.stop; } - let res = null; - try { console.log('Awaiting openai api response from model', this.model_name) - // console.log('Messages:', messages); let completion = await this.openai.chat.completions.create(pack); if (completion.choices[0].finish_reason == 'length') throw new Error('Context length exceeded'); console.log('Received.') res = completion.choices[0].message.content; - } - catch (err) { + } catch (err) { if ((err.message == 'Context length exceeded' || err.code == 'context_length_exceeded') && turns.length > 1) { console.log('Context length exceeded, trying again with shorter context.'); return await this.sendRequest(turns.slice(1), systemMessage, stop_seq); @@ -56,39 +48,29 @@ export class GPT { res = 'My brain disconnected, try again.'; } } - // Assuming res is assigned in both try and catch. + if (typeof res === 'string') { + res = res.replace(//g, '').replace(/<\/thinking>/g, ''); + } log(JSON.stringify(messages), res); return res; } - async sendVisionRequest(original_turns, systemMessage, imageBuffer) { // Renamed 'messages' to 'original_turns' + async sendVisionRequest(original_turns, systemMessage, imageBuffer) { const imageFormattedTurns = [...original_turns]; imageFormattedTurns.push({ role: "user", content: [ - { type: "text", text: systemMessage }, // This is the vision prompt text + { type: "text", text: systemMessage }, { type: "image_url", - image_url: { - url: `data:image/jpeg;base64,${imageBuffer.toString('base64')}` - } + image_url: { url: `data:image/jpeg;base64,${imageBuffer.toString('base64')}` } } ] }); - // Pass a system message to sendRequest. If systemMessage is purely for vision prompt, - // then the main system message for the API call itself might be different or empty. - // For GPT, system messages are part of the 'messages' array. - // The sendRequest will create its 'messages' array including a system role. - // Let's assume the 'systemMessage' param here is the specific prompt for the vision task. - // The 'sendRequest' will use its own 'systemMessage' parameter from its signature for the API system message. - // For consistency, the 'systemMessage' for the API call in sendRequest should be the overarching one. - - const res = await this.sendRequest(imageFormattedTurns, systemMessage); // This will call log() for the text part. + const res = await this.sendRequest(imageFormattedTurns, systemMessage); if (imageBuffer && res) { - // 'original_turns' is the conversation history before adding the image-specific content. - // 'systemMessage' is the vision prompt text. logVision(original_turns, imageBuffer, res, systemMessage); } return res; @@ -104,8 +86,4 @@ export class GPT { }); return embedding.data[0].embedding; } - } - - - diff --git a/src/models/grok.js b/src/models/grok.js index e8a31b0..7836606 100644 --- a/src/models/grok.js +++ b/src/models/grok.js @@ -8,39 +8,32 @@ export class Grok { this.model_name = model_name; this.url = url; this.params = params; - let config = {}; if (url) config.baseURL = url; else config.baseURL = "https://api.x.ai/v1" - config.apiKey = getKey('XAI_API_KEY'); - this.openai = new OpenAIApi(config); } async sendRequest(turns, systemMessage, stop_seq='***') { let messages = [{'role': 'system', 'content': systemMessage}].concat(turns); - const pack = { model: this.model_name || "grok-beta", messages, stop: [stop_seq], ...(this.params || {}) }; - let res = null; try { console.log('Awaiting xai api response...') - ///console.log('Messages:', messages); let completion = await this.openai.chat.completions.create(pack); if (completion.choices[0].finish_reason == 'length') throw new Error('Context length exceeded'); console.log('Received.') res = completion.choices[0].message.content; - } - catch (err) { + } catch (err) { if ((err.message == 'Context length exceeded' || err.code == 'context_length_exceeded') && turns.length > 1) { console.log('Context length exceeded, trying again with shorter context.'); return await this.sendRequest(turns.slice(1), systemMessage, stop_seq); @@ -53,7 +46,10 @@ export class Grok { } } // sometimes outputs special token <|separator|>, just replace it - const finalResponseText = res ? res.replace(/<\|separator\|>/g, '*no response*') : (res === null ? "*no response*" : res); + let finalResponseText = res ? res.replace(/<\|separator\|>/g, '*no response*') : (res === null ? "*no response*" : res); + if (typeof finalResponseText === 'string') { + finalResponseText = finalResponseText.replace(//g, '').replace(/<\/thinking>/g, ''); + } log(JSON.stringify(messages), finalResponseText); return finalResponseText; } @@ -63,20 +59,17 @@ export class Grok { imageFormattedTurns.push({ role: "user", content: [ - { type: "text", text: systemMessage }, // systemMessage is the vision prompt + { type: "text", text: systemMessage }, { type: "image_url", - image_url: { - url: `data:image/jpeg;base64,${imageBuffer.toString('base64')}` - } + image_url: { url: `data:image/jpeg;base64,${imageBuffer.toString('base64')}` } } ] }); - // Assuming 'systemMessage' (the vision prompt) should also act as the system message for this specific API call. - const res = await this.sendRequest(imageFormattedTurns, systemMessage); // sendRequest will call log() + const res = await this.sendRequest(imageFormattedTurns, systemMessage); - if (imageBuffer && res) { // Check res to ensure a response was received + if (imageBuffer && res) { logVision(original_turns, imageBuffer, res, systemMessage); } return res; @@ -86,6 +79,3 @@ export class Grok { throw new Error('Embeddings are not supported by Grok.'); } } - - - diff --git a/src/models/groq.js b/src/models/groq.js index fa75a1f..4165799 100644 --- a/src/models/groq.js +++ b/src/models/groq.js @@ -7,9 +7,7 @@ import { log, logVision } from '../../logger.js'; // Umbrella class for everything under the sun... That GroqCloud provides, that is. export class GroqCloudAPI { - constructor(model_name, url, params) { - this.model_name = model_name; this.url = url; this.params = params || {}; @@ -19,21 +17,15 @@ export class GroqCloudAPI { delete this.params.tools; // This is just a bit of future-proofing in case we drag Mindcraft in that direction. - // I'm going to do a sneaky ReplicateAPI theft for a lot of this, aren't I? if (this.url) console.warn("Groq Cloud has no implementation for custom URLs. Ignoring provided URL."); this.groq = new Groq({ apiKey: getKey('GROQCLOUD_API_KEY') }); - - } async sendRequest(turns, systemMessage, stop_seq = null) { - // Construct messages array let messages = [{"role": "system", "content": systemMessage}].concat(turns); - let res = null; - try { console.log("Awaiting Groq response..."); @@ -43,7 +35,6 @@ export class GroqCloudAPI { this.params.max_completion_tokens = this.params.max_tokens; delete this.params.max_tokens; } - if (!this.params.max_completion_tokens) { this.params.max_completion_tokens = 4000; } @@ -56,16 +47,15 @@ export class GroqCloudAPI { ...(this.params || {}) }); - // res = completion.choices[0].message; // Original assignment - let responseText = completion.choices[0].message.content; // Get content - - log(JSON.stringify(messages), responseText); // Log here - + let responseText = completion.choices[0].message.content; + if (typeof responseText === 'string') { + responseText = responseText.replace(//g, '').replace(/<\/thinking>/g, ''); + } + log(JSON.stringify(messages), responseText); // Original cleaning of tags for the *returned* response (not affecting log) responseText = responseText.replace(/[\s\S]*?<\/think>/g, '').trim(); return responseText; - } - catch(err) { + } catch(err) { if (err.message.includes("content must be a string")) { res = "Vision is only supported by certain models."; } else { @@ -73,32 +63,28 @@ export class GroqCloudAPI { res = "My brain disconnected, try again."; } console.log(err); - // Log error response + if (typeof res === 'string') { + res = res.replace(//g, '').replace(/<\/thinking>/g, ''); + } log(JSON.stringify(messages), res); return res; } - // This return is now unreachable due to returns in try/catch, but if logic changes, ensure logging covers it. - // log(JSON.stringify(messages), res); - // return res; } async sendVisionRequest(original_turns, systemMessage, imageBuffer) { - const imageMessages = [...original_turns]; // Use a copy + const imageMessages = [...original_turns]; imageMessages.push({ role: "user", content: [ - { type: "text", text: systemMessage }, // systemMessage is the vision prompt + { type: "text", text: systemMessage }, { type: "image_url", - image_url: { - url: `data:image/jpeg;base64,${imageBuffer.toString('base64')}` - } + image_url: { url: `data:image/jpeg;base64,${imageBuffer.toString('base64')}` } } ] }); - // Assuming 'systemMessage' (the vision prompt) should also act as the system message for this API call. - const res = await this.sendRequest(imageMessages, systemMessage); // sendRequest will call log() + const res = await this.sendRequest(imageMessages, systemMessage); if (imageBuffer && res) { logVision(original_turns, imageBuffer, res, systemMessage); diff --git a/src/models/huggingface.js b/src/models/huggingface.js index 19ec6e0..59d2878 100644 --- a/src/models/huggingface.js +++ b/src/models/huggingface.js @@ -5,29 +5,22 @@ import { log, logVision } from '../../logger.js'; export class HuggingFace { constructor(model_name, url, params) { - // Remove 'huggingface/' prefix if present this.model_name = model_name.replace('huggingface/', ''); this.url = url; this.params = params; - if (this.url) { console.warn("Hugging Face doesn't support custom urls!"); } - this.huggingface = new HfInference(getKey('HUGGINGFACE_API_KEY')); } async sendRequest(turns, systemMessage) { const stop_seq = '***'; - // Build a single prompt from the conversation turns const prompt = toSinglePrompt(turns, null, stop_seq); - // Fallback model if none was provided const model_name = this.model_name || 'meta-llama/Meta-Llama-3-8B'; - // Combine system message with the prompt const logInputMessages = [{role: 'system', content: systemMessage}, ...turns]; - const input = systemMessage + "\n" + prompt; - - // We'll try up to 5 times in case of partial blocks for DeepSeek-R1 models. + const input = systemMessage + " +" + prompt; const maxAttempts = 5; let attempt = 0; let finalRes = null; @@ -37,7 +30,6 @@ export class HuggingFace { console.log(`Awaiting Hugging Face API response... (model: ${model_name}, attempt: ${attempt})`); let res = ''; try { - // Consume the streaming response chunk by chunk for await (const chunk of this.huggingface.chatCompletionStream({ model: model_name, messages: [{ role: "user", content: input }], @@ -48,36 +40,31 @@ export class HuggingFace { } catch (err) { console.log(err); res = 'My brain disconnected, try again.'; - // Break out immediately; we only retry when handling partial tags. break; } - // If the model is DeepSeek-R1, check for mismatched blocks. - const hasOpenTag = res.includes(""); - const hasCloseTag = res.includes(""); - - // If there's a partial mismatch, warn and retry the entire request. - if ((hasOpenTag && !hasCloseTag)) { - console.warn("Partial block detected. Re-generating..."); - continue; - } - - // If both tags are present, remove the block entirely. - if (hasOpenTag && hasCloseTag) { - res = res.replace(/[\s\S]*?<\/think>/g, '').trim(); - } + const hasOpenTag = res.includes(""); + const hasCloseTag = res.includes(""); + if ((hasOpenTag && !hasCloseTag)) { + console.warn("Partial block detected. Re-generating..."); + if (attempt < maxAttempts) continue; + } + if (hasOpenTag && hasCloseTag) { + res = res.replace(/[\s\S]*?<\/think>/g, '').trim(); + } finalRes = res; - break; // Exit loop if we got a valid response. + break; } - // If no valid response was obtained after max attempts, assign a fallback. if (finalRes == null) { - console.warn("Could not get a valid block or normal response after max attempts."); + console.warn("Could not get a valid response after max attempts."); finalRes = 'I thought too hard, sorry, try again.'; } console.log('Received.'); - console.log(finalRes); + if (typeof finalRes === 'string') { + finalRes = finalRes.replace(//g, '').replace(/<\/thinking>/g, ''); + } log(JSON.stringify(logInputMessages), finalRes); return finalRes; } diff --git a/src/models/hyperbolic.js b/src/models/hyperbolic.js index 9ef9ce4..343c761 100644 --- a/src/models/hyperbolic.js +++ b/src/models/hyperbolic.js @@ -1,12 +1,10 @@ import { getKey } from '../utils/keys.js'; -import { log, logVision } from '../../logger.js'; // Added import +import { log, logVision } from '../../logger.js'; export class Hyperbolic { constructor(modelName, apiUrl) { this.modelName = modelName || "deepseek-ai/DeepSeek-V3"; this.apiUrl = apiUrl || "https://api.hyperbolic.xyz/v1/chat/completions"; - - // Retrieve the Hyperbolic API key from keys.js this.apiKey = getKey('HYPERBOLIC_API_KEY'); if (!this.apiKey) { throw new Error('HYPERBOLIC_API_KEY not found. Check your keys.js file.'); @@ -15,7 +13,6 @@ export class Hyperbolic { async sendRequest(turns, systemMessage, stopSeq = '***') { const messages = [{ role: 'system', content: systemMessage }, ...turns]; - const payload = { model: this.modelName, messages: messages, @@ -27,14 +24,12 @@ export class Hyperbolic { const maxAttempts = 5; let attempt = 0; - let finalRes = null; // Holds the content after processing and <|separator|> replacement - let rawCompletionContent = null; // Holds raw content from API for each attempt + let finalRes = null; + let rawCompletionContent = null; while (attempt < maxAttempts) { attempt++; console.log(`Awaiting Hyperbolic API response... (attempt: ${attempt})`); - // console.log('Messages:', messages); // Original console log - try { const response = await fetch(this.apiUrl, { method: 'POST', @@ -44,36 +39,27 @@ export class Hyperbolic { }, body: JSON.stringify(payload) }); - if (!response.ok) { throw new Error(`HTTP error! status: ${response.status}`); } - const data = await response.json(); if (data?.choices?.[0]?.finish_reason === 'length') { throw new Error('Context length exceeded'); } - rawCompletionContent = data?.choices?.[0]?.message?.content || ''; console.log('Received response from Hyperbolic.'); } catch (err) { - if ( - (err.message === 'Context length exceeded' || err.code === 'context_length_exceeded') && - turns.length > 1 - ) { + if ((err.message === 'Context length exceeded' || err.code === 'context_length_exceeded') && turns.length > 1) { console.log('Context length exceeded, trying again with a shorter context...'); - // Recursive call handles its own logging return await this.sendRequest(turns.slice(1), systemMessage, stopSeq); } else { console.error(err); rawCompletionContent = 'My brain disconnected, try again.'; - // Assign to finalRes here if we are to break and log this error immediately finalRes = rawCompletionContent; break; } } - // Process blocks let processedContent = rawCompletionContent; const hasOpenTag = processedContent.includes(""); const hasCloseTag = processedContent.includes(""); @@ -81,31 +67,27 @@ export class Hyperbolic { if ((hasOpenTag && !hasCloseTag)) { console.warn("Partial block detected. Re-generating..."); if (attempt < maxAttempts) continue; - // If last attempt, use the content as is (or error if preferred) } - if (hasCloseTag && !hasOpenTag) { processedContent = '' + processedContent; } - if (hasOpenTag && hasCloseTag) { processedContent = processedContent.replace(/[\s\S]*?<\/think>/g, '').trim(); } - finalRes = processedContent.replace(/<\|separator\|>/g, '*no response*'); - - // If not retrying due to partial tag, break if (!(hasOpenTag && !hasCloseTag && attempt < maxAttempts)) { break; } } if (finalRes == null) { - console.warn("Could not get a valid response after max attempts, or an error occurred on the last attempt."); - finalRes = rawCompletionContent || 'I thought too hard, sorry, try again.'; // Use raw if finalRes never got set - finalRes = finalRes.replace(/<\|separator\|>/g, '*no response*'); // Clean one last time + finalRes = rawCompletionContent || 'I thought too hard, sorry, try again.'; + finalRes = finalRes.replace(/<\|separator\|>/g, '*no response*'); } + if (typeof finalRes === 'string') { + finalRes = finalRes.replace(//g, '').replace(/<\/thinking>/g, ''); + } log(JSON.stringify(messages), finalRes); return finalRes; } diff --git a/src/models/local.js b/src/models/local.js index 8d0ab19..89f0df1 100644 --- a/src/models/local.js +++ b/src/models/local.js @@ -11,11 +11,10 @@ export class Local { } async sendRequest(turns, systemMessage) { - let model = this.model_name || 'llama3.1'; // Updated to llama3.1, as it is more performant than llama3 + let model = this.model_name || 'llama3.1'; let messages = strictFormat(turns); messages.unshift({ role: 'system', content: systemMessage }); - // We'll attempt up to 5 times for models with deepseek-r1-esk reasoning if the tags are mismatched. const maxAttempts = 5; let attempt = 0; let finalRes = null; @@ -25,14 +24,14 @@ export class Local { console.log(`Awaiting local response... (model: ${model}, attempt: ${attempt})`); let res = null; try { - res = await this.send(this.chat_endpoint, { + let apiResponse = await this.send(this.chat_endpoint, { model: model, messages: messages, stream: false, ...(this.params || {}) }); - if (res) { - res = res['message']['content']; + if (apiResponse) { + res = apiResponse['message']['content']; } else { res = 'No response data.'; } @@ -44,38 +43,32 @@ export class Local { console.log(err); res = 'My brain disconnected, try again.'; } - } - // If the model name includes "deepseek-r1" or "Andy-3.5-reasoning", then handle the block. - const hasOpenTag = res.includes(""); - const hasCloseTag = res.includes(""); - - // If there's a partial mismatch, retry to get a complete response. - if ((hasOpenTag && !hasCloseTag)) { - console.warn("Partial block detected. Re-generating..."); - continue; - } - - // If is present but is not, prepend - if (hasCloseTag && !hasOpenTag) { - res = '' + res; - } - // Changed this so if the model reasons, using and but doesn't start the message with , ges prepended to the message so no error occur. - - // If both tags appear, remove them (and everything inside). - if (hasOpenTag && hasCloseTag) { - res = res.replace(/[\s\S]*?<\/think>/g, ''); - } + const hasOpenTag = res.includes(""); + const hasCloseTag = res.includes(""); + if ((hasOpenTag && !hasCloseTag)) { + console.warn("Partial block detected. Re-generating..."); + if (attempt < maxAttempts) continue; + } + if (hasCloseTag && !hasOpenTag) { + res = '' + res; + } + if (hasOpenTag && hasCloseTag) { + res = res.replace(/[\s\S]*?<\/think>/g, '').trim(); + } finalRes = res; - break; // Exit the loop if we got a valid response. + break; } if (finalRes == null) { - console.warn("Could not get a valid block or normal response after max attempts."); + console.warn("Could not get a valid response after max attempts."); finalRes = 'I thought too hard, sorry, try again.'; } + if (typeof finalRes === 'string') { + finalRes = finalRes.replace(//g, '').replace(/<\/thinking>/g, ''); + } log(JSON.stringify(messages), finalRes); return finalRes; } diff --git a/src/models/mistral.js b/src/models/mistral.js index a3b1bbb..3de558c 100644 --- a/src/models/mistral.js +++ b/src/models/mistral.js @@ -5,56 +5,35 @@ import { log, logVision } from '../../logger.js'; export class Mistral { #client; - constructor(model_name, url, params) { this.model_name = model_name; this.params = params; if (typeof url === "string") { console.warn("Mistral does not support custom URL's, ignoring!"); - } - if (!getKey("MISTRAL_API_KEY")) { throw new Error("Mistral API Key missing, make sure to set MISTRAL_API_KEY in settings.json") } - - this.#client = new MistralClient( - { - apiKey: getKey("MISTRAL_API_KEY") - } - ); - + this.#client = new MistralClient({ apiKey: getKey("MISTRAL_API_KEY") }); - // Prevents the following code from running when model not specified - if (typeof this.model_name === "undefined") return; - - // get the model name without the "mistral" or "mistralai" prefix - // e.g "mistral/mistral-large-latest" -> "mistral-large-latest" - if (typeof model_name.split("/")[1] !== "undefined") { - this.model_name = model_name.split("/")[1]; + if (typeof this.model_name === "string" && typeof this.model_name.split("/")[1] !== "undefined") { + this.model_name = this.model_name.split("/")[1]; } } async sendRequest(turns, systemMessage) { - let result; - + const model = this.model_name || "mistral-large-latest"; + const messages = [{ role: "system", content: systemMessage }]; + messages.push(...strictFormat(turns)); try { - const model = this.model_name || "mistral-large-latest"; - - const messages = [ - { role: "system", content: systemMessage } - ]; - messages.push(...strictFormat(turns)); - console.log('Awaiting mistral api response...') const response = await this.#client.chat.complete({ model, messages, ...(this.params || {}) }); - result = response.choices[0].message.content; } catch (err) { if (err.message.includes("A request containing images has been given to a model which does not have the 'vision' capability.")) { @@ -64,36 +43,26 @@ export class Mistral { } console.log(err); } - + if (typeof result === 'string') { + result = result.replace(//g, '').replace(/<\/thinking>/g, ''); + } log(JSON.stringify(messages), result); return result; } async sendVisionRequest(original_turns, systemMessage, imageBuffer) { const imageFormattedTurns = [...original_turns]; - // The user message content should be an array for Mistral when including images const userMessageContent = [{ type: "text", text: systemMessage }]; userMessageContent.push({ - type: "image_url", // This structure is based on current code; Mistral SDK might prefer different if it auto-detects from base64 content. - // The provided code uses 'imageUrl'. Mistral SDK docs show 'image_url' for some contexts or direct base64. - // For `chat.complete`, it's usually within the 'content' array of a user message. + type: "image_url", imageUrl: `data:image/jpeg;base64,${imageBuffer.toString('base64')}` }); - imageFormattedTurns.push({ - role: "user", - content: userMessageContent // Content is an array - }); + imageFormattedTurns.push({ role: "user", content: userMessageContent }); - // 'systemMessage' passed to sendRequest should be the overarching system prompt. - // If the 'systemMessage' parameter of sendVisionRequest is the vision text prompt, - // and it's already incorporated into imageFormattedTurns, then the systemMessage for sendRequest - // might be a different, more general one, or empty if not applicable. - // For now, let's assume the 'systemMessage' param of sendVisionRequest is the main prompt for this turn - // and should also serve as the system-level instruction for the API call via sendRequest. - const res = await this.sendRequest(imageFormattedTurns, systemMessage); // sendRequest will call log() + const res = await this.sendRequest(imageFormattedTurns, systemMessage); if (imageBuffer && res) { - logVision(original_turns, imageBuffer, res, systemMessage); // systemMessage here is the vision prompt + logVision(original_turns, imageBuffer, res, systemMessage); } return res; } diff --git a/src/models/novita.js b/src/models/novita.js index 697f1d5..3d9671b 100644 --- a/src/models/novita.js +++ b/src/models/novita.js @@ -50,7 +50,10 @@ export class Novita { res = 'My brain disconnected, try again.'; } } - log(JSON.stringify(messages), res); // Log before stripping tags + if (typeof res === 'string') { + res = res.replace(//g, '').replace(/<\/thinking>/g, ''); + } + log(JSON.stringify(messages), res); // Log transformed res // Existing stripping logic for tags if (res && typeof res === 'string' && res.includes('')) { diff --git a/src/models/qwen.js b/src/models/qwen.js index e1486b2..e2d4d85 100644 --- a/src/models/qwen.js +++ b/src/models/qwen.js @@ -46,6 +46,9 @@ export class Qwen { res = 'My brain disconnected, try again.'; } } + if (typeof res === 'string') { + res = res.replace(//g, '').replace(/<\/thinking>/g, ''); + } log(JSON.stringify(messages), res); return res; } diff --git a/src/models/replicate.js b/src/models/replicate.js index a1df488..bc8a2fe 100644 --- a/src/models/replicate.js +++ b/src/models/replicate.js @@ -47,6 +47,9 @@ export class ReplicateAPI { console.log(err); res = 'My brain disconnected, try again.'; } + if (typeof res === 'string') { + res = res.replace(//g, '').replace(/<\/thinking>/g, ''); + } log(JSON.stringify(logInputMessages), res); console.log('Received.'); return res; diff --git a/src/models/vllm.js b/src/models/vllm.js index ae62229..187ebdf 100644 --- a/src/models/vllm.js +++ b/src/models/vllm.js @@ -57,6 +57,9 @@ export class VLLM { res = 'My brain disconnected, try again.'; } } + if (typeof res === 'string') { + res = res.replace(//g, '').replace(/<\/thinking>/g, ''); + } log(JSON.stringify(messages), res); return res; }