Mirror of https://github.com/kolbytn/mindcraft.git (synced 2025-08-03 22:05:35 +02:00)

Merge pull request #423 from Sweaterdog/main: 2 new APIs, and Reasoning model support

Commit d5df9f8c9f: 17 changed files with 448 additions and 136 deletions
.gitignore (vendored, 2 changes)

@ -15,4 +15,4 @@ services/viaproxy/saves.json
services/viaproxy/viaproxy.yml
tmp/
wandb/
experiments/
experiments/
@ -39,11 +39,11 @@ You can configure the agent's name, model, and prompts in their profile like `an

| API | Config Variable | Example Model name | Docs |
|------|------|------|------|
| `openai` | `OPENAI_API_KEY` | `gpt-4o-mini` | [docs](https://platform.openai.com/docs/models) |
| `google` | `GEMINI_API_KEY` | `gemini-pro` | [docs](https://ai.google.dev/gemini-api/docs/models/gemini) |
| `google` | `GEMINI_API_KEY` | `gemini-2.0-flash` | [docs](https://ai.google.dev/gemini-api/docs/models/gemini) |
| `anthropic` | `ANTHROPIC_API_KEY` | `claude-3-haiku-20240307` | [docs](https://docs.anthropic.com/claude/docs/models-overview) |
| `xai` | `XAI_API_KEY` | `grok-2-1212` | [docs](https://docs.x.ai/docs) |
| `deepseek` | `DEEPSEEK_API_KEY` | `deepseek-chat` | [docs](https://api-docs.deepseek.com/) |
| `ollama` (local) | n/a | `llama3` | [docs](https://ollama.com/library) |
| `ollama` (local) | n/a | `llama3.1` | [docs](https://ollama.com/library) |
| `qwen` | `QWEN_API_KEY` | `qwen-max` | [Intl.](https://www.alibabacloud.com/help/en/model-studio/developer-reference/use-qwen-by-calling-api)/[cn](https://help.aliyun.com/zh/model-studio/getting-started/models) |
| `mistral` | `MISTRAL_API_KEY` | `mistral-large-latest` | [docs](https://docs.mistral.ai/getting-started/models/models_overview/) |
| `replicate` | `REPLICATE_API_KEY` | `replicate/meta/meta-llama-3-70b-instruct` | [docs](https://replicate.com/collections/language-models) |
@ -51,9 +51,11 @@ You can configure the agent's name, model, and prompts in their profile like `an

| `huggingface` | `HUGGINGFACE_API_KEY` | `huggingface/mistralai/Mistral-Nemo-Instruct-2407` | [docs](https://huggingface.co/models) |
| `novita` | `NOVITA_API_KEY` | `novita/deepseek/deepseek-r1` | [docs](https://novita.ai/model-api/product/llm-api?utm_source=github_mindcraft&utm_medium=github_readme&utm_campaign=link) |
| `openrouter` | `OPENROUTER_API_KEY` | `openrouter/anthropic/claude-3.5-sonnet` | [docs](https://openrouter.ai/models) |
| `glhf.chat` | `GHLF_API_KEY` | `glhf/hf:meta-llama/Llama-3.1-405B-Instruct` | [docs](https://glhf.chat/user-settings/api) |
| `hyperbolic` | `HYPERBOLIC_API_KEY` | `hyperbolic/deepseek-ai/DeepSeek-V3` | [docs](https://docs.hyperbolic.xyz/docs/getting-started) |
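To make the table concrete: a bot profile simply names one of these models, using the provider prefix where the table shows one, and the matching key from `keys.json` is picked up automatically. The snippet below is a minimal sketch rather than a file from this commit; the profile name is hypothetical, the field set follows the `gemini.json` profile changed later in this diff, and the model string comes from the `glhf.chat` row above.

```json
{
    "name": "andy-glhf",
    "model": "glhf/hf:meta-llama/Llama-3.1-405B-Instruct",
    "cooldown": 10000
}
```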
If you use Ollama, to install the models used by default (generation and embedding), execute the following terminal command:

`ollama pull llama3 && ollama pull nomic-embed-text`

`ollama pull llama3.1 && ollama pull nomic-embed-text`

### Online Servers
To connect to online servers your bot will need an official Microsoft/Minecraft account. You can use your own personal one, but you will need another account if you also want to connect and play alongside it. To connect, change these lines in `settings.js`:
@ -347,4 +347,4 @@ def main():

    # run_experiment(args.task_path, args.task_id, args.num_exp)

if __name__ == "__main__":
    main()
    main()
@ -109,4 +109,3 @@

"type": "techtree",
"timeout": 300
}
}
@ -10,6 +10,8 @@

"XAI_API_KEY": "",
"MISTRAL_API_KEY": "",
"DEEPSEEK_API_KEY": "",
"GHLF_API_KEY": "",
"HYPERBOLIC_API_KEY": "",
"NOVITA_API_KEY": "",
"OPENROUTER_API_KEY": ""
}
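For context, both new keys are read with the same `getKey` helper the provider classes below use. A minimal sketch of that check follows; the import path and the copy-from-`keys.example.json` step are assumptions, not shown in this diff, and the falsy-return pattern mirrors the constructors in glhf.js and hyperbolic.js rather than a documented contract.

```javascript
import { getKey } from './src/utils/keys.js';

// Hypothetical startup check mirroring the constructors in glhf.js and hyperbolic.js below.
for (const name of ['GHLF_API_KEY', 'HYPERBOLIC_API_KEY']) {
    if (!getKey(name)) {
        console.warn(`${name} is missing or empty; copy keys.example.json to keys.json and fill it in.`);
    }
}
```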
@ -1,7 +1,7 @@

{
"name": "gemini",
"model": "gemini-1.5-flash",
"model": "gemini-2.0-flash",
"cooldown": 10000
}
}
@ -151,5 +151,4 @@ export class ActionManager {

await this.stop(); // last attempt to stop
}, TIMEOUT_MINS * 60 * 1000);
}
}
}
@ -86,7 +86,7 @@ export class Agent {

console.log(`${this.name} spawned.`);
this.clearBotLogs();
this._setupEventHandlers(save_data, init_message);
this.startEvents();
@ -117,4 +117,4 @@ export class History {

this.turns = [];
this.memory = '';
}
}
}
@ -143,4 +143,4 @@ export class SelfPrompter {

// this stops it from responding from the handlemessage loop and the self-prompt loop at the same time
}
}
}
}
@ -39,7 +39,6 @@ export class Gemini {

model: this.model_name || "gemini-1.5-flash",
// systemInstruction does not work bc google is trash
};
if (this.url) {
model = this.genAI.getGenerativeModel(
modelConfig,
@ -72,7 +71,26 @@ export class Gemini {

}
});
const response = await result.response;
const text = response.text();
let text;
// Handle "thinking" models since they smart
if (this.model_name && this.model_name.includes("thinking")) {
if (
response.candidates &&
response.candidates.length > 0 &&
response.candidates[0].content &&
response.candidates[0].content.parts &&
response.candidates[0].content.parts.length > 1
) {
text = response.candidates[0].content.parts[1].text;
} else {
console.warn("Unexpected response structure for thinking model:", response);
text = response.text();
}
} else {
text = response.text();
}
console.log('Received.');
return text;
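For readers skimming the hunk above: Gemini "thinking" models return the reasoning and the answer as separate content parts, so the new branch reads the second part instead of calling `response.text()`. A minimal standalone sketch of that extraction follows; the helper name is hypothetical, while the field names mirror the diff.

```javascript
// Minimal sketch, not part of the repo: pull the answer part out of a
// multi-part "thinking" candidate, falling back to the only part available.
function extractAnswerText(response) {
    const parts = response?.candidates?.[0]?.content?.parts;
    if (Array.isArray(parts) && parts.length > 1) {
        return parts[1].text;       // part 0 holds the reasoning, part 1 the answer
    }
    return parts?.[0]?.text ?? '';  // single-part or unexpected shape
}
```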
@ -94,4 +112,4 @@ export class Gemini {

const result = await model.embedContent(text);
return result.embedding.values;
}
}
}
src/models/glhf.js (new file, 70 lines)

@ -0,0 +1,70 @@

import OpenAIApi from 'openai';
import { getKey } from '../utils/keys.js';

export class GLHF {
    constructor(model_name, url) {
        this.model_name = model_name;
        const apiKey = getKey('GHLF_API_KEY');
        if (!apiKey) {
            throw new Error('API key not found. Please check keys.json and ensure GHLF_API_KEY is defined.');
        }
        this.openai = new OpenAIApi({
            apiKey,
            baseURL: url || "https://glhf.chat/api/openai/v1"
        });
    }

    async sendRequest(turns, systemMessage, stop_seq = '***') {
        // Construct the message array for the API request.
        let messages = [{ role: 'system', content: systemMessage }].concat(turns);
        const pack = {
            model: this.model_name || "hf:meta-llama/Llama-3.1-405B-Instruct",
            messages,
            stop: [stop_seq]
        };

        const maxAttempts = 5;
        let attempt = 0;
        let finalRes = null;

        while (attempt < maxAttempts) {
            attempt++;
            console.log(`Awaiting glhf.chat API response... (attempt: ${attempt})`);
            try {
                let completion = await this.openai.chat.completions.create(pack);
                if (completion.choices[0].finish_reason === 'length') {
                    throw new Error('Context length exceeded');
                }
                let res = completion.choices[0].message.content;
                // If there's an open <think> tag without a corresponding </think>, retry.
                if (res.includes("<think>") && !res.includes("</think>")) {
                    console.warn("Partial <think> block detected. Re-generating...");
                    continue;
                }
                // If there's a closing </think> tag but no opening <think>, prepend one.
                if (res.includes("</think>") && !res.includes("<think>")) {
                    res = "<think>" + res;
                }
                finalRes = res.replace(/<\|separator\|>/g, '*no response*');
                break; // Valid response obtained.
            } catch (err) {
                if ((err.message === 'Context length exceeded' || err.code === 'context_length_exceeded') && turns.length > 1) {
                    console.log('Context length exceeded, trying again with shorter context.');
                    return await this.sendRequest(turns.slice(1), systemMessage, stop_seq);
                } else {
                    console.error(err);
                    finalRes = 'My brain disconnected, try again.';
                    break;
                }
            }
        }
        if (finalRes === null) {
            finalRes = "I thought too hard, sorry, try again";
        }
        return finalRes;
    }

    async embed(text) {
        throw new Error('Embeddings are not supported by glhf.');
    }
}
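As a quick usage sketch (not part of the commit), the new class can be exercised on its own; the model string matches the default above, and `keys.json` must provide `GHLF_API_KEY`. In mindcraft itself the Prompter constructs this class from a profile instead.

```javascript
import { GLHF } from './src/models/glhf.js';

// Hypothetical standalone test of the new provider class.
const glhf = new GLHF('hf:meta-llama/Llama-3.1-405B-Instruct');
const reply = await glhf.sendRequest(
    [{ role: 'user', content: 'Say hello in one short sentence.' }],
    'You are a helpful Minecraft assistant.'
);
console.log(reply);
```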
@ -24,61 +24,83 @@ export class GroqCloudAPI {

this.groq = new Groq({ apiKey: getKey('GROQCLOUD_API_KEY') });
}
async sendRequest(turns, systemMessage, stop_seq=null) {
async sendRequest(turns, systemMessage, stop_seq = null) {
// Variables for DeepSeek-R1 models
const maxAttempts = 5;
let attempt = 0;
let finalRes = null;
let res = null;
let messages = [{"role": "system", "content": systemMessage}].concat(turns); // The standard for GroqCloud is just appending to a messages array starting with the system prompt, but
// this is perfectly acceptable too, and I recommend it.
// I still feel as though I should note it for any future revisions of MindCraft, though.
// Construct messages array
let messages = [{"role": "system", "content": systemMessage}].concat(turns);
// These variables look odd, but they're for the future. Please keep them intact.
let raw_res = null;
let res = null;
let tool_calls = null;
while (attempt < maxAttempts) {
attempt++;
try {
// These variables look odd, but they're for the future.
let raw_res = null;
let tool_calls = null;
console.log("Awaiting Groq response...");
try {
console.log("Awaiting Groq response...");
if (this.params.max_tokens) {
// Handle deprecated max_tokens parameter
if (this.params.max_tokens) {
console.warn("GROQCLOUD WARNING: A profile is using `max_tokens`. This is deprecated. Please move to `max_completion_tokens`.");
this.params.max_completion_tokens = this.params.max_tokens;
delete this.params.max_tokens;
}
console.warn("GROQCLOUD WARNING: A profile is using `max_tokens`. This is deprecated. Please move to `max_completion_tokens`.");
this.params.max_completion_tokens = this.params.max_tokens;
delete this.params.max_tokens;
if (!this.params.max_completion_tokens) {
this.params.max_completion_tokens = 8000; // Set it lower.
}
}
let completion = await this.groq.chat.completions.create({
"messages": messages,
"model": this.model_name || "llama-3.3-70b-versatile",
"stream": false,
"stop": stop_seq,
...(this.params || {})
});
if (!this.params.max_completion_tokens) {
this.params.max_completion_tokens = 8000; // Set it lower. This is a common theme.
}
let completion = await this.groq.chat.completions.create({
"messages": messages,
"model": this.model_name || "llama-3.3-70b-versatile",
"stream": false,
"stop": stop_seq,
...(this.params || {})
});
raw_res = completion.choices[0].message;
res = raw_res.content;
}
catch(err) {
console.log(err);
res = "My brain just kinda stopped working. Try again.";
}
return res;
raw_res = completion.choices[0].message;
res = raw_res.content;
} catch (err) {
console.log(err);
res = "My brain just kinda stopped working. Try again.";
}
async embed(_) {
throw new Error('Embeddings are not supported by Groq.');
// Check for <think> tag issues
const hasOpenTag = res.includes("<think>");
const hasCloseTag = res.includes("</think>");
// If a partial <think> block is detected, log a warning and retry
if (hasOpenTag && !hasCloseTag) {
console.warn("Partial <think> block detected. Re-generating Groq request...");
continue; // This will skip the rest of the loop and try again
}
// If only the closing tag is present, prepend an opening tag
if (hasCloseTag && !hasOpenTag) {
res = '<think>' + res;
}
// Remove the complete <think> block (and any content inside) from the response
res = res.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
finalRes = res;
break; // Exit the loop once a valid response is obtained
}
if (finalRes == null) {
console.warn("Could not obtain a valid <think> block or normal response after max attempts.");
finalRes = "I thought too hard, sorry, try again.";
}
finalRes = finalRes.replace(/<\|separator\|>/g, '*no response*');
return finalRes;
}
}
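The reasoning-model handling added here (and repeated in the Hugging Face, Hyperbolic, and Ollama classes below) boils down to one small sanitation step. A minimal sketch of that shared logic follows; the function name is an illustration rather than a helper that exists in the repo.

```javascript
// Strip DeepSeek-R1-style <think> blocks from a reply.
// Returns null when the block is only partially present, signalling the caller to retry.
function sanitizeThinkBlocks(res) {
    const hasOpen = res.includes('<think>');
    const hasClose = res.includes('</think>');
    if (hasOpen && !hasClose) {
        return null;                    // partial reasoning block: regenerate the response
    }
    if (hasClose && !hasOpen) {
        res = '<think>' + res;          // reply started mid-block: restore the opening tag
    }
    // Remove the completed reasoning block and keep only the visible answer.
    return res.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
}

// Example: sanitizeThinkBlocks('<think>plan the base...</think>On it!') === 'On it!'
```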
@ -1,46 +1,85 @@

import {toSinglePrompt} from '../utils/text.js';
import {getKey} from '../utils/keys.js';
import {HfInference} from "@huggingface/inference";
import { toSinglePrompt } from '../utils/text.js';
import { getKey } from '../utils/keys.js';
import { HfInference } from "@huggingface/inference";
export class HuggingFace {
constructor(model_name, url, params) {
this.model_name = model_name.replace('huggingface/','');
this.url = url;
this.params = params;
constructor(model_name, url, params) {
// Remove 'huggingface/' prefix if present
this.model_name = model_name.replace('huggingface/', '');
this.url = url;
this.params = params;
if (this.url) {
console.warn("Hugging Face doesn't support custom urls!");
if (this.url) {
console.warn("Hugging Face doesn't support custom urls!");
}
this.huggingface = new HfInference(getKey('HUGGINGFACE_API_KEY'));
}
async sendRequest(turns, systemMessage) {
const stop_seq = '***';
// Build a single prompt from the conversation turns
const prompt = toSinglePrompt(turns, null, stop_seq);
// Fallback model if none was provided
const model_name = this.model_name || 'meta-llama/Meta-Llama-3-8B';
// Combine system message with the prompt
const input = systemMessage + "\n" + prompt;
// We'll try up to 5 times in case of partial <think> blocks for DeepSeek-R1 models.
const maxAttempts = 5;
let attempt = 0;
let finalRes = null;
while (attempt < maxAttempts) {
attempt++;
console.log(`Awaiting Hugging Face API response... (model: ${model_name}, attempt: ${attempt})`);
let res = '';
try {
// Consume the streaming response chunk by chunk
for await (const chunk of this.huggingface.chatCompletionStream({
model: model_name,
messages: [{ role: "user", content: input }],
...(this.params || {})
})) {
res += (chunk.choices[0]?.delta?.content || "");
}
} catch (err) {
console.log(err);
res = 'My brain disconnected, try again.';
// Break out immediately; we only retry when handling partial <think> tags.
break;
}
// If the model is DeepSeek-R1, check for mismatched <think> blocks.
const hasOpenTag = res.includes("<think>");
const hasCloseTag = res.includes("</think>");
// If there's a partial mismatch, warn and retry the entire request.
if ((hasOpenTag && !hasCloseTag)) {
console.warn("Partial <think> block detected. Re-generating...");
continue;
}
this.huggingface = new HfInference(getKey('HUGGINGFACE_API_KEY'));
}
async sendRequest(turns, systemMessage) {
const stop_seq = '***';
const prompt = toSinglePrompt(turns, null, stop_seq);
let model_name = this.model_name || 'meta-llama/Meta-Llama-3-8B';
const input = systemMessage + "\n" + prompt;
let res = '';
try {
console.log('Awaiting Hugging Face API response...');
for await (const chunk of this.huggingface.chatCompletionStream({
model: model_name,
messages: [{ role: "user", content: input }],
...(this.params || {})
})) {
res += (chunk.choices[0]?.delta?.content || "");
}
} catch (err) {
console.log(err);
res = 'My brain disconnected, try again.';
// If both tags are present, remove the <think> block entirely.
if (hasOpenTag && hasCloseTag) {
res = res.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
}
console.log('Received.');
console.log(res);
return res;
finalRes = res;
break; // Exit loop if we got a valid response.
}
async embed(text) {
throw new Error('Embeddings are not supported by HuggingFace.');
// If no valid response was obtained after max attempts, assign a fallback.
if (finalRes == null) {
console.warn("Could not get a valid <think> block or normal response after max attempts.");
finalRes = 'I thought too hard, sorry, try again.';
}
}
console.log('Received.');
console.log(finalRes);
return finalRes;
}
async embed(text) {
throw new Error('Embeddings are not supported by HuggingFace.');
}
}
src/models/hyperbolic.js (new file, 113 lines)

@ -0,0 +1,113 @@

import { getKey } from '../utils/keys.js';

export class Hyperbolic {
    constructor(modelName, apiUrl) {
        this.modelName = modelName || "deepseek-ai/DeepSeek-V3";
        this.apiUrl = apiUrl || "https://api.hyperbolic.xyz/v1/chat/completions";

        // Retrieve the Hyperbolic API key from keys.js
        this.apiKey = getKey('HYPERBOLIC_API_KEY');
        if (!this.apiKey) {
            throw new Error('HYPERBOLIC_API_KEY not found. Check your keys.js file.');
        }
    }

    /**
     * Sends a chat completion request to the Hyperbolic endpoint.
     *
     * @param {Array} turns - An array of message objects, e.g. [{role: 'user', content: 'Hi'}].
     * @param {string} systemMessage - The system prompt or instruction.
     * @param {string} stopSeq - A stopping sequence, default '***'.
     * @returns {Promise<string>} - The model's reply.
     */
    async sendRequest(turns, systemMessage, stopSeq = '***') {
        // Prepare the messages with a system prompt at the beginning
        const messages = [{ role: 'system', content: systemMessage }, ...turns];

        // Build the request payload
        const payload = {
            model: this.modelName,
            messages: messages,
            max_tokens: 8192,
            temperature: 0.7,
            top_p: 0.9,
            stream: false
        };

        const maxAttempts = 5;
        let attempt = 0;
        let finalRes = null;

        while (attempt < maxAttempts) {
            attempt++;
            console.log(`Awaiting Hyperbolic API response... (attempt: ${attempt})`);
            console.log('Messages:', messages);

            let completionContent = null;

            try {
                const response = await fetch(this.apiUrl, {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                        'Authorization': `Bearer ${this.apiKey}`
                    },
                    body: JSON.stringify(payload)
                });

                if (!response.ok) {
                    throw new Error(`HTTP error! status: ${response.status}`);
                }

                const data = await response.json();
                if (data?.choices?.[0]?.finish_reason === 'length') {
                    throw new Error('Context length exceeded');
                }

                completionContent = data?.choices?.[0]?.message?.content || '';
                console.log('Received response from Hyperbolic.');
            } catch (err) {
                if (
                    (err.message === 'Context length exceeded' || err.code === 'context_length_exceeded') &&
                    turns.length > 1
                ) {
                    console.log('Context length exceeded, trying again with a shorter context...');
                    return await this.sendRequest(turns.slice(1), systemMessage, stopSeq);
                } else {
                    console.error(err);
                    completionContent = 'My brain disconnected, try again.';
                }
            }

            // Check for <think> blocks
            const hasOpenTag = completionContent.includes("<think>");
            const hasCloseTag = completionContent.includes("</think>");

            if ((hasOpenTag && !hasCloseTag)) {
                console.warn("Partial <think> block detected. Re-generating...");
                continue; // Retry the request
            }

            if (hasCloseTag && !hasOpenTag) {
                completionContent = '<think>' + completionContent;
            }

            if (hasOpenTag && hasCloseTag) {
                completionContent = completionContent.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
            }

            finalRes = completionContent.replace(/<\|separator\|>/g, '*no response*');
            break; // Valid response obtained—exit loop
        }

        if (finalRes == null) {
            console.warn("Could not get a valid <think> block or normal response after max attempts.");
            finalRes = 'I thought too hard, sorry, try again.';
        }
        return finalRes;
    }

    async embed(text) {
        throw new Error('Embeddings are not supported by Hyperbolic.');
    }
}
@ -10,45 +10,86 @@ export class Local {

}
async sendRequest(turns, systemMessage) {
let model = this.model_name || 'llama3';
let model = this.model_name || 'llama3.1'; // Updated to llama3.1, as it is more performant than llama3
let messages = strictFormat(turns);
messages.unshift({role: 'system', content: systemMessage});
let res = null;
try {
console.log(`Awaiting local response... (model: ${model})`)
res = await this.send(this.chat_endpoint, {
model: model,
messages: messages,
stream: false,
...(this.params || {})
});
if (res)
res = res['message']['content'];
}
catch (err) {
if (err.message.toLowerCase().includes('context length') && turns.length > 1) {
console.log('Context length exceeded, trying again with shorter context.');
return await sendRequest(turns.slice(1), systemMessage, stop_seq);
} else {
console.log(err);
res = 'My brain disconnected, try again.';
messages.unshift({ role: 'system', content: systemMessage });
// We'll attempt up to 5 times for models with deepseek-r1-esk reasoning if the <think> tags are mismatched.
const maxAttempts = 5;
let attempt = 0;
let finalRes = null;
while (attempt < maxAttempts) {
attempt++;
console.log(`Awaiting local response... (model: ${model}, attempt: ${attempt})`);
let res = null;
try {
res = await this.send(this.chat_endpoint, {
model: model,
messages: messages,
stream: false,
...(this.params || {})
});
if (res) {
res = res['message']['content'];
} else {
res = 'No response data.';
}
} catch (err) {
if (err.message.toLowerCase().includes('context length') && turns.length > 1) {
console.log('Context length exceeded, trying again with shorter context.');
return await this.sendRequest(turns.slice(1), systemMessage);
} else {
console.log(err);
res = 'My brain disconnected, try again.';
}
}
// If the model name includes "deepseek-r1" or "Andy-3.5-reasoning", then handle the <think> block.
const hasOpenTag = res.includes("<think>");
const hasCloseTag = res.includes("</think>");
// If there's a partial mismatch, retry to get a complete response.
if ((hasOpenTag && !hasCloseTag)) {
console.warn("Partial <think> block detected. Re-generating...");
continue;
}
// If </think> is present but <think> is not, prepend <think>
if (hasCloseTag && !hasOpenTag) {
res = '<think>' + res;
}
// Changed this so if the model reasons, using <think> and </think> but doesn't start the message with <think>, <think> ges prepended to the message so no error occur.
// If both tags appear, remove them (and everything inside).
if (hasOpenTag && hasCloseTag) {
res = res.replace(/<think>[\s\S]*?<\/think>/g, '');
}
finalRes = res;
break; // Exit the loop if we got a valid response.
}
return res;
if (finalRes == null) {
console.warn("Could not get a valid <think> block or normal response after max attempts.");
finalRes = 'I thought too hard, sorry, try again.';
}
return finalRes;
}
async embed(text) {
let model = this.model_name || 'nomic-embed-text';
let body = {model: model, prompt: text};
let body = { model: model, input: text };
let res = await this.send(this.embedding_endpoint, body);
return res['embedding']
return res['embedding'];
}
async send(endpoint, body) {
const url = new URL(endpoint, this.url);
let method = 'POST';
let headers = new Headers();
const request = new Request(url, {method, headers, body: JSON.stringify(body)});
const request = new Request(url, { method, headers, body: JSON.stringify(body) });
let data = null;
try {
const res = await fetch(request);

@ -63,4 +104,4 @@ export class Local {

}
return data;
}
}
}
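One easy-to-miss change above: the embedding request body switches from `prompt` to `input`, matching Ollama's newer embedding endpoint. A hedged sketch of what such a request can look like is below; the URL, endpoint path, and response handling are assumptions for illustration, since the diff only shows the body change.

```javascript
// Illustrative request against a local Ollama instance (default port assumed).
const res = await fetch('http://localhost:11434/api/embed', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model: 'nomic-embed-text', input: 'dirt block' }),
});
const data = await res.json(); // field layout depends on the endpoint version
```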
@ -18,6 +18,8 @@ import { HuggingFace } from './huggingface.js';

import { Qwen } from "./qwen.js";
import { Grok } from "./grok.js";
import { DeepSeek } from './deepseek.js';
import { Hyperbolic } from './hyperbolic.js';
import { GLHF } from './glhf.js';
import { OpenRouter } from './openrouter.js';
export class Prompter {

@ -40,7 +42,6 @@ export class Prompter {

}
// base overrides default, individual overrides base
this.convo_examples = null;
this.coding_examples = null;

@ -120,10 +121,12 @@ export class Prompter {

profile = {model: profile};
}
if (!profile.api) {
if (profile.model.includes('gemini'))
if (profile.model.includes('openrouter/'))
profile.api = 'openrouter'; // must do first because shares names with other models
else if (profile.model.includes('ollama/'))
profile.api = 'ollama'; // also must do early because shares names with other models
else if (profile.model.includes('gemini'))
profile.api = 'google';
else if (profile.model.includes('openrouter/'))
profile.api = 'openrouter'; // must do before others bc shares model names
else if (profile.model.includes('gpt') || profile.model.includes('o1')|| profile.model.includes('o3'))
profile.api = 'openai';
else if (profile.model.includes('claude'))

@ -136,6 +139,10 @@ export class Prompter {

model_profile.api = 'mistral';
else if (profile.model.includes("groq/") || profile.model.includes("groqcloud/"))
profile.api = 'groq';
else if (profile.model.includes("glhf/"))
profile.api = 'glhf';
else if (profile.model.includes("hyperbolic/"))
profile.api = 'hyperbolic';
else if (profile.model.includes('novita/'))
profile.api = 'novita';
else if (profile.model.includes('qwen'))

@ -144,16 +151,13 @@ export class Prompter {

profile.api = 'xai';
else if (profile.model.includes('deepseek'))
profile.api = 'deepseek';
else if (profile.model.includes('mistral'))
else if (profile.model.includes('mistral'))
profile.api = 'mistral';
else if (profile.model.includes('llama3'))
profile.api = 'ollama';
else
throw new Error('Unknown model:', profile.model);
throw new Error('Unknown model:', profile.model, 'Did you check the name is correct?'); // Asks the user if the name is correct
}
return profile;
}
_createModel(profile) {
let model = null;
if (profile.api === 'google')

@ -165,13 +169,17 @@ export class Prompter {

else if (profile.api === 'replicate')
model = new ReplicateAPI(profile.model.replace('replicate/', ''), profile.url, profile.params);
else if (profile.api === 'ollama')
model = new Local(profile.model, profile.url, profile.params);
model = new Local(profile.model.replace('ollama/', ''), profile.url, profile.params);
else if (profile.api === 'mistral')
model = new Mistral(profile.model, profile.url, profile.params);
else if (profile.api === 'groq')
model = new GroqCloudAPI(profile.model.replace('groq/', '').replace('groqcloud/', ''), profile.url, profile.params);
else if (profile.api === 'huggingface')
model = new HuggingFace(profile.model, profile.url, profile.params);
else if (profile.api === 'glhf')
model = new GLHF(profile.model.replace('glhf/', ''), profile.url, profile.params);
else if (profile.api === 'hyperbolic')
model = new Hyperbolic(profile.model.replace('hyperbolic/', ''), profile.url, profile.params);
else if (profile.api === 'novita')
model = new Novita(profile.model.replace('novita/', ''), profile.url, profile.params);
else if (profile.api === 'qwen')

@ -186,7 +194,6 @@ export class Prompter {

throw new Error('Unknown API:', profile.api);
return model;
}
getName() {
return this.profile.name;
}
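Taken together, the model-name checks and `_createModel` changes above mean a model string's prefix chooses the API, and the prefix is stripped before the provider class sees the name. A small illustration of that flow; the helper below is hypothetical and only mirrors the prefixes added in this commit.

```javascript
// Hypothetical helper mirroring the new prefix handling in prompter.js.
function routeModel(modelName) {
    const prefixes = {
        'openrouter/': 'openrouter',  // checked first: shares names with other providers
        'ollama/': 'ollama',          // also checked early for the same reason
        'glhf/': 'glhf',
        'hyperbolic/': 'hyperbolic',
        'novita/': 'novita',
    };
    for (const [prefix, api] of Object.entries(prefixes)) {
        if (modelName.includes(prefix)) {
            return { api, model: modelName.replace(prefix, '') };
        }
    }
    return { api: null, model: modelName }; // fall through to the keyword checks in the diff
}

// routeModel('hyperbolic/deepseek-ai/DeepSeek-V3')
//   -> { api: 'hyperbolic', model: 'deepseek-ai/DeepSeek-V3' }
```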