From e42fcd044f6a52a02735e94c56ef3c813888c6d7 Mon Sep 17 00:00:00 2001
From: mrelmida
Date: Thu, 1 May 2025 18:49:38 +0300
Subject: [PATCH] Implement TogetherAI and LM Studio support

Add TogetherAI and LM Studio as model providers, and support
speculative decoding with LM Studio.
---
 README.md                          |  2 +
 keys.example.json                  |  3 +-
 profiles/lmstudio-speculative.json | 10 ++++
 profiles/lmstudio.json             |  4 ++
 profiles/together.json             |  7 +++
 src/models/lmstudio.js             | 74 ++++++++++++++++++++++++++++++
 src/models/prompter.js             | 14 ++++++
 src/models/together.js             | 58 ++++++++++++++++++++++++
 8 files changed, 171 insertions(+), 1 deletion(-)
 create mode 100644 profiles/lmstudio-speculative.json
 create mode 100644 profiles/lmstudio.json
 create mode 100644 profiles/together.json
 create mode 100644 src/models/lmstudio.js
 create mode 100644 src/models/together.js

diff --git a/README.md b/README.md
index f291c2f..f095e14 100644
--- a/README.md
+++ b/README.md
@@ -64,6 +64,8 @@ You can configure the agent's name, model, and prompts in their profile like `an
 | `glhf.chat` | `GHLF_API_KEY` | `glhf/hf:meta-llama/Llama-3.1-405B-Instruct` | [docs](https://glhf.chat/user-settings/api) |
 | `hyperbolic` | `HYPERBOLIC_API_KEY` | `hyperbolic/deepseek-ai/DeepSeek-V3` | [docs](https://docs.hyperbolic.xyz/docs/getting-started) |
 | `vllm` | n/a | `vllm/llama3` | n/a |
+| `together` | `TOGETHER_API_KEY` | `together/meta-llama/Llama-3.3-70B-Instruct-Turbo` | [docs](https://docs.together.ai/docs/serverless-models) |
+| `lmstudio` | n/a | `lmstudio/qwen3-8b` | [docs](https://lmstudio.ai/models) |
 
 If you use Ollama, to install the models used by default (generation and embedding), execute the following terminal command:
 `ollama pull llama3.1 && ollama pull nomic-embed-text`
diff --git a/keys.example.json b/keys.example.json
index 99286c5..abbde6d 100644
--- a/keys.example.json
+++ b/keys.example.json
@@ -13,5 +13,6 @@
     "GHLF_API_KEY": "",
     "HYPERBOLIC_API_KEY": "",
     "NOVITA_API_KEY": "",
-    "OPENROUTER_API_KEY": ""
+    "OPENROUTER_API_KEY": "",
+    "TOGETHER_API_KEY": ""
 }
diff --git a/profiles/lmstudio-speculative.json b/profiles/lmstudio-speculative.json
new file mode 100644
index 0000000..97e15bc
--- /dev/null
+++ b/profiles/lmstudio-speculative.json
@@ -0,0 +1,10 @@
+{
+    "name": "qwen2.5",
+    "model": {
+        "url": "ws://127.0.0.1:1234",
+        "model": "lmstudio/qwen2.5-14b-instruct",
+        "params": {
+            "draftModel": "qwen2.5-1.5b-instruct"
+        }
+    }
+}
\ No newline at end of file
diff --git a/profiles/lmstudio.json b/profiles/lmstudio.json
new file mode 100644
index 0000000..1a02c88
--- /dev/null
+++ b/profiles/lmstudio.json
@@ -0,0 +1,4 @@
+{
+    "name": "qwen3",
+    "model": "lmstudio/qwen3-8b"
+}
\ No newline at end of file
diff --git a/profiles/together.json b/profiles/together.json
new file mode 100644
index 0000000..c2d93e8
--- /dev/null
+++ b/profiles/together.json
@@ -0,0 +1,7 @@
+{
+    "name": "Together",
+
+    "model": "together/meta-llama/Llama-3.3-70B-Instruct-Turbo",
+
+    "embedding": "openai"
+}
\ No newline at end of file
diff --git a/src/models/lmstudio.js b/src/models/lmstudio.js
new file mode 100644
index 0000000..1b8d634
--- /dev/null
+++ b/src/models/lmstudio.js
@@ -0,0 +1,74 @@
+import { LMStudioClient, Chat } from '@lmstudio/sdk';
+
+export class LMStudio {
+    constructor(model_name, url, params) {
+        this.model_name = model_name;
+        this.params = params;
+
+        // Default to the local LM Studio server if no URL is given.
+        let config = {};
+        if (url)
+            config.baseURL = url;
+        else
+            config.baseURL = "ws://127.0.0.1:1234";
+
+        this.lmstudio = new LMStudioClient({
+            baseUrl: config.baseURL
+        });
+    }
+
+    async sendRequest(turns, systemMessage, stop_seq='***') {
+        let res = null;
+
+        // Parse <think>...</think> blocks so reasoning models (e.g. Qwen3)
+        // return only the final answer; profile params can override this.
+        let pack = {
+            reasoningParsing: {
+                enabled: true,
+                startString: "<think>",
+                endString: "</think>"
+            },
+            ...this.params || {}
+        };
+        try {
+            let messages = [{'role': 'system', 'content': systemMessage}].concat(turns);
+            let chat = Chat.from(messages);
+            const model = await this.lmstudio.llm.model(this.model_name || "qwen3-8b");
+            let response = await model.respond(chat, pack);
+            const { content, stats, nonReasoningContent } = response;
+
+            if (stats.stopReason === "contextLengthReached") throw new Error("Context length exceeded");
+            if (stats.stopReason === "failed") throw new Error("Failed to generate response");
+
+            res = nonReasoningContent || content;
+        } catch (err) {
+            console.error('Error while awaiting response:', err);
+            // If the error indicates a context-length problem, we could slice the turns array, etc.
+            res = "My brain disconnected, try again.";
+        }
+        return res;
+    }
+
+    async sendVisionRequest(messages, systemMessage, imageBuffer) {
+        const imageMessages = [...messages];
+        // The SDK takes a file name plus base64 data for image uploads.
+        const image = await this.lmstudio.files.prepareImageBase64("image.png", imageBuffer.toString('base64'));
+        imageMessages.push({
+            role: "user",
+            content: systemMessage,
+            images: [image],
+        });
+        return this.sendRequest(imageMessages, systemMessage);
+    }
+
+    async embed(text) {
+        if (text.length > 8191)
+            text = text.slice(0, 8191);
+
+        // Embedding models are served from the embedding namespace, not llm.
+        const model = await this.lmstudio.embedding.model(this.model_name || "text-embedding-nomic-embed-text-v1.5");
+        const { embedding } = await model.embed(text);
+        return embedding;
+    }
+}
diff --git a/src/models/prompter.js b/src/models/prompter.js
index e05f5a8..82ce2d9 100644
--- a/src/models/prompter.js
+++ b/src/models/prompter.js
@@ -20,3 +20,5 @@
 import { OpenRouter } from './openrouter.js';
 import { VLLM } from './vllm.js';
+import { LMStudio } from './lmstudio.js';
+import { Together } from './together.js';
@@ -110,6 +112,10 @@ export class Prompter {
             this.embedding_model = new HuggingFace(embedding.model, embedding.url);
         else if (embedding.api === 'novita')
             this.embedding_model = new Novita(embedding.model, embedding.url);
+        else if (embedding.api === 'lmstudio')
+            this.embedding_model = new LMStudio(embedding.model, embedding.url);
+        else if (embedding.api === 'together')
+            this.embedding_model = new Together(embedding.model, embedding.url);
         else {
             this.embedding_model = null;
             let embedding_name = embedding ? embedding.api : '[NOT SPECIFIED]'
@@ -140,6 +146,10 @@ export class Prompter {
             profile.api = 'openrouter'; // must do first because shares names with other models
         else if (profile.model.includes('ollama/'))
             profile.api = 'ollama'; // also must do early because shares names with other models
+        else if (profile.model.includes('lmstudio/'))
+            profile.api = 'lmstudio'; // prefixed, so must also come before the generic name checks
+        else if (profile.model.includes('together/'))
+            profile.api = 'together'; // prefixed, so must also come before the generic name checks
         else if (profile.model.includes('gemini'))
             profile.api = 'google';
         else if (profile.model.includes('vllm/'))
@@ -209,6 +219,10 @@ export class Prompter {
             model = new OpenRouter(profile.model.replace('openrouter/', ''), profile.url, profile.params);
         else if (profile.api === 'vllm')
             model = new VLLM(profile.model.replace('vllm/', ''), profile.url, profile.params);
+        else if (profile.api === 'lmstudio')
+            model = new LMStudio(profile.model.replace('lmstudio/', ''), profile.url, profile.params);
+        else if (profile.api === 'together')
+            model = new Together(profile.model.replace('together/', ''), profile.url, profile.params);
         else
             throw new Error('Unknown API:', profile.api);
         return model;
diff --git a/src/models/together.js b/src/models/together.js
new file mode 100644
index 0000000..de56e88
--- /dev/null
+++ b/src/models/together.js
@@ -0,0 +1,58 @@
+import OpenAIApi from 'openai';
+import { getKey, hasKey } from '../utils/keys.js';
+import { strictFormat } from '../utils/text.js';
+
+export class Together {
+    constructor(model_name, url) {
+        this.model_name = model_name;
+
+        let config = {};
+        config.baseURL = url || 'https://api.together.xyz/v1';
+
+        const apiKey = getKey('TOGETHER_API_KEY');
+        if (!apiKey) {
+            console.error('Error: TOGETHER_API_KEY not found. Make sure it is set properly.');
+        }
+
+        // Together's API is OpenAI-compatible, so reuse the OpenAI client.
+        config.apiKey = apiKey;
+
+        this.openai = new OpenAIApi(config);
+    }
+
+    async sendRequest(turns, systemMessage, stop_seq='*') {
+        let messages = [{ role: 'system', content: systemMessage }, ...turns];
+        messages = strictFormat(messages);
+
+        // Use a serverless model name from together.ai, e.g. "meta-llama/Llama-3.3-70B-Instruct-Turbo".
+        const pack = {
+            model: this.model_name || "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+            messages,
+            stop: stop_seq
+        };
+
+        let res = null;
+        try {
+            console.log('Awaiting Together API response...');
+            let completion = await this.openai.chat.completions.create(pack);
+            if (!completion?.choices?.[0]) {
+                console.error('No completion or choices returned:', completion);
+                return 'No response received.';
+            }
+            if (completion.choices[0].finish_reason === 'length') {
+                throw new Error('Context length exceeded');
+            }
+            console.log('Received.');
+            res = completion.choices[0].message.content;
+        } catch (err) {
+            console.error('Error while awaiting response:', err);
+            // If the error indicates a context-length problem, we could slice the turns array, etc.
+            res = 'My brain disconnected, try again.';
+        }
+        return res;
+    }
+
+    async embed(text) {
+        throw new Error('Embeddings are not implemented for TogetherAI; use a different embedding provider.');
+    }
+}
\ No newline at end of file
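
Usage notes (illustrative sketches, not part of the applied patch):

The lmstudio-speculative profile feeds "params" straight into the SDK's
prediction config, which is where "draftModel" turns on speculative
decoding. A minimal standalone sketch of the same call, assuming both
Qwen models are already downloaded in LM Studio:

    import { LMStudioClient, Chat } from '@lmstudio/sdk';

    // Connect to the local LM Studio server (its default WebSocket port).
    const client = new LMStudioClient({ baseUrl: 'ws://127.0.0.1:1234' });

    // Load the main model; the draft model is named in the prediction config.
    const model = await client.llm.model('qwen2.5-14b-instruct');
    const chat = Chat.from([
        { role: 'system', content: 'You are a helpful assistant.' },
        { role: 'user', content: 'Say hello in five words.' },
    ]);

    // draftModel enables speculative decoding: the small model drafts
    // tokens that the large model verifies, trading extra VRAM for speed.
    const response = await model.respond(chat, { draftModel: 'qwen2.5-1.5b-instruct' });
    console.log(response.content);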
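
The embed() path uses the SDK's embedding namespace rather than llm. A
rough sketch, assuming the nomic embedding model (the wrapper's default)
has been downloaded in LM Studio:

    import { LMStudioClient } from '@lmstudio/sdk';

    const client = new LMStudioClient({ baseUrl: 'ws://127.0.0.1:1234' });

    // Embedding models load from the embedding namespace, not llm.
    const model = await client.embedding.model('text-embedding-nomic-embed-text-v1.5');
    const { embedding } = await model.embed('chopped a birch log');
    console.log(embedding.length); // dimensionality of the returned vector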
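
together.js relies on Together's OpenAI-compatible endpoint, so the stock
openai client can reproduce a request outside the project. A sketch,
assuming TOGETHER_API_KEY is set in the environment:

    import OpenAI from 'openai';

    // Point the standard OpenAI client at Together's compatible endpoint.
    const client = new OpenAI({
        baseURL: 'https://api.together.xyz/v1',
        apiKey: process.env.TOGETHER_API_KEY,
    });

    const completion = await client.chat.completions.create({
        model: 'meta-llama/Llama-3.3-70B-Instruct-Turbo',
        messages: [{ role: 'user', content: 'Say hello in five words.' }],
    });
    console.log(completion.choices[0].message.content);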