Implementing TogetherAI and LM Studio support

Implementing TogetherAI and LM Studio support. Also implementing speculative decoding with LM Studio.
mrelmida 2025-05-01 18:49:38 +03:00
parent 6f8fb1789c
commit e42fcd044f
8 changed files with 169 additions and 1 deletion

README.md

@@ -64,6 +64,8 @@ You can configure the agent's name, model, and prompts in their profile like `an
| `glhf.chat` | `GHLF_API_KEY` | `glhf/hf:meta-llama/Llama-3.1-405B-Instruct` | [docs](https://glhf.chat/user-settings/api) |
| `hyperbolic` | `HYPERBOLIC_API_KEY` | `hyperbolic/deepseek-ai/DeepSeek-V3` | [docs](https://docs.hyperbolic.xyz/docs/getting-started) |
| `vllm` | n/a | `vllm/llama3` | n/a |
+| `together` | `TOGETHER_API_KEY` | `together/meta-llama/Llama-3.3-70B-Instruct-Turbo` | [docs](https://docs.together.ai/docs/serverless-models) |
+| `lmstudio` | n/a | `lmstudio/qwen3-8b` | [docs](https://lmstudio.ai/models) |
If you use Ollama, to install the models used by default (generation and embedding), execute the following terminal command:
`ollama pull llama3.1 && ollama pull nomic-embed-text`
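
Note that the new `lmstudio` row lists no API key: requests go to a local LM Studio server over websocket (the wrapper defaults to `ws://127.0.0.1:1234`, see `src/models/lmstudio.js` below), so nothing needs to be added to `keys.json`.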

keys.example.json

@@ -13,5 +13,6 @@
    "GHLF_API_KEY": "",
    "HYPERBOLIC_API_KEY": "",
    "NOVITA_API_KEY": "",
-   "OPENROUTER_API_KEY": ""
+   "OPENROUTER_API_KEY": "",
+   "TOGETHER_API_KEY": ""
}
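
As with the existing providers, only the placeholder lives in `keys.example.json`; the actual key belongs in your local `keys.json` copy, and the together.ai docs linked in the README table describe how to generate one.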


@@ -0,0 +1,10 @@
{
    "name": "qwen3",
    "model": {
        "url": "ws://127.0.0.1:1234",
        "model": "lmstudio/qwen2.5-14b-instruct",
        "params": {
            "draftModel": "qwen2.5-1.5b-instruct"
        }
    }
}
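
This new profile demonstrates the speculative decoding setup: a 14B main model paired with a 1.5B draft model, so the small model proposes tokens and the large one verifies them. The nested `params` object is spread into the prediction config in `sendRequest` (see `src/models/lmstudio.js` below), which is how `draftModel` reaches LM Studio's `model.respond` call.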

profiles/lmstudio.json Normal file

@@ -0,0 +1,4 @@
{
    "name": "qwen3",
    "model": "lmstudio/qwen3-8b"
}
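
Since the prompter strips the `lmstudio/` prefix before constructing the wrapper, `qwen3-8b` is the identifier the local LM Studio server must actually have downloaded.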

profiles/together.json Normal file

@@ -0,0 +1,7 @@
{
    "name": "Together",
    "model": "together/meta-llama/Llama-3.3-70B-Instruct-Turbo",
    "embedding": "openai"
}
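
The `"embedding": "openai"` line matters here: the Together wrapper's `embed` method below throws, so profiles that use Together for generation must delegate embeddings to another provider.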

src/models/lmstudio.js Normal file

@@ -0,0 +1,74 @@
import { LMStudioClient, Chat } from '@lmstudio/sdk';

export class LMStudio {
    constructor(model_name, url, params) {
        this.model_name = model_name;
        this.params = params;
        // Default to the local LM Studio server if no URL is given
        this.lmstudio = new LMStudioClient({
            baseUrl: url || "ws://127.0.0.1:1234"
        });
    }

    async sendRequest(turns, systemMessage, stop_seq='***') {
        let res = null;
        let pack = {
            stopStrings: [stop_seq], // stop generation at the delimiter, like the other wrappers
            // Tell the SDK how to recognize <think>...</think> blocks so
            // nonReasoningContent excludes the model's reasoning
            reasoningParsing: {
                enabled: true,
                startString: "<think>",
                endString: "</think>"
            },
            // Profile params (e.g. draftModel for speculative decoding) override the defaults
            ...(this.params || {})
        };
        try {
            let messages = [{'role': 'system', 'content': systemMessage}].concat(turns);
            let chat = Chat.from(messages);
            const model = await this.lmstudio.llm.model(this.model_name || "qwen3-8b");
            let response = await model.respond(chat, pack);
            const { content, stats, nonReasoningContent } = response;
            if (stats.stopReason === "contextLengthReached") throw new Error("Context length exceeded");
            if (stats.stopReason === "failed") throw new Error("Failed to generate response");
            res = nonReasoningContent || content;
        } catch (err) {
            console.error('Error while awaiting response:', err);
            // If the error indicates a context-length problem, we could slice the turns array, etc.
            res = "My brain disconnected, try again.";
        }
        return res;
    }

    async sendVisionRequest(messages, systemMessage, imageBuffer) {
        const imageMessages = [...messages];
        // prepareImageBase64 expects a file name plus base64 data, not a raw buffer
        const image = await this.lmstudio.files.prepareImageBase64('image.png', imageBuffer.toString('base64'));
        imageMessages.push({
            role: "user",
            content: systemMessage,
            images: [image], // the SDK's message format takes an `images` array
        });
        return this.sendRequest(imageMessages, systemMessage);
    }

    async embed(text) {
        if (text.length > 8191)
            text = text.slice(0, 8191);
        // Embedding models live under the `embedding` namespace, not `llm`
        const model = await this.lmstudio.embedding.model(this.model_name || "text-embedding-nomic-embed-text-v1.5");
        const { embedding } = await model.embed(text);
        return embedding;
    }
}
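
For a quick smoke test of both the wrapper and speculative decoding, something like the following should work (a minimal sketch, assuming a local LM Studio server on the default `ws://127.0.0.1:1234` with both models already downloaded; `draftModel` is the same param the speculative-decoding profile above sets):

import { LMStudio } from './lmstudio.js';

// draftModel is optional; omit it and LM Studio decodes normally.
const model = new LMStudio('qwen3-8b', 'ws://127.0.0.1:1234', {
    draftModel: 'qwen2.5-1.5b-instruct'
});
const reply = await model.sendRequest(
    [{ role: 'user', content: 'Say hello in five words.' }],
    'You are a concise assistant.'
);
console.log(reply);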


@@ -110,6 +110,10 @@ export class Prompter {
            this.embedding_model = new HuggingFace(embedding.model, embedding.url);
        else if (embedding.api === 'novita')
            this.embedding_model = new Novita(embedding.model, embedding.url);
+       else if (embedding.api === 'lmstudio')
+           this.embedding_model = new LMStudio(embedding.model, embedding.url);
+       else if (embedding.api === 'together')
+           this.embedding_model = new Together(embedding.model, embedding.url);
        else {
            this.embedding_model = null;
            let embedding_name = embedding ? embedding.api : '[NOT SPECIFIED]'
@@ -140,6 +144,10 @@ export class Prompter {
            profile.api = 'openrouter'; // must do first because shares names with other models
        else if (profile.model.includes('ollama/'))
            profile.api = 'ollama'; // also must do early because shares names with other models
+       else if (profile.model.includes('lmstudio/'))
+           profile.api = 'lmstudio'; // also must do early because shares names with other models
+       else if (profile.model.includes('together/'))
+           profile.api = 'together'; // also must do early because shares names with other models
        else if (profile.model.includes('gemini'))
            profile.api = 'google';
        else if (profile.model.includes('vllm/'))
@@ -209,6 +217,10 @@ export class Prompter {
            model = new OpenRouter(profile.model.replace('openrouter/', ''), profile.url, profile.params);
        else if (profile.api === 'vllm')
            model = new VLLM(profile.model.replace('vllm/', ''), profile.url, profile.params);
+       else if (profile.api === 'lmstudio')
+           model = new LMStudio(profile.model.replace('lmstudio/', ''), profile.url, profile.params);
+       else if (profile.api === 'together')
+           model = new Together(profile.model.replace('together/', ''), profile.url, profile.params);
        else
            throw new Error('Unknown API:', profile.api);
        return model;
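
The ordering here mirrors the existing `openrouter/` and `ollama/` checks: the new prefix tests run before substring checks like `gemini`, because a model id such as `lmstudio/qwen3-8b` or `together/meta-llama/...` could otherwise match another provider's pattern. The third hunk then strips the prefix, so each wrapper receives the bare model id.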

src/models/together.js Normal file

@@ -0,0 +1,58 @@
import OpenAIApi from 'openai';
import { getKey, hasKey } from '../utils/keys.js';
import { strictFormat } from '../utils/text.js';

export class Together {
    constructor(model_name, url, params) {
        this.model_name = model_name;
        this.params = params;
        let config = {};
        // Together exposes an OpenAI-compatible endpoint
        config.baseURL = url || 'https://api.together.xyz/v1';

        const apiKey = getKey('TOGETHER_API_KEY');
        if (!apiKey) {
            console.error('Error: TOGETHER_API_KEY not found. Make sure it is set properly.');
        }

        // Pass the API key to the OpenAI-compatible client
        config.apiKey = apiKey;

        this.openai = new OpenAIApi(config);
    }

    async sendRequest(turns, systemMessage, stop_seq='*') {
        let messages = [{ role: 'system', content: systemMessage }, ...turns];
        messages = strictFormat(messages);

        // Model ids follow Together's catalog, e.g. "meta-llama/Llama-3.3-70B-Instruct-Turbo"
        const pack = {
            model: this.model_name || "meta-llama/Llama-3.3-70B-Instruct-Turbo",
            messages,
            stop: stop_seq,
            ...(this.params || {})
        };

        let res = null;
        try {
            console.log('Awaiting Together API response...');
            let completion = await this.openai.chat.completions.create(pack);
            if (!completion?.choices?.[0]) {
                console.error('No completion or choices returned:', completion);
                return 'No response received.';
            }
            if (completion.choices[0].finish_reason === 'length') {
                throw new Error('Context length exceeded');
            }
            console.log('Received.');
            res = completion.choices[0].message.content;
        } catch (err) {
            console.error('Error while awaiting response:', err);
            // If the error indicates a context-length problem, we could slice the turns array, etc.
            res = 'My brain disconnected, try again.';
        }
        return res;
    }

    async embed(text) {
        throw new Error('Embeddings are not implemented for the TogetherAI wrapper.');
    }
}
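
Usage matches the other OpenAI-compatible wrappers (a minimal sketch, assuming `TOGETHER_API_KEY` is set in `keys.json`):

import { Together } from './together.js';

const model = new Together('meta-llama/Llama-3.3-70B-Instruct-Turbo');
const reply = await model.sendRequest(
    [{ role: 'user', content: 'Hello!' }],
    'You are a helpful assistant.'
);
console.log(reply);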