Implementing TogetherAI and LM Studio support

Implementing TogetherAI and LM Studio support. Also implementing speculative decoding with LM Studio.
mrelmida 2025-05-01 18:49:38 +03:00
parent 6f8fb1789c
commit e42fcd044f
8 changed files with 169 additions and 1 deletion

README.md

@@ -64,6 +64,8 @@ You can configure the agent's name, model, and prompts in their profile like `an
| `glhf.chat` | `GHLF_API_KEY` | `glhf/hf:meta-llama/Llama-3.1-405B-Instruct` | [docs](https://glhf.chat/user-settings/api) |
| `hyperbolic` | `HYPERBOLIC_API_KEY` | `hyperbolic/deepseek-ai/DeepSeek-V3` | [docs](https://docs.hyperbolic.xyz/docs/getting-started) |
| `vllm` | n/a | `vllm/llama3` | n/a |
+| `together` | `TOGETHER_API_KEY` | `together/meta-llama/Llama-3.3-70B-Instruct-Turbo` | [docs](https://docs.together.ai/docs/serverless-models) |
+| `lmstudio` | n/a | `lmstudio/qwen3-8b` | [docs](https://lmstudio.ai/models) |
If you use Ollama, to install the models used by default (generation and embedding), execute the following terminal command:
`ollama pull llama3.1 && ollama pull nomic-embed-text`
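
Note that the new `lmstudio` row lists no API key: requests go to a local LM Studio server over websocket (the wrapper defaults to `ws://127.0.0.1:1234`, see `src/models/lmstudio.js` below), so nothing needs to be added to `keys.json`.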

keys.example.json

@@ -13,5 +13,6 @@
    "GHLF_API_KEY": "",
    "HYPERBOLIC_API_KEY": "",
    "NOVITA_API_KEY": "",
-   "OPENROUTER_API_KEY": ""
+   "OPENROUTER_API_KEY": "",
+   "TOGETHER_API_KEY": ""
}
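
As with the existing providers, only the placeholder lives in `keys.example.json`; the actual key belongs in your local `keys.json` copy, and the together.ai docs linked in the README table describe how to generate one.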


@@ -0,0 +1,10 @@
{
    "name": "qwen3",
    "model": {
        "url": "ws://127.0.0.1:1234",
        "model": "lmstudio/qwen2.5-14b-instruct",
        "params": {
            "draftModel": "qwen2.5-1.5b-instruct"
        }
    }
}
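
This new profile demonstrates the speculative decoding setup: a 14B main model paired with a 1.5B draft model, so the small model proposes tokens and the large one verifies them. The nested `params` object is spread into the prediction config in `sendRequest` (see `src/models/lmstudio.js` below), which is how `draftModel` reaches LM Studio's `model.respond` call.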

profiles/lmstudio.json Normal file

@@ -0,0 +1,4 @@
{
    "name": "qwen3",
    "model": "lmstudio/qwen3-8b"
}
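
Since the prompter strips the `lmstudio/` prefix before constructing the wrapper, `qwen3-8b` is the identifier the local LM Studio server must actually have downloaded.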

profiles/together.json Normal file

@@ -0,0 +1,7 @@
{
    "name": "Together",
    "model": "together/meta-llama/Llama-3.3-70B-Instruct-Turbo",
    "embedding": "openai"
}
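
The `"embedding": "openai"` line matters here: the Together wrapper's `embed` method below throws, so profiles that use Together for generation must delegate embeddings to another provider.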

src/models/lmstudio.js Normal file

@@ -0,0 +1,74 @@
import { LMStudioClient, Chat } from '@lmstudio/sdk';

export class LMStudio {
    constructor(model_name, url, params) {
        this.model_name = model_name;
        this.params = params;
        // Default to the local LM Studio server if no URL is given
        this.lmstudio = new LMStudioClient({
            baseUrl: url || "ws://127.0.0.1:1234"
        });
    }

    async sendRequest(turns, systemMessage, stop_seq='***') {
        let res = null;
        let pack = {
            stopStrings: [stop_seq], // stop generation at the delimiter, like the other wrappers
            // Tell the SDK how to recognize <think>...</think> blocks so
            // nonReasoningContent excludes the model's reasoning
            reasoningParsing: {
                enabled: true,
                startString: "<think>",
                endString: "</think>"
            },
            // Profile params (e.g. draftModel for speculative decoding) override the defaults
            ...(this.params || {})
        };
        try {
            let messages = [{'role': 'system', 'content': systemMessage}].concat(turns);
            let chat = Chat.from(messages);
            const model = await this.lmstudio.llm.model(this.model_name || "qwen3-8b");
            let response = await model.respond(chat, pack);
            const { content, stats, nonReasoningContent } = response;
            if (stats.stopReason === "contextLengthReached") throw new Error("Context length exceeded");
            if (stats.stopReason === "failed") throw new Error("Failed to generate response");
            res = nonReasoningContent || content;
        } catch (err) {
            console.error('Error while awaiting response:', err);
            // If the error indicates a context-length problem, we could slice the turns array, etc.
            res = "My brain disconnected, try again.";
        }
        return res;
    }

    async sendVisionRequest(messages, systemMessage, imageBuffer) {
        const imageMessages = [...messages];
        // prepareImageBase64 expects a file name plus base64 data, not a raw buffer
        const image = await this.lmstudio.files.prepareImageBase64('image.png', imageBuffer.toString('base64'));
        imageMessages.push({
            role: "user",
            content: systemMessage,
            images: [image], // the SDK's message format takes an `images` array
        });
        return this.sendRequest(imageMessages, systemMessage);
    }

    async embed(text) {
        if (text.length > 8191)
            text = text.slice(0, 8191);
        // Embedding models live under the `embedding` namespace, not `llm`
        const model = await this.lmstudio.embedding.model(this.model_name || "text-embedding-nomic-embed-text-v1.5");
        const { embedding } = await model.embed(text);
        return embedding;
    }
}
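
For a quick smoke test of both the wrapper and speculative decoding, something like the following should work (a minimal sketch, assuming a local LM Studio server on the default `ws://127.0.0.1:1234` with both models already downloaded; `draftModel` is the same param the speculative-decoding profile above sets):

import { LMStudio } from './lmstudio.js';

// draftModel is optional; omit it and LM Studio decodes normally.
const model = new LMStudio('qwen3-8b', 'ws://127.0.0.1:1234', {
    draftModel: 'qwen2.5-1.5b-instruct'
});
const reply = await model.sendRequest(
    [{ role: 'user', content: 'Say hello in five words.' }],
    'You are a concise assistant.'
);
console.log(reply);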


@@ -110,6 +110,10 @@ export class Prompter {
            this.embedding_model = new HuggingFace(embedding.model, embedding.url);
        else if (embedding.api === 'novita')
            this.embedding_model = new Novita(embedding.model, embedding.url);
+       else if (embedding.api === 'lmstudio')
+           this.embedding_model = new LMStudio(embedding.model, embedding.url);
+       else if (embedding.api === 'together')
+           this.embedding_model = new Together(embedding.model, embedding.url);
        else {
            this.embedding_model = null;
            let embedding_name = embedding ? embedding.api : '[NOT SPECIFIED]'
@@ -140,6 +144,10 @@ export class Prompter {
            profile.api = 'openrouter'; // must do first because shares names with other models
        else if (profile.model.includes('ollama/'))
            profile.api = 'ollama'; // also must do early because shares names with other models
+       else if (profile.model.includes('lmstudio/'))
+           profile.api = 'lmstudio'; // also must do early because shares names with other models
+       else if (profile.model.includes('together/'))
+           profile.api = 'together'; // also must do early because shares names with other models
        else if (profile.model.includes('gemini'))
            profile.api = 'google';
        else if (profile.model.includes('vllm/'))
@@ -209,6 +217,10 @@ export class Prompter {
            model = new OpenRouter(profile.model.replace('openrouter/', ''), profile.url, profile.params);
        else if (profile.api === 'vllm')
            model = new VLLM(profile.model.replace('vllm/', ''), profile.url, profile.params);
+       else if (profile.api === 'lmstudio')
+           model = new LMStudio(profile.model.replace('lmstudio/', ''), profile.url, profile.params);
+       else if (profile.api === 'together')
+           model = new Together(profile.model.replace('together/', ''), profile.url, profile.params);
        else
            throw new Error('Unknown API:', profile.api);
        return model;
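
The ordering here mirrors the existing `openrouter/` and `ollama/` checks: the new prefix tests run before substring checks like `gemini`, because a model id such as `lmstudio/qwen3-8b` or `together/meta-llama/...` could otherwise match another provider's pattern. The third hunk then strips the prefix, so each wrapper receives the bare model id.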

src/models/together.js Normal file

@@ -0,0 +1,58 @@
import OpenAIApi from 'openai';
import { getKey, hasKey } from '../utils/keys.js';
import { strictFormat } from '../utils/text.js';

export class Together {
    constructor(model_name, url, params) {
        this.model_name = model_name;
        this.params = params;
        let config = {};
        // Together exposes an OpenAI-compatible endpoint
        config.baseURL = url || 'https://api.together.xyz/v1';

        const apiKey = getKey('TOGETHER_API_KEY');
        if (!apiKey) {
            console.error('Error: TOGETHER_API_KEY not found. Make sure it is set properly.');
        }

        // Pass the API key to the OpenAI-compatible client
        config.apiKey = apiKey;

        this.openai = new OpenAIApi(config);
    }

    async sendRequest(turns, systemMessage, stop_seq='*') {
        let messages = [{ role: 'system', content: systemMessage }, ...turns];
        messages = strictFormat(messages);

        // Model ids follow Together's catalog, e.g. "meta-llama/Llama-3.3-70B-Instruct-Turbo"
        const pack = {
            model: this.model_name || "meta-llama/Llama-3.3-70B-Instruct-Turbo",
            messages,
            stop: stop_seq,
            ...(this.params || {})
        };

        let res = null;
        try {
            console.log('Awaiting Together API response...');
            let completion = await this.openai.chat.completions.create(pack);
            if (!completion?.choices?.[0]) {
                console.error('No completion or choices returned:', completion);
                return 'No response received.';
            }
            if (completion.choices[0].finish_reason === 'length') {
                throw new Error('Context length exceeded');
            }
            console.log('Received.');
            res = completion.choices[0].message.content;
        } catch (err) {
            console.error('Error while awaiting response:', err);
            // If the error indicates a context-length problem, we could slice the turns array, etc.
            res = 'My brain disconnected, try again.';
        }
        return res;
    }

    async embed(text) {
        throw new Error('Embeddings are not implemented for the TogetherAI wrapper.');
    }
}
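
Usage matches the other OpenAI-compatible wrappers (a minimal sketch, assuming `TOGETHER_API_KEY` is set in `keys.json`):

import { Together } from './together.js';

const model = new Together('meta-llama/Llama-3.3-70B-Instruct-Turbo');
const reply = await model.sendRequest(
    [{ role: 'user', content: 'Hello!' }],
    'You are a helpful assistant.'
);
console.log(reply);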