Mirror of https://github.com/kolbytn/mindcraft.git (synced 2025-07-28 02:45:27 +02:00)
Implement TogetherAI and LM Studio support; also implement speculative decoding with LM Studio.
parent 6f8fb1789c
commit e42fcd044f

8 changed files with 169 additions and 1 deletion
README.md:

@@ -64,6 +64,8 @@ You can configure the agent's name, model, and prompts in their profile like `an
 | `glhf.chat` | `GHLF_API_KEY` | `glhf/hf:meta-llama/Llama-3.1-405B-Instruct` | [docs](https://glhf.chat/user-settings/api) |
 | `hyperbolic` | `HYPERBOLIC_API_KEY` | `hyperbolic/deepseek-ai/DeepSeek-V3` | [docs](https://docs.hyperbolic.xyz/docs/getting-started) |
 | `vllm` | n/a | `vllm/llama3` | n/a |
+| `together` | `TOGETHER_API_KEY` | `together/meta-llama/Llama-3.3-70B-Instruct-Turbo` | [docs](https://docs.together.ai/docs/serverless-models) |
+| `lmstudio` | n/a | `lmstudio/qwen3-8b` | [docs](https://lmstudio.ai/models) |
 
 If you use Ollama, to install the models used by default (generation and embedding), execute the following terminal command:
 
 `ollama pull llama3.1 && ollama pull nomic-embed-text`
keys.example.json:

@@ -13,5 +13,6 @@
     "GHLF_API_KEY": "",
     "HYPERBOLIC_API_KEY": "",
     "NOVITA_API_KEY": "",
-    "OPENROUTER_API_KEY": ""
+    "OPENROUTER_API_KEY": "",
+    "TOGETHER_API_KEY": ""
 }
profiles/lmstudio-speculative.json (new file, 10 lines):

@@ -0,0 +1,10 @@
{
    "name": "qwen3",
    "model": {
        "url": "ws://127.0.0.1:1234",
        "model": "lmstudio/qwen2.5-14b-instruct",
        "params": {
            "draftModel": "qwen2.5-1.5b-instruct"
        }
    }
}
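In this profile, `params.draftModel` is what turns on LM Studio's speculative decoding: a small draft model proposes tokens that the larger target model verifies, which typically speeds up generation without changing the output. Below is a minimal sketch of how that parameter reaches the SDK; the client setup and prompt are illustrative, and it assumes `respond()` accepts a `draftModel` option, which is what the wrapper later in this commit relies on when it spreads the profile's params into the request.

```js
// Minimal sketch: speculative decoding via the LM Studio SDK.
// Assumes LM Studio is serving on ws://127.0.0.1:1234 with both models downloaded.
import { LMStudioClient, Chat } from '@lmstudio/sdk';

const client = new LMStudioClient({ baseUrl: 'ws://127.0.0.1:1234' });
const model = await client.llm.model('qwen2.5-14b-instruct');

const chat = Chat.from([{ role: 'user', content: 'Say hello.' }]);

// "draftModel" comes straight from the profile's "params" object above;
// passing it in the respond() options is what enables speculative decoding.
const response = await model.respond(chat, { draftModel: 'qwen2.5-1.5b-instruct' });
console.log(response.content);
```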
profiles/lmstudio.json (new file, 4 lines):

@@ -0,0 +1,4 @@
{
    "name": "qwen3",
    "model": "lmstudio/qwen3-8b"
}
profiles/together.json (new file, 7 lines):

@@ -0,0 +1,7 @@
{
    "name": "Together",

    "model": "together/meta-llama/Llama-3.3-70B-Instruct-Turbo",

    "embedding": "openai"
}
src/models/lmstudio.js (new file, 74 lines):

@@ -0,0 +1,74 @@
import { LMStudioClient, Chat } from '@lmstudio/sdk';

export class LMStudio {
    constructor(model_name, url, params) {
        this.model_name = model_name;
        this.params = params;

        // Default to LM Studio's local websocket server if no url is given.
        let config = {};
        if (url)
            config.baseURL = url;
        else
            config.baseURL = "ws://127.0.0.1:1234";

        this.lmstudio = new LMStudioClient({
            baseUrl: config.baseURL
        });
    }

    async sendRequest(turns, systemMessage, stop_seq='***') {
        let res = null;

        // Strip <think>...</think> reasoning from the output; any profile
        // params (e.g. draftModel for speculative decoding) are merged in.
        let pack = {
            reasoningParsing: {
                enabled: true,
                startString: "<think>",
                endString: "</think>"
            },
            ...this.params || {}
        };
        try {
            let messages = [{'role': 'system', 'content': systemMessage}].concat(turns);
            let chat = Chat.from(messages);
            const model = await this.lmstudio.llm.model(this.model_name || "qwen3-8b");
            let response = await model.respond(chat, pack);
            const { content, stats, nonReasoningContent } = response;

            if (stats.stopReason === "contextLengthReached") throw new Error("Context length exceeded");
            if (stats.stopReason === "failed") throw new Error("Failed to generate response");

            res = nonReasoningContent || content;
        } catch (err) {
            console.error('Error while awaiting response:', err);
            // If the error indicates a context-length problem, we could slice the turns array, etc.
            res = "My brain disconnected, try again.";
        }
        return res;
    }

    async sendVisionRequest(messages, systemMessage, imageBuffer) {
        const imageMessages = [...messages];
        const image = await this.lmstudio.files.prepareImageBase64(imageBuffer);
        imageMessages.push({
            role: "user",
            content: systemMessage,
            image: [image],
        });
        return this.sendRequest(imageMessages, systemMessage);
    }

    async embed(text) {
        if (text.length > 8191)
            text = text.slice(0, 8191);

        // Embedding models are loaded through the SDK's embedding namespace,
        // not llm; LLM handles do not expose embed().
        const model = await this.lmstudio.embedding.model(this.model_name || "text-embedding-nomic-embed-text-v1.5");
        const { embedding } = await model.embed(text);
        return embedding;
    }
}
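For orientation, a usage sketch of the class above; the model name, url, and messages are illustrative, and it assumes LM Studio is running locally with the model already loaded.

```js
import { LMStudio } from './src/models/lmstudio.js';

// url may be omitted; the constructor falls back to ws://127.0.0.1:1234.
const lms = new LMStudio('qwen3-8b', 'ws://127.0.0.1:1234', {});

const reply = await lms.sendRequest(
    [{ role: 'user', content: 'Greet the player.' }],
    'You are a helpful Minecraft assistant.'
);
console.log(reply);
```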
src/models/prompter.js:

@@ -110,6 +110,10 @@ export class Prompter {
             this.embedding_model = new HuggingFace(embedding.model, embedding.url);
         else if (embedding.api === 'novita')
             this.embedding_model = new Novita(embedding.model, embedding.url);
+        else if (embedding.api === 'lmstudio')
+            this.embedding_model = new LMStudio(embedding.model, embedding.url);
+        else if (embedding.api === 'together')
+            this.embedding_model = new Together(embedding.model, embedding.url);
         else {
             this.embedding_model = null;
             let embedding_name = embedding ? embedding.api : '[NOT SPECIFIED]'

@@ -140,6 +144,10 @@ export class Prompter {
             profile.api = 'openrouter'; // must do first because shares names with other models
         else if (profile.model.includes('ollama/'))
             profile.api = 'ollama'; // also must do early because shares names with other models
+        else if (profile.model.includes('lmstudio/'))
+            profile.api = 'lmstudio'; // also
+        else if (profile.model.includes('together/'))
+            profile.api = 'together'; // also
         else if (profile.model.includes('gemini'))
             profile.api = 'google';
         else if (profile.model.includes('vllm/'))

@@ -209,6 +217,10 @@ export class Prompter {
             model = new OpenRouter(profile.model.replace('openrouter/', ''), profile.url, profile.params);
         else if (profile.api === 'vllm')
             model = new VLLM(profile.model.replace('vllm/', ''), profile.url, profile.params);
+        else if (profile.api === 'lmstudio')
+            model = new LMStudio(profile.model.replace('lmstudio/', ''), profile.url, profile.params);
+        else if (profile.api === 'together')
+            model = new Together(profile.model.replace('together/', ''), profile.url, profile.params);
         else
             throw new Error('Unknown API:', profile.api);
         return model;
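The routing above is prefix-based: the `lmstudio/` or `together/` prefix in a profile's model string selects the API (checked before generic name matches like `gemini`), and is then stripped before the bare model name is handed to the wrapper's constructor. A standalone sketch of that logic follows; the helper name is hypothetical, for illustration only.

```js
// Hypothetical helper mirroring the prefix routing in Prompter (not part of the codebase).
function routeModel(modelString) {
    if (modelString.includes('lmstudio/'))
        return { api: 'lmstudio', model: modelString.replace('lmstudio/', '') };
    if (modelString.includes('together/'))
        return { api: 'together', model: modelString.replace('together/', '') };
    return { api: 'unknown', model: modelString };
}

console.log(routeModel('lmstudio/qwen3-8b'));
// { api: 'lmstudio', model: 'qwen3-8b' }
console.log(routeModel('together/meta-llama/Llama-3.3-70B-Instruct-Turbo'));
// { api: 'together', model: 'meta-llama/Llama-3.3-70B-Instruct-Turbo' }
```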
src/models/together.js (new file, 58 lines):

@@ -0,0 +1,58 @@
import OpenAIApi from 'openai';
import { getKey, hasKey } from '../utils/keys.js';
import { strictFormat } from '../utils/text.js';

export class Together {
    constructor(model_name, url) {
        this.model_name = model_name;

        // Together exposes an OpenAI-compatible endpoint.
        let config = {};
        config.baseURL = url || 'https://api.together.xyz/v1';

        const apiKey = getKey('TOGETHER_API_KEY');
        if (!apiKey) {
            console.error('Error: TOGETHER_API_KEY not found. Make sure it is set properly.');
        }

        // Pass the API key to the OpenAI-compatible client.
        config.apiKey = apiKey;

        this.openai = new OpenAIApi(config);
    }

    async sendRequest(turns, systemMessage, stop_seq='*') {
        let messages = [{ role: 'system', content: systemMessage }, ...turns];
        messages = strictFormat(messages);

        // Choose a valid model from Together AI's serverless catalog
        // (for example, "meta-llama/Llama-3.3-70B-Instruct-Turbo").
        const pack = {
            model: this.model_name || "meta-llama/Llama-3.3-70B-Instruct-Turbo",
            messages,
            stop: stop_seq
        };

        let res = null;
        try {
            console.log('Awaiting together api response...');
            let completion = await this.openai.chat.completions.create(pack);
            if (!completion?.choices?.[0]) {
                console.error('No completion or choices returned:', completion);
                return 'No response received.';
            }
            if (completion.choices[0].finish_reason === 'length') {
                throw new Error('Context length exceeded');
            }
            console.log('Received.');
            res = completion.choices[0].message.content;
        } catch (err) {
            console.error('Error while awaiting response:', err);
            // If the error indicates a context-length problem, we could slice the turns array, etc.
            res = 'My brain disconnected, try again.';
        }
        return res;
    }

    async embed(text) {
        throw new Error('Embeddings are not supported by TogetherAI.');
    }
}
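And a usage sketch for the wrapper above; it assumes TOGETHER_API_KEY is set in keys.json, and the messages are illustrative.

```js
import { Together } from './src/models/together.js';

// No url given, so the default https://api.together.xyz/v1 endpoint is used.
const together = new Together('meta-llama/Llama-3.3-70B-Instruct-Turbo');

const reply = await together.sendRequest(
    [{ role: 'user', content: 'Greet the player.' }],
    'You are a helpful Minecraft assistant.'
);
console.log(reply);
```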