import OpenAIApi from 'openai';
import { getKey, hasKey } from '../utils/keys.js';
import { strictFormat } from '../utils/text.js';
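// Wrapper around Alibaba Cloud's DashScope service, which exposes Qwen models
// through an OpenAI-compatible API: the stock `openai` client is reused with
// only baseURL and apiKey overridden.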
export class Qwen {
    constructor(model_name, url, params) {
        this.model_name = model_name;
        this.params = params;
        let config = {};

        config.baseURL = url || 'https://dashscope.aliyuncs.com/compatible-mode/v1';
        config.apiKey = getKey('QWEN_API_KEY');

        this.openai = new OpenAIApi(config);
    }
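    // Send a chat completion request; on context overflow, drop the oldest
    // turn and retry recursively until the request fits or one turn remains.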
    async sendRequest(turns, systemMessage, stop_seq='***') {
        let messages = [{'role': 'system', 'content': systemMessage}].concat(turns);

        messages = strictFormat(messages);

        const pack = {
            model: this.model_name || "qwen-plus",
            messages,
            stop: stop_seq,
            ...(this.params || {})
        };

        let res = null;
        try {
            console.log('Awaiting Qwen API response...');
            // console.log('Messages:', messages);
            let completion = await this.openai.chat.completions.create(pack);
            if (completion.choices[0].finish_reason === 'length')
                throw new Error('Context length exceeded');
            console.log('Received.');
            res = completion.choices[0].message.content;
        }
        catch (err) {
            if ((err.message === 'Context length exceeded' || err.code === 'context_length_exceeded') && turns.length > 1) {
                console.log('Context length exceeded, trying again with shorter context.');
                return await this.sendRequest(turns.slice(1), systemMessage, stop_seq);
            } else {
                console.log(err);
                res = 'My brain disconnected, try again.';
            }
        }
        return res;
    }
    // Why random backoff?
    // With a 30 requests/second limit on Alibaba Qwen's embedding service,
    // randomized backoff spreads retries out rather than letting concurrent
    // requests retry in lockstep, which keeps throughput near the limit.
    async embed(text) {
        const maxRetries = 5; // Maximum number of retries
        for (let retries = 0; retries < maxRetries; retries++) {
            try {
                const { data } = await this.openai.embeddings.create({
                    model: this.model_name || "text-embedding-v3",
                    input: text,
                    encoding_format: "float",
                });
                return data[0].embedding;
            } catch (err) {
                if (err.status === 429) {
                    // On rate limiting, back off exponentially (2^retries seconds) plus 0-2 seconds of random jitter
                    const delay = Math.pow(2, retries) * 1000 + Math.floor(Math.random() * 2000);
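                    // Assuming maxRetries stays at 5, the resulting waits are roughly:
                    // retry 0: 1-3s, retry 1: 2-4s, retry 2: 4-6s, retry 3: 8-10s, retry 4: 16-18s.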
                    // console.log(`Rate limit hit, retrying in ${delay} ms...`);
                    await new Promise(resolve => setTimeout(resolve, delay)); // Wait for the delay before retrying
                } else {
                    throw err;
                }
            }
        }
        // If maximum retries are reached and the request still fails, throw an error
        throw new Error('Max retries reached, request failed.');
    }
}
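// A minimal usage sketch (not part of the module; assumes QWEN_API_KEY is
// available via getKey and that turns use the OpenAI message format):
//
//   const qwen = new Qwen();  // defaults: qwen-plus for chat, text-embedding-v3 for embeddings
//   const reply = await qwen.sendRequest(
//       [{ role: 'user', content: 'Hello!' }],
//       'You are a helpful assistant.'
//   );
//   const vector = await qwen.embed('hello world');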