mirror of
https://github.com/kolbytn/mindcraft.git
synced 2025-08-04 22:35:35 +02:00
Fix Qwen.js to be compatible with OpenAI and add random backoff for rate limiting
This commit is contained in:
parent
5dd57dd429
commit
1a86c3a485
3 changed files with 70 additions and 121 deletions
|
@ -5,13 +5,13 @@
|
||||||
|
|
||||||
"model": {
|
"model": {
|
||||||
"api": "qwen",
|
"api": "qwen",
|
||||||
"url": "https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/text-generation/generation",
|
"url": "https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
|
||||||
"model": "qwen-max"
|
"model": "qwen-max"
|
||||||
},
|
},
|
||||||
|
|
||||||
"embedding": {
|
"embedding": {
|
||||||
"api": "qwen",
|
"api": "qwen",
|
||||||
"url": "https://dashscope-intl.aliyuncs.com/api/v1/services/embeddings/text-embedding/text-embedding",
|
"url": "https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
|
||||||
"model": "text-embedding-v3"
|
"model": "text-embedding-v3"
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -8,12 +8,14 @@ export class SkillLibrary {
|
||||||
this.skill_docs_embeddings = {};
|
this.skill_docs_embeddings = {};
|
||||||
}
|
}
|
||||||
async initSkillLibrary() {
|
async initSkillLibrary() {
|
||||||
await Promise.all([
|
const skillDocs = getSkillDocs();
|
||||||
...getSkillDocs().map(async (doc) => {
|
const embeddingPromises = skillDocs.map((doc) => {
|
||||||
|
return (async () => {
|
||||||
let func_name_desc = doc.split('\n').slice(0, 2).join('');
|
let func_name_desc = doc.split('\n').slice(0, 2).join('');
|
||||||
this.skill_docs_embeddings[doc] = await this.embedding_model.embed(func_name_desc);
|
this.skill_docs_embeddings[doc] = await this.embedding_model.embed(func_name_desc);
|
||||||
})
|
})();
|
||||||
]);
|
});
|
||||||
|
await Promise.all(embeddingPromises);
|
||||||
}
|
}
|
||||||
|
|
||||||
async getRelevantSkillDocs(message, select_num) {
|
async getRelevantSkillDocs(message, select_num) {
|
||||||
|
|
|
@ -1,134 +1,81 @@
|
||||||
// This code uses Dashscope and HTTP to ensure the latest support for the Qwen model.
|
import OpenAIApi from 'openai';
|
||||||
// Qwen is also compatible with the OpenAI API format;
|
import { getKey, hasKey } from '../utils/keys.js';
|
||||||
|
import { strictFormat } from '../utils/text.js';
|
||||||
import { getKey } from '../utils/keys.js';
|
|
||||||
|
|
||||||
export class Qwen {
|
export class Qwen {
|
||||||
constructor(modelName, url) {
|
constructor(model_name, url) {
|
||||||
this.modelName = modelName;
|
this.model_name = model_name;
|
||||||
this.url = url || 'https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation';
|
|
||||||
this.apiKey = getKey('QWEN_API_KEY');
|
let config = {};
|
||||||
|
|
||||||
|
config.baseURL = url || 'https://dashscope.aliyuncs.com/compatible-mode/v1';
|
||||||
|
config.apiKey = getKey('QWEN_API_KEY');
|
||||||
|
|
||||||
|
this.openai = new OpenAIApi(config);
|
||||||
}
|
}
|
||||||
|
|
||||||
async sendRequest(turns, systemMessage, stopSeq = '***', retryCount = 0) {
|
async sendRequest(turns, systemMessage, stop_seq='***') {
|
||||||
if (retryCount > 5) {
|
let messages = [{'role': 'system', 'content': systemMessage}].concat(turns);
|
||||||
console.error('Maximum retry attempts reached.');
|
|
||||||
return 'Error: Too many retry attempts.';
|
|
||||||
}
|
|
||||||
|
|
||||||
const data = {
|
messages = strictFormat(messages);
|
||||||
model: this.modelName || 'qwen-plus',
|
|
||||||
input: { messages: [{ role: 'system', content: systemMessage }, ...turns] },
|
const pack = {
|
||||||
parameters: { result_format: 'message', stop: stopSeq },
|
model: this.model_name || "qwen-plus",
|
||||||
|
messages,
|
||||||
|
stop: stop_seq,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Add default user message if all messages are 'system' role
|
let res = null;
|
||||||
if (turns.every((msg) => msg.role === 'system')) {
|
|
||||||
data.input.messages.push({ role: 'user', content: 'hello' });
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!data.model || !data.input || !data.input.messages || !data.parameters) {
|
|
||||||
console.error('Invalid request data format:', data);
|
|
||||||
throw new Error('Invalid request data format.');
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response = await this._makeHttpRequest(this.url, data);
|
console.log('Awaiting Qwen api response...')
|
||||||
const choice = response?.output?.choices?.[0];
|
// console.log('Messages:', messages);
|
||||||
|
let completion = await this.openai.chat.completions.create(pack);
|
||||||
if (choice?.finish_reason === 'length' && turns.length > 0) {
|
if (completion.choices[0].finish_reason == 'length')
|
||||||
return this.sendRequest(turns.slice(1), systemMessage, stopSeq, retryCount + 1);
|
throw new Error('Context length exceeded');
|
||||||
}
|
console.log('Received.')
|
||||||
|
res = completion.choices[0].message.content;
|
||||||
return choice?.message?.content || 'No content received.';
|
|
||||||
} catch (err) {
|
|
||||||
console.error('Error occurred:', err);
|
|
||||||
return 'An error occurred, please try again.';
|
|
||||||
}
|
}
|
||||||
|
catch (err) {
|
||||||
|
if ((err.message == 'Context length exceeded' || err.code == 'context_length_exceeded') && turns.length > 1) {
|
||||||
|
console.log('Context length exceeded, trying again with shorter context.');
|
||||||
|
return await this.sendRequest(turns.slice(1), systemMessage, stop_seq);
|
||||||
|
} else {
|
||||||
|
console.log(err);
|
||||||
|
res = 'My brain disconnected, try again.';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Why random backoff?
|
||||||
|
// With a 30 requests/second limit on Alibaba Qwen's embedding service,
|
||||||
|
// random backoff spreads retries out over time, so concurrent requests that were rejected together do not all retry (and hit the limit) together again.
|
||||||
async embed(text) {
|
async embed(text) {
|
||||||
if (!text || typeof text !== 'string') {
|
const maxRetries = 5; // Maximum number of retries
|
||||||
console.error('Invalid embedding input: text must be a non-empty string:', text);
|
for (let retries = 0; retries < maxRetries; retries++) {
|
||||||
return 'Invalid embedding input: text must be a non-empty string.';
|
|
||||||
}
|
|
||||||
|
|
||||||
const data = {
|
|
||||||
model: this.modelName,
|
|
||||||
input: { texts: [text] },
|
|
||||||
parameters: { text_type: 'query' },
|
|
||||||
};
|
|
||||||
|
|
||||||
if (!data.model || !data.input || !data.input.texts || !data.parameters) {
|
|
||||||
console.error('Invalid embedding request data format:', data);
|
|
||||||
throw new Error('Invalid embedding request data format.');
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
const response = await this._makeHttpRequest(this.url, data);
|
|
||||||
const embedding = response?.output?.embeddings?.[0]?.embedding;
|
|
||||||
|
|
||||||
return embedding || 'No embedding result received.';
|
|
||||||
} catch (err) {
|
|
||||||
console.log('Embed data:', data);
|
|
||||||
console.error('Embed error occurred:', err);
|
|
||||||
return 'An error occurred, please try again.';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async _makeHttpRequest(url, data, maxRetries = 10) {
|
|
||||||
const headers = {
|
|
||||||
'Authorization': `Bearer ${this.apiKey}`,
|
|
||||||
'Content-Type': 'application/json',
|
|
||||||
};
|
|
||||||
|
|
||||||
let retryCount = 0;
|
|
||||||
|
|
||||||
while (retryCount < maxRetries) {
|
|
||||||
try {
|
try {
|
||||||
const response = await fetch(url, {
|
const { data } = await this.openai.embeddings.create({
|
||||||
method: 'POST',
|
model: this.model_name || "text-embedding-v3",
|
||||||
headers,
|
input: text,
|
||||||
body: JSON.stringify(data),
|
encoding_format: "float",
|
||||||
});
|
});
|
||||||
|
return data[0].embedding;
|
||||||
if (response.ok) {
|
|
||||||
const responseText = await response.text();
|
|
||||||
try {
|
|
||||||
//Task completed successfully
|
|
||||||
return JSON.parse(responseText);
|
|
||||||
} catch (err) {
|
|
||||||
console.error('Failed to parse response JSON:', err);
|
|
||||||
throw new Error('Invalid response JSON format.');
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
const errorText = await response.text();
|
|
||||||
|
|
||||||
if (response.status === 429 || response.statusText.includes('Too Many Requests')) {
|
|
||||||
// Handle rate limiting
|
|
||||||
retryCount++;
|
|
||||||
if (retryCount >= maxRetries) {
|
|
||||||
console.error('Exceeded maximum retry attempts, unable to get request result.');
|
|
||||||
throw new Error(`Request failed after ${maxRetries} retries due to rate limiting.`);
|
|
||||||
}
|
|
||||||
//Reached Qwen concurrency limit, waiting in queue
|
|
||||||
const waitTime = Math.random() * 1000; // Random wait between 0 to 1 seconds
|
|
||||||
await new Promise(resolve => setTimeout(resolve, waitTime));
|
|
||||||
continue; // Retry the request
|
|
||||||
} else {
|
|
||||||
console.error(`Request failed, status code ${response.status}: ${response.statusText}`);
|
|
||||||
console.error('Error response content:', errorText);
|
|
||||||
throw new Error(`Request failed, status code ${response.status}: ${response.statusText}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
// Handle network errors or other exceptions
|
if (err.status === 429) {
|
||||||
console.error('Error occurred during HTTP request:', err);
|
// If a rate limit error (HTTP 429) occurs, wait with exponential backoff (2^retries seconds) plus 0-2 seconds of random jitter
|
||||||
throw err; // Re-throw the error to be handled by the caller
|
const delay = Math.pow(2, retries) * 1000 + Math.floor(Math.random() * 2000);
|
||||||
|
// console.log(`Rate limit hit, retrying in ${delay} ms...`);
|
||||||
|
await new Promise(resolve => setTimeout(resolve, delay)); // Wait for the delay before retrying
|
||||||
|
} else {
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Exceeded maximum retries
|
// If maximum retries are reached and the request still fails, throw an error
|
||||||
console.error('Exceeded maximum retry attempts, unable to get request result.');
|
throw new Error('Max retries reached, request failed.');
|
||||||
throw new Error(`Request failed after ${maxRetries} retries.`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue