mirror of
https://github.com/kolbytn/mindcraft.git
synced 2025-09-01 20:03:09 +02:00
fix: use text description when vision features are used with a non-vision model
This commit is contained in:
parent
647655f206
commit
430ae24d20
7 changed files with 69 additions and 19 deletions
|
@ -71,7 +71,14 @@ export class VisionInterpreter {
|
|||
const imageBuffer = fs.readFileSync(`${this.fp}/${filename}.jpg`);
|
||||
const messages = this.agent.history.getHistory();
|
||||
res = await this.agent.prompter.vision_model.sendVisionRequest(messages, prompt, imageBuffer);
|
||||
log(bot, res);
|
||||
|
||||
if (res == 'Vision is only supported by certain models.') {
|
||||
log(bot, "Vision may not be supported on this model. Using text-based environment description instead.");
|
||||
log(bot, this._nearbyBlocks());
|
||||
} else {
|
||||
log(bot, res);
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
log(this.agent.bot, `Error analyzing image: ${error.message}`);
|
||||
}
|
||||
|
|
|
@ -35,8 +35,12 @@ export class Claude {
|
|||
res = resp.content[0].text;
|
||||
}
|
||||
catch (err) {
|
||||
if (err.message.includes("does not support image input")) {
|
||||
res = "Vision is only supported by certain models.";
|
||||
} else {
|
||||
res = "My brain disconnected, try again.";
|
||||
}
|
||||
console.log(err);
|
||||
res = 'My brain disconnected, try again.';
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
|
|
@ -102,15 +102,25 @@ export class Gemini {
|
|||
|
||||
const stop_seq = '***';
|
||||
const prompt = toSinglePrompt(turns, systemMessage, stop_seq, 'model');
|
||||
|
||||
console.log('Awaiting Google API vision response...');
|
||||
const result = await model.generateContent([prompt, imagePart]);
|
||||
const response = await result.response;
|
||||
const text = response.text();
|
||||
console.log('Received.');
|
||||
if (!text.includes(stop_seq)) return text;
|
||||
const idx = text.indexOf(stop_seq);
|
||||
return text.slice(0, idx);
|
||||
let res = null;
|
||||
try {
|
||||
console.log('Awaiting Google API vision response...');
|
||||
const result = await model.generateContent([prompt, imagePart]);
|
||||
const response = await result.response;
|
||||
const text = response.text();
|
||||
console.log('Received.');
|
||||
if (!text.includes(stop_seq)) return text;
|
||||
const idx = text.indexOf(stop_seq);
|
||||
res = text.slice(0, idx);
|
||||
} catch (err) {
|
||||
console.log(err);
|
||||
if (err.message.includes("Image input modality is not enabled for models/")) {
|
||||
res = "Vision is only supported by certain models.";
|
||||
} else {
|
||||
res = "An unexpected error occurred, please try again.";
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
async embed(text) {
|
||||
|
|
|
@ -48,6 +48,9 @@ export class GPT {
|
|||
if ((err.message == 'Context length exceeded' || err.code == 'context_length_exceeded') && turns.length > 1) {
|
||||
console.log('Context length exceeded, trying again with shorter context.');
|
||||
return await this.sendRequest(turns.slice(1), systemMessage, stop_seq);
|
||||
} else if (err.message.includes('image_url')) {
|
||||
console.log(err);
|
||||
res = 'Vision is only supported by certain models.';
|
||||
} else {
|
||||
console.log(err);
|
||||
res = 'My brain disconnected, try again.';
|
||||
|
|
|
@ -43,6 +43,9 @@ export class Grok {
|
|||
if ((err.message == 'Context length exceeded' || err.code == 'context_length_exceeded') && turns.length > 1) {
|
||||
console.log('Context length exceeded, trying again with shorter context.');
|
||||
return await this.sendRequest(turns.slice(1), systemMessage, stop_seq);
|
||||
} else if (err.message.includes('The model expects a single `text` element per message.')) {
|
||||
console.log(err);
|
||||
res = 'Vision is only supported by certain models.';
|
||||
} else {
|
||||
console.log(err);
|
||||
res = 'My brain disconnected, try again.';
|
||||
|
@ -51,6 +54,24 @@ export class Grok {
|
|||
// sometimes outputs special token <|separator|>, just replace it
|
||||
return res.replace(/<\|separator\|>/g, '*no response*');
|
||||
}
|
||||
|
||||
async sendVisionRequest(messages, systemMessage, imageBuffer) {
|
||||
const imageMessages = [...messages];
|
||||
imageMessages.push({
|
||||
role: "user",
|
||||
content: [
|
||||
{ type: "text", text: systemMessage },
|
||||
{
|
||||
type: "image_url",
|
||||
image_url: {
|
||||
url: `data:image/jpeg;base64,${imageBuffer.toString('base64')}`
|
||||
}
|
||||
}
|
||||
]
|
||||
});
|
||||
|
||||
return this.sendRequest(imageMessages, systemMessage);
|
||||
}
|
||||
|
||||
async embed(text) {
|
||||
throw new Error('Embeddings are not supported by Grok.');
|
||||
|
|
|
@ -23,9 +23,6 @@ export class GroqCloudAPI {
|
|||
let res = null;
|
||||
try {
|
||||
console.log("Awaiting Groq response...");
|
||||
if (!this.params.max_tokens) {
|
||||
this.params.max_tokens = 16384;
|
||||
}
|
||||
let completion = await this.groq.chat.completions.create({
|
||||
"messages": messages,
|
||||
"model": this.model_name || "mixtral-8x7b-32768",
|
||||
|
@ -43,14 +40,19 @@ export class GroqCloudAPI {
|
|||
|
||||
}
|
||||
catch(err) {
|
||||
if (err.message.includes("content must be a string")) {
|
||||
res = "Vision is only supported by certain models.";
|
||||
} else {
|
||||
console.log(this.model_name);
|
||||
res = "My brain disconnected, try again.";
|
||||
}
|
||||
console.log(err);
|
||||
res = "My brain just kinda stopped working. Try again.";
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
async sendVisionRequest(messages, systemMessage, imageBuffer) {
|
||||
const imageMessages = [...messages];
|
||||
const imageMessages = messages.filter(message => message.role !== 'system');
|
||||
imageMessages.push({
|
||||
role: "user",
|
||||
content: [
|
||||
|
|
|
@ -56,9 +56,12 @@ export class Mistral {
|
|||
|
||||
result = response.choices[0].message.content;
|
||||
} catch (err) {
|
||||
console.log(err)
|
||||
|
||||
result = "My brain disconnected, try again.";
|
||||
if (err.message.includes("A request containing images has been given to a model which does not have the 'vision' capability.")) {
|
||||
result = "Vision is only supported by certain models.";
|
||||
} else {
|
||||
result = "My brain disconnected, try again.";
|
||||
}
|
||||
console.log(err);
|
||||
}
|
||||
|
||||
return result;
|
||||
|
|
Loading…
Add table
Reference in a new issue