mirror of
https://github.com/kolbytn/mindcraft.git
synced 2025-09-02 12:23:08 +02:00
fix: use text description when vision features are used with a non-vision model
This commit is contained in:
parent
647655f206
commit
430ae24d20
7 changed files with 69 additions and 19 deletions
|
@ -71,7 +71,14 @@ export class VisionInterpreter {
|
||||||
const imageBuffer = fs.readFileSync(`${this.fp}/${filename}.jpg`);
|
const imageBuffer = fs.readFileSync(`${this.fp}/${filename}.jpg`);
|
||||||
const messages = this.agent.history.getHistory();
|
const messages = this.agent.history.getHistory();
|
||||||
res = await this.agent.prompter.vision_model.sendVisionRequest(messages, prompt, imageBuffer);
|
res = await this.agent.prompter.vision_model.sendVisionRequest(messages, prompt, imageBuffer);
|
||||||
log(bot, res);
|
|
||||||
|
if (res == 'Vision is only supported by certain models.') {
|
||||||
|
log(bot, "Vision may not be supported on this model. Using text-based environment description instead.");
|
||||||
|
log(bot, this._nearbyBlocks());
|
||||||
|
} else {
|
||||||
|
log(bot, res);
|
||||||
|
}
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
log(this.agent.bot, `Error analyzing image: ${error.message}`);
|
log(this.agent.bot, `Error analyzing image: ${error.message}`);
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,8 +35,12 @@ export class Claude {
|
||||||
res = resp.content[0].text;
|
res = resp.content[0].text;
|
||||||
}
|
}
|
||||||
catch (err) {
|
catch (err) {
|
||||||
|
if (err.message.includes("does not support image input")) {
|
||||||
|
res = "Vision is only supported by certain models.";
|
||||||
|
} else {
|
||||||
|
res = "My brain disconnected, try again.";
|
||||||
|
}
|
||||||
console.log(err);
|
console.log(err);
|
||||||
res = 'My brain disconnected, try again.';
|
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
|
@ -102,15 +102,25 @@ export class Gemini {
|
||||||
|
|
||||||
const stop_seq = '***';
|
const stop_seq = '***';
|
||||||
const prompt = toSinglePrompt(turns, systemMessage, stop_seq, 'model');
|
const prompt = toSinglePrompt(turns, systemMessage, stop_seq, 'model');
|
||||||
|
let res = null;
|
||||||
console.log('Awaiting Google API vision response...');
|
try {
|
||||||
const result = await model.generateContent([prompt, imagePart]);
|
console.log('Awaiting Google API vision response...');
|
||||||
const response = await result.response;
|
const result = await model.generateContent([prompt, imagePart]);
|
||||||
const text = response.text();
|
const response = await result.response;
|
||||||
console.log('Received.');
|
const text = response.text();
|
||||||
if (!text.includes(stop_seq)) return text;
|
console.log('Received.');
|
||||||
const idx = text.indexOf(stop_seq);
|
if (!text.includes(stop_seq)) return text;
|
||||||
return text.slice(0, idx);
|
const idx = text.indexOf(stop_seq);
|
||||||
|
res = text.slice(0, idx);
|
||||||
|
} catch (err) {
|
||||||
|
console.log(err);
|
||||||
|
if (err.message.includes("Image input modality is not enabled for models/")) {
|
||||||
|
res = "Vision is only supported by certain models.";
|
||||||
|
} else {
|
||||||
|
res = "An unexpected error occurred, please try again.";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
async embed(text) {
|
async embed(text) {
|
||||||
|
|
|
@ -48,6 +48,9 @@ export class GPT {
|
||||||
if ((err.message == 'Context length exceeded' || err.code == 'context_length_exceeded') && turns.length > 1) {
|
if ((err.message == 'Context length exceeded' || err.code == 'context_length_exceeded') && turns.length > 1) {
|
||||||
console.log('Context length exceeded, trying again with shorter context.');
|
console.log('Context length exceeded, trying again with shorter context.');
|
||||||
return await this.sendRequest(turns.slice(1), systemMessage, stop_seq);
|
return await this.sendRequest(turns.slice(1), systemMessage, stop_seq);
|
||||||
|
} else if (err.message.includes('image_url')) {
|
||||||
|
console.log(err);
|
||||||
|
res = 'Vision is only supported by certain models.';
|
||||||
} else {
|
} else {
|
||||||
console.log(err);
|
console.log(err);
|
||||||
res = 'My brain disconnected, try again.';
|
res = 'My brain disconnected, try again.';
|
||||||
|
|
|
@ -43,6 +43,9 @@ export class Grok {
|
||||||
if ((err.message == 'Context length exceeded' || err.code == 'context_length_exceeded') && turns.length > 1) {
|
if ((err.message == 'Context length exceeded' || err.code == 'context_length_exceeded') && turns.length > 1) {
|
||||||
console.log('Context length exceeded, trying again with shorter context.');
|
console.log('Context length exceeded, trying again with shorter context.');
|
||||||
return await this.sendRequest(turns.slice(1), systemMessage, stop_seq);
|
return await this.sendRequest(turns.slice(1), systemMessage, stop_seq);
|
||||||
|
} else if (err.message.includes('The model expects a single `text` element per message.')) {
|
||||||
|
console.log(err);
|
||||||
|
res = 'Vision is only supported by certain models.';
|
||||||
} else {
|
} else {
|
||||||
console.log(err);
|
console.log(err);
|
||||||
res = 'My brain disconnected, try again.';
|
res = 'My brain disconnected, try again.';
|
||||||
|
@ -51,6 +54,24 @@ export class Grok {
|
||||||
// sometimes outputs special token <|separator|>, just replace it
|
// sometimes outputs special token <|separator|>, just replace it
|
||||||
return res.replace(/<\|separator\|>/g, '*no response*');
|
return res.replace(/<\|separator\|>/g, '*no response*');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async sendVisionRequest(messages, systemMessage, imageBuffer) {
|
||||||
|
const imageMessages = [...messages];
|
||||||
|
imageMessages.push({
|
||||||
|
role: "user",
|
||||||
|
content: [
|
||||||
|
{ type: "text", text: systemMessage },
|
||||||
|
{
|
||||||
|
type: "image_url",
|
||||||
|
image_url: {
|
||||||
|
url: `data:image/jpeg;base64,${imageBuffer.toString('base64')}`
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
});
|
||||||
|
|
||||||
|
return this.sendRequest(imageMessages, systemMessage);
|
||||||
|
}
|
||||||
|
|
||||||
async embed(text) {
|
async embed(text) {
|
||||||
throw new Error('Embeddings are not supported by Grok.');
|
throw new Error('Embeddings are not supported by Grok.');
|
||||||
|
|
|
@ -23,9 +23,6 @@ export class GroqCloudAPI {
|
||||||
let res = null;
|
let res = null;
|
||||||
try {
|
try {
|
||||||
console.log("Awaiting Groq response...");
|
console.log("Awaiting Groq response...");
|
||||||
if (!this.params.max_tokens) {
|
|
||||||
this.params.max_tokens = 16384;
|
|
||||||
}
|
|
||||||
let completion = await this.groq.chat.completions.create({
|
let completion = await this.groq.chat.completions.create({
|
||||||
"messages": messages,
|
"messages": messages,
|
||||||
"model": this.model_name || "mixtral-8x7b-32768",
|
"model": this.model_name || "mixtral-8x7b-32768",
|
||||||
|
@ -43,14 +40,19 @@ export class GroqCloudAPI {
|
||||||
|
|
||||||
}
|
}
|
||||||
catch(err) {
|
catch(err) {
|
||||||
|
if (err.message.includes("content must be a string")) {
|
||||||
|
res = "Vision is only supported by certain models.";
|
||||||
|
} else {
|
||||||
|
console.log(this.model_name);
|
||||||
|
res = "My brain disconnected, try again.";
|
||||||
|
}
|
||||||
console.log(err);
|
console.log(err);
|
||||||
res = "My brain just kinda stopped working. Try again.";
|
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
async sendVisionRequest(messages, systemMessage, imageBuffer) {
|
async sendVisionRequest(messages, systemMessage, imageBuffer) {
|
||||||
const imageMessages = [...messages];
|
const imageMessages = messages.filter(message => message.role !== 'system');
|
||||||
imageMessages.push({
|
imageMessages.push({
|
||||||
role: "user",
|
role: "user",
|
||||||
content: [
|
content: [
|
||||||
|
|
|
@ -56,9 +56,12 @@ export class Mistral {
|
||||||
|
|
||||||
result = response.choices[0].message.content;
|
result = response.choices[0].message.content;
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.log(err)
|
if (err.message.includes("A request containing images has been given to a model which does not have the 'vision' capability.")) {
|
||||||
|
result = "Vision is only supported by certain models.";
|
||||||
result = "My brain disconnected, try again.";
|
} else {
|
||||||
|
result = "My brain disconnected, try again.";
|
||||||
|
}
|
||||||
|
console.log(err);
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
|
Loading…
Add table
Reference in a new issue