diff --git a/src/agent/vision_interpreter.js b/src/agent/vision_interpreter.js index 38b7a44..94be3de 100644 --- a/src/agent/vision_interpreter.js +++ b/src/agent/vision_interpreter.js @@ -37,7 +37,7 @@ export class VisionInterpreter { filename = await camera.capture(); } - if (!this.allow_vision || !this.agent.prompter.chat_model.sendVisionRequest) { + if (!this.allow_vision || !this.agent.prompter.vision_model.sendVisionRequest) { log(this.agent.bot, "Vision is disabled. Using text-based environment description instead."); log(this.agent.bot, this._nearbyBlocks()); } else { @@ -54,7 +54,7 @@ export class VisionInterpreter { let filename = await camera.capture(); - if (!this.allow_vision || !this.agent.prompter.chat_model.sendVisionRequest) { + if (!this.allow_vision || !this.agent.prompter.vision_model.sendVisionRequest) { log(this.agent.bot, "Vision is disabled. Using text-based environment description instead."); log(this.agent.bot, this._nearbyBlocks()); } else { @@ -70,7 +70,7 @@ export class VisionInterpreter { const bot = this.agent.bot; const imageBuffer = fs.readFileSync(`${this.fp}/${filename}.jpg`); const messages = this.agent.history.getHistory(); - res = await this.agent.prompter.chat_model.sendVisionRequest(messages, prompt, imageBuffer); + res = await this.agent.prompter.vision_model.sendVisionRequest(messages, prompt, imageBuffer); log(bot, res); } catch (error) { log(this.agent.bot, `Error analyzing image: ${error.message}`); diff --git a/src/models/mistral.js b/src/models/mistral.js index 01e0ecf..f1f3563 100644 --- a/src/models/mistral.js +++ b/src/models/mistral.js @@ -47,6 +47,7 @@ export class Mistral { ]; messages.push(...strictFormat(turns)); + console.log('Awaiting mistral api response...') const response = await this.#client.chat.complete({ model, messages, diff --git a/src/models/prompter.js b/src/models/prompter.js index a29a0b9..5ac6a1f 100644 --- a/src/models/prompter.js +++ b/src/models/prompter.js @@ -65,6 +65,14 @@ export class Prompter { this.code_model = this.chat_model; } + if (this.profile.vision_model) { + let vision_model_profile = this._selectAPI(this.profile.vision_model); + this.vision_model = this._createModel(vision_model_profile); + } + else { + this.vision_model = this.chat_model; + } + let embedding = this.profile.embedding; if (embedding === undefined) { if (chat_model_profile.api !== 'ollama')