Jules was unable to complete the task in time. Please review the work done so far and provide feedback for Jules to continue.

This commit is contained in:
google-labs-jules[bot] 2025-06-07 08:39:05 +00:00
parent 21481a7861
commit ffe3b0e528
7 changed files with 166 additions and 31 deletions

View file

@ -35,6 +35,7 @@ const settings = {
"allow_insecure_coding": false, // allows newAction command and model can write/run code on your computer. enable at own risk "allow_insecure_coding": false, // allows newAction command and model can write/run code on your computer. enable at own risk
"allow_vision": false, // allows vision model to interpret screenshots as inputs "allow_vision": false, // allows vision model to interpret screenshots as inputs
"vision_mode": "on", // "off", "on", or "always_active"
"blocked_actions" : ["!checkBlueprint", "!checkBlueprintLevel", "!getBlueprint", "!getBlueprintLevel"] , // commands to disable and remove from docs. Ex: ["!setMode"] "blocked_actions" : ["!checkBlueprint", "!checkBlueprintLevel", "!getBlueprint", "!getBlueprintLevel"] , // commands to disable and remove from docs. Ex: ["!setMode"]
"code_timeout_mins": -1, // minutes code is allowed to run. -1 for no timeout "code_timeout_mins": -1, // minutes code is allowed to run. -1 for no timeout
"relevant_docs_count": 5, // number of relevant code function docs to select for prompting. -1 for all "relevant_docs_count": 5, // number of relevant code function docs to select for prompting. -1 for all

View file

@ -20,6 +20,7 @@ import { say } from './speak.js';
export class Agent { export class Agent {
async start(profile_fp, load_mem=false, init_message=null, count_id=0, task_path=null, task_id=null) { async start(profile_fp, load_mem=false, init_message=null, count_id=0, task_path=null, task_id=null) {
this.last_sender = null; this.last_sender = null;
this.latestScreenshotPath = null;
this.count_id = count_id; this.count_id = count_id;
if (!profile_fp) { if (!profile_fp) {
throw new Error('No profile filepath provided'); throw new Error('No profile filepath provided');
@ -116,7 +117,7 @@ export class Agent {
this.checkAllPlayersPresent(); this.checkAllPlayersPresent();
console.log('Initializing vision intepreter...'); console.log('Initializing vision intepreter...');
this.vision_interpreter = new VisionInterpreter(this, settings.allow_vision); this.vision_interpreter = new VisionInterpreter(this, settings.vision_mode);
} catch (error) { } catch (error) {
console.error('Error in spawn event:', error); console.error('Error in spawn event:', error);
@ -172,7 +173,8 @@ export class Agent {
if (save_data?.self_prompt) { if (save_data?.self_prompt) {
if (init_message) { if (init_message) {
this.history.add('system', init_message); // Assuming init_message for self_prompt loading doesn't have an image
await this.history.add('system', init_message, null);
} }
await this.self_prompter.handleLoad(save_data.self_prompt, save_data.self_prompting_state); await this.self_prompter.handleLoad(save_data.self_prompt, save_data.self_prompting_state);
} }
@ -246,6 +248,15 @@ export class Agent {
const from_other_bot = convoManager.isOtherAgent(source); const from_other_bot = convoManager.isOtherAgent(source);
if (!self_prompt && !from_other_bot) { // from user, check for forced commands if (!self_prompt && !from_other_bot) { // from user, check for forced commands
if (settings.vision_mode === 'always_active' && this.vision_interpreter && this.vision_interpreter.camera) {
try {
const screenshotFilename = await this.vision_interpreter.camera.capture();
this.latestScreenshotPath = screenshotFilename;
console.log(`[${this.name}] Captured screenshot in always_active mode: ${screenshotFilename}`);
} catch (error) {
console.error(`[${this.name}] Error capturing screenshot in always_active mode:`, error);
}
}
const user_command_name = containsCommand(message); const user_command_name = containsCommand(message);
if (user_command_name) { if (user_command_name) {
if (!commandExists(user_command_name)) { if (!commandExists(user_command_name)) {
@ -256,7 +267,16 @@ export class Agent {
if (user_command_name === '!newAction') { if (user_command_name === '!newAction') {
// all user-initiated commands are ignored by the bot except for this one // all user-initiated commands are ignored by the bot except for this one
// add the preceding message to the history to give context for newAction // add the preceding message to the history to give context for newAction
this.history.add(source, message); // This is the user's message that contains the !newAction command.
// If a screenshot was taken due to always_active, it should be associated here.
let imagePathForNewActionCmd = null;
if (settings.vision_mode === 'always_active' && this.latestScreenshotPath && !self_prompt && !from_other_bot) {
imagePathForNewActionCmd = this.latestScreenshotPath;
}
await this.history.add(source, message, imagePathForNewActionCmd);
if (imagePathForNewActionCmd) {
this.latestScreenshotPath = null; // Consume path
}
} }
let execute_res = await executeCommand(this, message); let execute_res = await executeCommand(this, message);
if (execute_res) if (execute_res)
@ -281,11 +301,29 @@ export class Agent {
behavior_log = '...' + behavior_log.substring(behavior_log.length - MAX_LOG); behavior_log = '...' + behavior_log.substring(behavior_log.length - MAX_LOG);
} }
behavior_log = 'Recent behaviors log: \n' + behavior_log; behavior_log = 'Recent behaviors log: \n' + behavior_log;
await this.history.add('system', behavior_log); await this.history.add('system', behavior_log, null); // Behavior log unlikely to have an image
} }
// Handle other user messages // Handle other user messages (or initial system messages)
await this.history.add(source, message); let imagePathForInitialMessage = null;
if (!self_prompt && !from_other_bot) {
// If it's a user message and a screenshot was auto-captured for always_active
if (settings.vision_mode === 'always_active' && this.latestScreenshotPath) {
imagePathForInitialMessage = this.latestScreenshotPath;
}
} else if (source === 'system' && this.latestScreenshotPath && message.startsWith("You died at position")) {
// Placeholder branch (currently a no-op): a system death message could carry a screenshot
// if some future death-capture logic set latestScreenshotPath before the message arrived.
// Today death messages do not set latestScreenshotPath, so nothing is attached here.
// A system message that results directly from an action that *did* set the path is better
// handled where that command's result is recorded.
// imagePathForInitialMessage = this.latestScreenshotPath; // Generally, system messages here won't have an image unless specific logic sets it.
}
await this.history.add(source, message, imagePathForInitialMessage);
if (imagePathForInitialMessage) {
this.latestScreenshotPath = null; // Consume the path if used
}
this.history.save(); this.history.save();
if (!self_prompt && this.self_prompter.isActive()) // message is from user during self-prompting if (!self_prompt && this.self_prompter.isActive()) // message is from user during self-prompting
@ -306,10 +344,12 @@ export class Agent {
if (command_name) { // contains query or command if (command_name) { // contains query or command
res = truncCommandMessage(res); // everything after the command is ignored res = truncCommandMessage(res); // everything after the command is ignored
this.history.add(this.name, res); // Agent's own message stating the command it will execute
await this.history.add(this.name, res, null);
if (!commandExists(command_name)) { if (!commandExists(command_name)) {
this.history.add('system', `Command ${command_name} does not exist.`); // Agent hallucinated a command
await this.history.add('system', `Command ${command_name} does not exist.`, null);
console.warn('Agent hallucinated command:', command_name) console.warn('Agent hallucinated command:', command_name)
continue; continue;
} }
@ -333,13 +373,24 @@ export class Agent {
console.log('Agent executed:', command_name, 'and got:', execute_res); console.log('Agent executed:', command_name, 'and got:', execute_res);
used_command = true; used_command = true;
if (execute_res) if (execute_res) {
this.history.add('system', execute_res); let imagePathForCommandResult = null;
else // Vision commands (!lookAtPlayer, !lookAtPosition) set latestScreenshotPath in VisionInterpreter.
// This is relevant if mode is 'on' (analysis done, path stored by VI) or 'always_active' (screenshot taken, path stored by VI).
if (command_name && (command_name === '!lookAtPlayer' || command_name === '!lookAtPosition') && this.latestScreenshotPath) {
imagePathForCommandResult = this.latestScreenshotPath;
}
await this.history.add('system', execute_res, imagePathForCommandResult);
if (imagePathForCommandResult) {
this.latestScreenshotPath = null; // Consume the path
}
}
else { // command execution didn't return anything or failed in a way that implies loop break
break; break;
}
} }
else { // conversation response else { // conversation response (no command)
this.history.add(this.name, res); await this.history.add(this.name, res, null); // Agent's text response, no image typically
this.routeResponse(source, res); this.routeResponse(source, res);
break; break;
} }
@ -488,7 +539,8 @@ export class Agent {
cleanKill(msg='Killing agent process...', code=1) { cleanKill(msg='Killing agent process...', code=1) {
this.history.add('system', msg); // Assuming cleanKill messages don't have images
await this.history.add('system', msg, null);
this.bot.chat(code > 1 ? 'Restarting.': 'Exiting.'); this.bot.chat(code > 1 ? 'Restarting.': 'Exiting.');
this.history.save(); this.history.save();
process.exit(code); process.exit(code);
@ -497,7 +549,8 @@ export class Agent {
if (this.task.data) { if (this.task.data) {
let res = this.task.isDone(); let res = this.task.isDone();
if (res) { if (res) {
await this.history.add('system', `Task ended with score : ${res.score}`); // Assuming task end messages don't have images
await this.history.add('system', `Task ended with score : ${res.score}`, null);
await this.history.save(); await this.history.save();
// await new Promise(resolve => setTimeout(resolve, 3000)); // Wait 3 second for save to complete // await new Promise(resolve => setTimeout(resolve, 3000)); // Wait 3 second for save to complete
console.log('Task finished:', res.message); console.log('Task finished:', res.message);

View file

@ -428,6 +428,13 @@ export const actionsList = [
} }
}, },
perform: async function(agent, player_name, direction) { perform: async function(agent, player_name, direction) {
if (agent.vision_interpreter && agent.vision_interpreter.vision_mode === 'off') {
return "Vision commands are disabled as vision mode is 'off'.";
}
// When vision mode is not 'off' but the interpreter has no camera, the look command cannot run (a missing vision_interpreter is not caught by this check).
if (agent.vision_interpreter && !agent.vision_interpreter.camera && agent.vision_interpreter.vision_mode !== 'off') {
return "Camera is not available, cannot perform look command.";
}
if (direction !== 'at' && direction !== 'with') { if (direction !== 'at' && direction !== 'with') {
return "Invalid direction. Use 'at' or 'with'."; return "Invalid direction. Use 'at' or 'with'.";
} }
@ -448,6 +455,13 @@ export const actionsList = [
'z': { type: 'int', description: 'z coordinate' } 'z': { type: 'int', description: 'z coordinate' }
}, },
perform: async function(agent, x, y, z) { perform: async function(agent, x, y, z) {
if (agent.vision_interpreter && agent.vision_interpreter.vision_mode === 'off') {
return "Vision commands are disabled as vision mode is 'off'.";
}
// When vision mode is not 'off' but the interpreter has no camera, the look command cannot run (a missing vision_interpreter is not caught by this check).
if (agent.vision_interpreter && !agent.vision_interpreter.camera && agent.vision_interpreter.vision_mode !== 'off') {
return "Camera is not available, cannot perform look command.";
}
let result = ""; let result = "";
const actionFn = async () => { const actionFn = async () => {
result = await agent.vision_interpreter.lookAtPosition(x, y, z); result = await agent.vision_interpreter.lookAtPosition(x, y, z);

View file

@ -58,7 +58,7 @@ export class History {
} }
} }
async add(name, content) { async add(name, content, imagePath = null) {
let role = 'assistant'; let role = 'assistant';
if (name === 'system') { if (name === 'system') {
role = 'system'; role = 'system';
@ -67,7 +67,7 @@ export class History {
role = 'user'; role = 'user';
content = `${name}: ${content}`; content = `${name}: ${content}`;
} }
this.turns.push({role, content}); this.turns.push({role, content, imagePath});
if (this.turns.length >= this.max_messages) { if (this.turns.length >= this.max_messages) {
let chunk = this.turns.splice(0, this.summary_chunk_size); let chunk = this.turns.splice(0, this.summary_chunk_size);

View file

@ -3,19 +3,26 @@ import { Camera } from "./camera.js";
import fs from 'fs'; import fs from 'fs';
export class VisionInterpreter { export class VisionInterpreter {
constructor(agent, allow_vision) { constructor(agent, vision_mode) {
this.agent = agent; this.agent = agent;
this.allow_vision = allow_vision; this.vision_mode = vision_mode;
this.fp = './bots/'+agent.name+'/screenshots/'; this.fp = './bots/'+agent.name+'/screenshots/';
if (allow_vision) { if (this.vision_mode !== 'off') {
this.camera = new Camera(agent.bot, this.fp); this.camera = new Camera(agent.bot, this.fp);
} }
} }
async lookAtPlayer(player_name, direction) { async lookAtPlayer(player_name, direction) {
if (!this.allow_vision || !this.agent.prompter.vision_model.sendVisionRequest) { if (this.vision_mode === 'off') {
return "Vision is disabled. Use other methods to describe the environment."; return "Vision is disabled. Use other methods to describe the environment.";
} }
if (!this.camera) {
return "Camera is not initialized. Vision may be set to 'off'.";
}
if (!this.agent.prompter.vision_model.sendVisionRequest && this.vision_mode === 'on') {
return "Vision requests are not enabled for the current model. Cannot analyze image.";
}
let result = ""; let result = "";
const bot = this.agent.bot; const bot = this.agent.bot;
const player = bot.players[player_name]?.entity; const player = bot.players[player_name]?.entity;
@ -26,30 +33,51 @@ export class VisionInterpreter {
let filename; let filename;
if (direction === 'with') { if (direction === 'with') {
await bot.look(player.yaw, player.pitch); await bot.look(player.yaw, player.pitch);
result = `Looking in the same direction as ${player_name}\n`; result = `Looking in the same direction as ${player_name}.\n`;
filename = await this.camera.capture(); filename = await this.camera.capture();
this.agent.latestScreenshotPath = filename;
} else { } else {
await bot.lookAt(new Vec3(player.position.x, player.position.y + player.height, player.position.z)); await bot.lookAt(new Vec3(player.position.x, player.position.y + player.height, player.position.z));
result = `Looking at player ${player_name}\n`; result = `Looking at player ${player_name}.\n`;
filename = await this.camera.capture(); filename = await this.camera.capture();
this.agent.latestScreenshotPath = filename;
} }
return result + `Image analysis: "${await this.analyzeImage(filename)}"`; if (this.vision_mode === 'on') {
return result + `Image analysis: "${await this.analyzeImage(filename)}"`;
} else if (this.vision_mode === 'always_active') {
return result + "Screenshot taken and stored.";
}
// Should not be reached if vision_mode is one of the expected values
return "Error: Unknown vision mode.";
} }
async lookAtPosition(x, y, z) { async lookAtPosition(x, y, z) {
if (!this.allow_vision || !this.agent.prompter.vision_model.sendVisionRequest) { if (this.vision_mode === 'off') {
return "Vision is disabled. Use other methods to describe the environment."; return "Vision is disabled. Use other methods to describe the environment.";
} }
if (!this.camera) {
return "Camera is not initialized. Vision may be set to 'off'.";
}
if (!this.agent.prompter.vision_model.sendVisionRequest && this.vision_mode === 'on') {
return "Vision requests are not enabled for the current model. Cannot analyze image.";
}
let result = ""; let result = "";
const bot = this.agent.bot; const bot = this.agent.bot;
await bot.lookAt(new Vec3(x, y + 2, z)); await bot.lookAt(new Vec3(x, y + 2, z)); // lookAt requires y to be eye level, so +2 from feet
result = `Looking at coordinate ${x}, ${y}, ${z}\n`; result = `Looking at coordinate ${x}, ${y}, ${z}.\n`;
let filename = await this.camera.capture(); let filename = await this.camera.capture();
this.agent.latestScreenshotPath = filename;
return result + `Image analysis: "${await this.analyzeImage(filename)}"`; if (this.vision_mode === 'on') {
return result + `Image analysis: "${await this.analyzeImage(filename)}"`;
} else if (this.vision_mode === 'always_active') {
return result + "Screenshot taken and stored.";
}
// Should not be reached if vision_mode is one of the expected values
return "Error: Unknown vision mode.";
} }
getCenterBlockInfo() { getCenterBlockInfo() {

View file

@ -31,9 +31,10 @@ export class Gemini {
]; ];
this.genAI = new GoogleGenerativeAI(getKey('GEMINI_API_KEY')); this.genAI = new GoogleGenerativeAI(getKey('GEMINI_API_KEY'));
this.supportsRawImageInput = true;
} }
async sendRequest(turns, systemMessage) { async sendRequest(turns, systemMessage, imageData = null) {
let model; let model;
const modelConfig = { const modelConfig = {
model: this.model_name || "gemini-1.5-flash", model: this.model_name || "gemini-1.5-flash",
@ -64,6 +65,24 @@ export class Gemini {
}); });
} }
if (imageData && contents.length > 0) {
const lastContent = contents[contents.length - 1];
if (lastContent.role === 'user') { // Ensure the image is added to a user turn
lastContent.parts.push({
inline_data: {
mime_type: 'image/jpeg',
data: imageData.toString('base64')
}
});
} else {
// This case should ideally not happen if imageData is tied to a user message.
// If it does, we could either append a new user turn carrying the image,
// or log a warning and send the request without the image.
// For now we take the second option: warn and send the request without the image.
console.warn('[Gemini] imageData provided, but the last content entry was not from a user. Image not sent.');
}
}
const result = await model.generateContent({ const result = await model.generateContent({
contents, contents,
generationConfig: { generationConfig: {

View file

@ -334,9 +334,29 @@ export class Prompter {
let prompt = this.profile.conversing; let prompt = this.profile.conversing;
prompt = await this.replaceStrings(prompt, messages, this.convo_examples); prompt = await this.replaceStrings(prompt, messages, this.convo_examples);
let generation; let generation;
let imageData = null;
if (settings.vision_mode === 'always_active' && messages.length > 0) {
const lastMessage = messages[messages.length - 1];
// Check if the last message has an imagePath and if the model supports raw image input
if (lastMessage.imagePath && this.chat_model.supportsRawImageInput) {
try {
// Construct the full path to the image file
const agentScreenshotDir = path.join('bots', this.agent.name, 'screenshots');
const imageFullPath = path.join(agentScreenshotDir, lastMessage.imagePath);
console.log(`[Prompter] Attempting to read image for always_active mode: ${imageFullPath}`);
imageData = await fs.readFile(imageFullPath); // Read as buffer
console.log('[Prompter] Image data prepared for chat model.');
} catch (err) {
console.error(`[Prompter] Error reading image file ${lastMessage.imagePath}:`, err);
imageData = null; // Proceed without image data if reading fails
}
}
}
try { try {
generation = await this.chat_model.sendRequest(messages, prompt); generation = await this.chat_model.sendRequest(messages, prompt, imageData);
if (typeof generation !== 'string') { if (typeof generation !== 'string') {
console.error('Error: Generated response is not a string', generation); console.error('Error: Generated response is not a string', generation);
throw new Error('Generated response is not a string'); throw new Error('Generated response is not a string');