From 1be24f4867c2a49d01c591fe2bab8ad4f07a007e Mon Sep 17 00:00:00 2001
From: gmuffiness
Date: Wed, 15 Jan 2025 17:26:13 +0900
Subject: [PATCH] feat: add screenshots and look action works on gpt

---
 package.json                  |  4 +-
 settings.js                   |  4 +-
 src/agent/commands/actions.js | 55 ++++++++++++++++++++
 src/agent/library/skills.js   | 94 +++++++++++++++++++++++++++++++++++
 4 files changed, 154 insertions(+), 3 deletions(-)

diff --git a/package.json b/package.json
index 689f8db..00901de 100644
--- a/package.json
+++ b/package.json
@@ -24,7 +24,9 @@
         "yargs": "^17.7.2",
         "socket.io": "^4.7.2",
         "socket.io-client": "^4.7.2",
-        "express": "^4.18.2"
+        "express": "^4.18.2",
+        "three": "0.128.0",
+        "node-canvas-webgl": "PrismarineJS/node-canvas-webgl"
     },
     "scripts": {
         "postinstall": "patch-package",
diff --git a/settings.js b/settings.js
index a4681fa..f8dc1ba 100644
--- a/settings.js
+++ b/settings.js
@@ -2,7 +2,7 @@ export default
 {
     "minecraft_version": "1.20.4", // supports up to 1.21.1
     "host": "127.0.0.1", // or "localhost", "your.ip.address.here"
-    "port": 55916,
+    "port": 56069,
     "auth": "offline", // or "microsoft"
 
     // the mindserver manages all agents and hosts the UI
@@ -25,7 +25,7 @@ export default
         // using more than 1 profile requires you to /msg each bot indivually
     ],
     "load_memory": false, // load memory from previous session
-    "init_message": "Respond with hello world and your name", // sends to all on spawn
+    // "init_message": "Respond with hello world and your name", // sends to all on spawn
     "only_chat_with": [], // users that the bots listen to and send general messages to. if empty it will chat publicly
 
     "language": "en", // translate to/from this language. Supports these language names: https://cloud.google.com/translate/docs/languages
diff --git a/src/agent/commands/actions.js b/src/agent/commands/actions.js
index 34e6693..1c6bbfe 100644
--- a/src/agent/commands/actions.js
+++ b/src/agent/commands/actions.js
@@ -1,6 +1,8 @@
 import * as skills from '../library/skills.js';
 import settings from '../../../settings.js';
 import convoManager from '../conversation.js';
+import fs from 'fs';
+import { GPT } from '../../models/gpt.js';
 
 function runAsAction (actionFn, resume = false, timeout = -1) {
     let actionLabel = null; // Will be set on first use
@@ -407,6 +409,59 @@ export const actionsList = [
             return `Converstaion with ${player_name} ended.`;
         }
     },
+    {
+        name: '!takeScreenshot',
+        description: 'Takes and saves a screenshot of the specified coordinates.',
+        params: {
+            'x': {
+                type: 'int',
+                description: 'x coordinate to capture',
+                optional: true
+            },
+            'y': {
+                type: 'int',
+                description: 'y coordinate to capture',
+                optional: true
+            },
+            'z': {
+                type: 'int',
+                description: 'z coordinate to capture',
+                optional: true
+            },
+            'filename': {
+                type: 'string',
+                description: 'Filename to save (without extension). If not specified, saves with timestamp.',
+                optional: true
+            }
+        },
+        perform: runAsAction(async (agent, x, y, z, filename) => {
+            await skills.takeScreenshot(agent.bot, x, y, z, filename);
+        })
+    },
+    {
+        name: '!look',
+        description: 'Takes a screenshot of specified coordinates and analyzes its contents.',
+        params: {
+            'x': {
+                type: 'int',
+                description: 'x coordinate to look at',
+                optional: true
+            },
+            'y': {
+                type: 'int',
+                description: 'y coordinate to look at',
+                optional: true
+            },
+            'z': {
+                type: 'int',
+                description: 'z coordinate to look at',
+                optional: true
+            }
+        },
+        perform: runAsAction(async (agent, x, y, z) => {
+            await skills.look(agent, x, y, z);
+        })
+    },
     // { // commented for now, causes confusion with goal command
     //     name: '!npcGoal',
     //     description: 'Set a simple goal for an item or building to automatically work towards. Do not use for complex goals.',
diff --git a/src/agent/library/skills.js b/src/agent/library/skills.js
index be5882f..e492d16 100644
--- a/src/agent/library/skills.js
+++ b/src/agent/library/skills.js
@@ -2,6 +2,8 @@ import * as mc from "../../utils/mcdata.js";
 import * as world from "./world.js";
 import pf from 'mineflayer-pathfinder';
 import Vec3 from 'vec3';
+import fs from 'fs';
+import { Camera } from "../../utils/camera.js";
 
 
 export function log(bot, message) {
@@ -1340,3 +1342,95 @@ export async function activateNearestBlock(bot, type) {
     log(bot, `Activated ${type} at x:${block.position.x.toFixed(1)}, y:${block.position.y.toFixed(1)}, z:${block.position.z.toFixed(1)}.`);
     return true;
 }
+
+export async function takeScreenshot(bot, x, y, z, filename=null) {
+    /**
+     * Takes a screenshot from the bot's point of view and saves it as a jpg file.
+     * If x, y and z are all given the bot turns to look at that position first,
+     * otherwise the bot's current view direction is kept.
+     * @param {MinecraftBot} bot, reference to the minecraft bot.
+     * @param {number} x, the x coordinate to look at (optional).
+     * @param {number} y, the y coordinate to look at (optional).
+     * @param {number} z, the z coordinate to look at (optional).
+     * @param {string} filename, filename to save (without extension). Defaults to a timestamped name.
+     * @returns {Promise} resolves to [true, filename] on success or [false, null] on failure.
+     * @example
+     * await skills.takeScreenshot(bot, 100, 65, -200, "my_screenshot");
+     **/
+    const hasTarget = [x, y, z].every(Number.isFinite);
+
+    try {
+        bot.camera = new Camera(bot);
+        await new Promise(resolve => bot.camera.once('ready', resolve));
+
+        if (hasTarget) {
+            await bot.lookAt(new Vec3(x, y, z));
+        }
+        await new Promise(resolve => setTimeout(resolve, 500));
+
+        if (filename === null) {
+            const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
+            filename = `screenshot_${timestamp}`;
+        }
+        await bot.camera.takePicture(filename, x, y, z);
+
+        log(bot, `Screenshot saved: bots/${bot.username}/screenshots/${filename}.jpg`);
+        if (hasTarget) {
+            log(bot, `Target coordinates: x:${x}, y:${y}, z:${z}`);
+        }
+        return [true, filename];
+    } catch (err) {
+        log(bot, `Failed to take screenshot: ${err.message}`);
+        return [false, null];
+    }
+}
+
+export async function look(agent, x, y, z) {
+    /**
+     * Takes a screenshot and asks the agent's chat model to briefly describe it.
+     * The model's reply is written to the bot's output log.
+     * @param {Agent} agent, the agent whose bot, history and chat model are used.
+     * @param {number} x, the x coordinate to look at (optional).
+     * @param {number} y, the y coordinate to look at (optional).
+     * @param {number} z, the z coordinate to look at (optional).
+     * @returns {Promise} resolves to true if the image was captured and described, false otherwise.
+     * @example
+     * await skills.look(agent, 100, 65, -200);
+     **/
+    const bot = agent.bot;
+
+    const [success, filename] = await takeScreenshot(bot, x, y, z);
+    if (!success) {
+        // takeScreenshot has already logged why it failed
+        return false;
+    }
+
+    try {
+        const imagePath = `bots/${bot.username}/screenshots/${filename}.jpg`;
+        const imageBuffer = await fs.promises.readFile(imagePath);
+        const base64Image = imageBuffer.toString('base64');
+
+        // copy first so the image message is not pushed onto the array getHistory() returned
+        const messages = [...agent.history.getHistory()];
+        messages.push({
+            role: "user",
+            content: [
+                { type: "text", text: "Briefly describe the screen you are looking at now." },
+                {
+                    type: "image_url",
+                    image_url: {
+                        "url": `data:image/jpeg;base64,${base64Image}`,
+                    }
+                }
+            ]
+        });
+
+        const res = await agent.prompter.chat_model.sendRequest(messages, `You are a playful Minecraft bot. Briefly describe the screen you are looking at now.`);
+
+        log(bot, res);
+        return true;
+    } catch (error) {
+        log(bot, `Error analyzing image: ${error.message}`);
+        return false;
+    }
+}