feat: update skill look to lookAtPlayer & export lookAtPosition

This commit is contained in:
gmuffiness 2025-01-16 13:17:39 +09:00
parent 1be24f4867
commit f5923db43a
5 changed files with 116 additions and 124 deletions

View file

@ -25,8 +25,8 @@
"socket.io": "^4.7.2", "socket.io": "^4.7.2",
"socket.io-client": "^4.7.2", "socket.io-client": "^4.7.2",
"express": "^4.18.2", "express": "^4.18.2",
"three": "0.128.0", "node-canvas-webgl": "^0.2.6",
"node-canvas-webgl": "PrismarineJS/node-canvas-webgl" "three": "^0.128.0"
}, },
"scripts": { "scripts": {
"postinstall": "patch-package", "postinstall": "patch-package",

View file

@ -9,6 +9,8 @@
"bot_responder": "You are a minecraft bot named $NAME that is currently in conversation with another AI bot. Both of you can take actions with the !command syntax, and actions take time to complete. You are currently busy with the following action: '$ACTION' but have received a new message. Decide whether to 'respond' immediately or 'ignore' it and wait for your current action to finish. Be conservative and only respond when necessary, like when you need to change/stop your action, or convey necessary information. Example 1: You:Building a house! !newAction('Build a house.').\nOther Bot: 'Come here!'\nYour decision: ignore\nExample 2: You:Collecting dirt !collectBlocks('dirt',10).\nOther Bot: 'No, collect some wood instead.'\nYour decision: respond\nExample 3: You:Coming to you now. !goToPlayer('billy',3).\nOther Bot: 'What biome are you in?'\nYour decision: respond\nActual Conversation: $TO_SUMMARIZE\nDecide by outputting ONLY 'respond' or 'ignore', nothing else. Your decision:", "bot_responder": "You are a minecraft bot named $NAME that is currently in conversation with another AI bot. Both of you can take actions with the !command syntax, and actions take time to complete. You are currently busy with the following action: '$ACTION' but have received a new message. Decide whether to 'respond' immediately or 'ignore' it and wait for your current action to finish. Be conservative and only respond when necessary, like when you need to change/stop your action, or convey necessary information. Example 1: You:Building a house! !newAction('Build a house.').\nOther Bot: 'Come here!'\nYour decision: ignore\nExample 2: You:Collecting dirt !collectBlocks('dirt',10).\nOther Bot: 'No, collect some wood instead.'\nYour decision: respond\nExample 3: You:Coming to you now. !goToPlayer('billy',3).\nOther Bot: 'What biome are you in?'\nYour decision: respond\nActual Conversation: $TO_SUMMARIZE\nDecide by outputting ONLY 'respond' or 'ignore', nothing else. Your decision:",
"image_conversing": "You are a playful Minecraft bot. Briefly describe the screen you are looking at now.",
"modes": { "modes": {
"self_preservation": true, "self_preservation": true,
"unstuck": true, "unstuck": true,
@ -65,14 +67,20 @@
{"role": "user", "content": "derg: (FROM OTHER BOT)Thanks, see you later!"}, {"role": "user", "content": "derg: (FROM OTHER BOT)Thanks, see you later!"},
{"role": "assistant", "content": "See you later. !endConversation(\"derg\")"} {"role": "assistant", "content": "See you later. !endConversation(\"derg\")"}
], ],
[ [
{"role": "user", "content": "grombo_Xx: What do you see?"}, {"role": "user", "content": "d7297: look here"},
{"role": "assistant", "content": "Let me see... !nearbyBlocks"}, {"role": "assistant", "content": "Okay! !lookAtPlayer(\"d7297\", \"at\")"},
{"role": "system", "content": "NEARBY_BLOCKS\n- oak_log\n- dirt\n- cobblestone"}, {"role": "system", "content": "Code Output:\nLooking at player d7297. There is a giant waterfall behind d7297."},
{"role": "assistant", "content": "I see some oak logs, dirt, and cobblestone."} {"role": "assistant", "content": "Woo, that is a cool waterfall!"}
], ],
[
{"role": "user", "content": "d7297: look there"},
{"role": "assistant", "content": "Okay! !lookAtPlayer(\"d7297\", \"with\")"},
{"role": "system", "content": "Code Output:\nLooking in the same direction as d7297\nThere is a castle made of stone."},
{"role": "assistant", "content": "Wow, that stone castle looks amazing!"}
],
[ [
{"role": "user", "content": "greg: Collect 10 wood"}, {"role": "user", "content": "greg: Collect 10 wood"},
{"role": "assistant", "content": "Let me see what's nearby... !nearbyBlocks"}, {"role": "assistant", "content": "Let me see what's nearby... !nearbyBlocks"},

View file

@ -1,8 +1,6 @@
import * as skills from '../library/skills.js'; import * as skills from '../library/skills.js';
import settings from '../../../settings.js'; import settings from '../../../settings.js';
import convoManager from '../conversation.js'; import convoManager from '../conversation.js';
import fs from 'fs';
import { GPT } from '../../models/gpt.js';
function runAsAction (actionFn, resume = false, timeout = -1) { function runAsAction (actionFn, resume = false, timeout = -1) {
let actionLabel = null; // Will be set on first use let actionLabel = null; // Will be set on first use
@ -410,69 +408,33 @@ export const actionsList = [
} }
}, },
{ {
name: '!takeScreenshot', name: '!lookAtPlayer',
description: 'Takes and saves a screenshot of the specified coordinates.', description: 'Look at a player or look in the same direction as the player.',
params: { params: {
'x': { 'player_name': {
type: 'int', type: 'string',
description: 'x coordinate to capture', description: 'Name of the target player'
optional: true
}, },
'y': { 'direction': {
type: 'int', type: 'string',
description: 'y coordinate to capture', description: 'How to look ("at": look at the player, "with": look in the same direction as the player)',
optional: true enum: ['at', 'with']
},
'z': {
type: 'int',
description: 'z coordinate to capture',
optional: true
},
'filename': {
type: 'string',
description: 'Filename to save (without extension). If not specified, saves with timestamp.',
optional: true
} }
}, },
perform: runAsAction(async (agent, x, y, z, filename) => { perform: runAsAction(async (agent, player_name, direction) => {
await skills.takeScreenshot(agent.bot, x, y, z, filename); await skills.lookAtPlayer(agent, agent.bot, player_name, direction);
}) })
}, },
{ {
name: '!look', name: '!lookAtPosition',
description: 'Takes a screenshot of specified coordinates and analyzes its contents.', description: 'Look at specified coordinates.',
params: { params: {
'x': { 'x': { type: 'int', description: 'x coordinate' },
type: 'int', 'y': { type: 'int', description: 'y coordinate' },
description: 'x coordinate to look at', 'z': { type: 'int', description: 'z coordinate' }
optional: true
},
'y': {
type: 'int',
description: 'y coordinate to look at',
optional: true
},
'z': {
type: 'int',
description: 'z coordinate to look at',
optional: true
}
}, },
perform: runAsAction(async (agent, x, y, z) => { perform: runAsAction(async (agent, x, y, z) => {
await skills.look(agent, x, y, z); await skills.lookAtPosition(agent, agent.bot, x, y, z);
}) })
}, }
// { // commented for now, causes confusion with goal command
// name: '!npcGoal',
// description: 'Set a simple goal for an item or building to automatically work towards. Do not use for complex goals.',
// params: {
// 'name': { type: 'string', description: 'The name of the goal to set. Can be item or building name. If empty will automatically choose a goal.' },
// 'quantity': { type: 'int', description: 'The quantity of the goal to set. Default is 1.', domain: [1, Number.MAX_SAFE_INTEGER] }
// },
// perform: async function (agent, name=null, quantity=1) {
// await agent.npc.setGoal(name, quantity);
// agent.bot.emit('idle'); // to trigger the goal
// return 'Set npc goal: ' + agent.npc.data.curr_goal.name;
// }
// },
]; ];

View file

@ -1,9 +1,9 @@
import * as mc from "../../utils/mcdata.js"; import * as mc from "../../utils/mcdata.js";
import { Camera } from "../../utils/camera.js";
import * as world from "./world.js"; import * as world from "./world.js";
import pf from 'mineflayer-pathfinder'; import pf from 'mineflayer-pathfinder';
import Vec3 from 'vec3'; import Vec3 from 'vec3';
import fs from 'fs'; import fs from 'fs';
import { Camera } from "../../utils/camera.js";
export function log(bot, message) { export function log(bot, message) {
@ -1343,77 +1343,76 @@ export async function activateNearestBlock(bot, type) {
return true; return true;
} }
export async function takeScreenshot(bot, x, y, z, filename=null) { export async function lookAtPlayer(agent, bot, player_name, direction) {
/** /**
* Takes a screenshot from the bot's current view or specified position * Look at a player or look in the same direction as the player
* @param {MinecraftBot} bot, reference to the minecraft bot * @param {MinecraftBot} bot reference to the minecraft bot
* @param {int} x x coordinate to look at (optional) * @param {string} player_name name of the target player
* @param {int} y y coordinate to look at (optional) * @param {string} direction 'at' to look at player, 'with' to look in same direction
* @param {int} z z coordinate to look at (optional) * @returns {Promise<boolean>} whether the look action was successful
* @param {string} filename filename to save (without extension). If not specified, saves with timestamp
* @returns {Promise<boolean>} whether the screenshot was successful
* @example * @example
* await skills.takeScreenshot(bot, { name: 'my_screenshot', x: 100, y: 65, z: -200 }); * await skills.lookAtPlayer(bot, "player1", "at");
* await skills.lookAtPlayer(bot, "player1", "with");
**/ **/
try {
bot.camera = new Camera(bot);
await new Promise(resolve => bot.camera.once('ready', resolve));
await bot.lookAt(new Vec3(x, y, z)); const player = bot.players[player_name]?.entity;
await new Promise(resolve => setTimeout(resolve, 500)); if (!player) {
log(bot, `Could not find player ${player_name}`);
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
if (filename === null) {
filename = `screenshot_${timestamp}`;
}
await bot.camera.takePicture(filename, x, y, z);
log(bot, `Screenshot saved: bots/${bot.username}/screenshots/${filename}.jpg`);
log(bot, `Target coordinates: x:${x}, y:${y}, z:${z}`);
return [true, filename];
} catch (err) {
log(bot, `Failed to take screenshot: ${err.message}`);
return [false, null];
}
}
export async function look(agent, x, y, z) {
const bot = agent.bot;
const history = agent.history;
const [success, filename] = await takeScreenshot(bot, x, y, z);
if (!success) {
log(bot, `Failed to take screenshot: ${filename}`);
return false; return false;
} }
let filename;
if (direction === 'with') {
// Copy player's view direction
await bot.look(player.yaw, player.pitch);
const camera = new Camera(bot);
await new Promise(resolve => setTimeout(resolve, 500));
log(bot, `Looking in the same direction as ${player_name}`);
filename = await camera.captureDirection(player.yaw, player.pitch);
console.log(player.yaw, player.pitch);
// log(bot, `Screenshot saved: bots/${bot.username}/screenshots/${filename}.jpg`);
} else {
// Look at player's position
await bot.lookAt(new Vec3(player.position.x, player.position.y + player.height, player.position.z));
const camera = new Camera(bot);
await new Promise(resolve => setTimeout(resolve, 500));
log(bot, `Looking at player ${player_name}`);
filename = await camera.captureTargetPoint(player.position.x, player.position.y + player.height, player.position.z);
// log(bot, `Screenshot saved: bots/${bot.username}/screenshots/${filename}.jpg`);
// log(bot, `Target coordinates: x:${player.position.x}, y:${player.position.y}, z:${player.position.z}`);
}
try { try {
const imageBuffer = fs.readFileSync(`bots/${bot.username}/screenshots/${filename}.jpg`); const imageBuffer = fs.readFileSync(`bots/${bot.username}/screenshots/${filename}.jpg`);
const base64Image = imageBuffer.toString('base64'); const messages = agent.history.getHistory();
let res = await agent.prompter.promptImageConvo(messages, imageBuffer);
let messages = history.getHistory();
messages.push({
role: "user",
content: [
{ type: "text", text: "Briefly describe the screen you are looking at now." },
{
type: "image_url",
image_url: {
"url": `data:image/jpeg;base64,${base64Image}`,
}
}
]
});
console.log(messages);
let res = await agent.prompter.chat_model.sendRequest(messages, `You are a playful Minecraft bot. Briefly describe the screen you are looking at now.`);
console.log(res);
log(bot, res); log(bot, res);
return true; return true;
} catch (error) { } catch (error) {
log(bot, `Error analyzing image: ${error.message}`); log(bot, `Error analyzing image: ${error.message}`);
return false; return false;
} }
} }
export async function lookAtPosition(agent, bot, x, y, z) {
    /**
     * Turn the bot toward a coordinate, capture that view with a Camera, and log
     * the description returned by the agent's prompter for the captured image.
     * @param {Agent} agent object providing `history.getHistory()` and `prompter.promptImageConvo()`.
     * @param {MinecraftBot} bot reference to the minecraft bot.
     * @param {number} x x coordinate to look at.
     * @param {number} y y coordinate to look at; the bot actually aims at `y + 2`.
     * @param {number} z z coordinate to look at.
     * @returns {Promise<boolean>} true if the image was read and described, false if that step threw.
     * @example
     * await skills.lookAtPosition(agent, bot, 100, 65, -200);
     **/
    // NOTE(review): the +2 vertical offset is applied to both the look target and the
    // capture target; its rationale is not visible here -- confirm before changing.
    const targetY = y + 2;
    await bot.lookAt(new Vec3(x, targetY, z));
    const camera = new Camera(bot);
    // Short pause between turning and capturing.
    await new Promise(resolve => setTimeout(resolve, 500));
    // Interpolate each coordinate separately: `${x, y, z}` is a comma expression
    // and would only print z.
    log(bot, `Looking at coordinate ${x}, ${y}, ${z}`);
    // The capture call sits outside the try block, so a capture failure rejects to the caller.
    const filename = await camera.captureTargetPoint(x, targetY, z);

    try {
        // The returned name is used as the base name of a .jpg in the bot's screenshots folder.
        const imageBuffer = fs.readFileSync(`bots/${bot.username}/screenshots/${filename}.jpg`);
        const messages = agent.history.getHistory();
        const res = await agent.prompter.promptImageConvo(messages, imageBuffer);
        log(bot, res);
        return true;
    } catch (error) {
        log(bot, `Error analyzing image: ${error.message}`);
        return false;
    }
}

View file

@ -264,6 +264,29 @@ export class Prompter {
return ''; return '';
} }
async promptImageConvo(messages, imageBuffer) {
await this.checkCooldown();
let prompt = this.profile.image_conversing;
let imageMessages = [...messages];
imageMessages.push({
role: "user",
content: [
{ type: "text", text: "Briefly describe the screen you are looking at now." },
{
type: "image_url",
image_url: {
"url": `data:image/jpeg;base64,${imageBuffer.toString('base64')}`,
}
}
]
});
return await this.chat_model.sendRequest(
imageMessages,
prompt
);
}
async promptCoding(messages) { async promptCoding(messages) {
if (this.awaiting_coding) { if (this.awaiting_coding) {
console.warn('Already awaiting coding response, returning no response.'); console.warn('Already awaiting coding response, returning no response.');