Merge branch 'main' into improve-coder

2025-08-11 01:35:34 +02:00 · 2025-03-04 15:41:19 -06:00 · 2025-03-04 15:41:19 -06:00 · 783641df3e
commit 783641df3e
parent ca2da30f54 0b372b3c66
8 changed files with 383 additions and 84 deletions
--- a/.gitignore
+++ b/.gitignore
@ -13,3 +13,6 @@ services/viaproxy/plugins/**
 services/viaproxy/ViaLoader/**
 services/viaproxy/saves.json
 services/viaproxy/viaproxy.yml
 tmp/
 wandb/
 experiments/
--- a/evaluation_script.py
+++ b/evaluation_script.py
@ -1,9 +1,13 @@
 import argparse
 import json
 import shutil
 import subprocess
 import time
 from datetime import datetime
 import re
 import sys
 import os
 import time
 def read_settings(file_path):
    """Read and parse the settings.js file to get agent profiles."""
@ -80,68 +84,267 @@ def update_results_file(task_id, success_count, total_count, time_taken, experim
        f.write(f"Average time per experiment: {total_time / total_count:.2f} seconds\n")
        f.write(f"Last updated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
 def run_experiment(task_path, task_id, num_exp):
    """Run the specified number of experiments and track results."""
    # Read agent profiles from settings.js
    agents = read_settings(file_path="settings.js")
    print(f"Detected agents: {agents}")
-    # Generate timestamp at the start of experiments
+def set_environment_variable_tmux_session(session_name, key, value):
-    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+    """Set an environment variable for the current process."""
-    results_filename = f"results_{task_id}_{timestamp}.txt"
+    subprocess.run(["tmux", "send-keys", "-t", session_name, f"export {key}={value}", "C-m"])
    print(f"Results will be saved to: {results_filename}")
-    success_count = 0
+def launch_parallel_experiments(task_path, 
-    experiment_results = []
+                                num_exp, 
                                exp_name, 
                                num_agents=2, 
                                model="gpt-4o", 
                                num_parallel=1):
-    for exp_num in range(num_exp):
+    with open(task_path, 'r', encoding='utf-8') as file:
-        print(f"\nRunning experiment {exp_num + 1}/{num_exp}")
+        content = file.read()
    json_data = json.loads(content)
-        start_time = time.time()
+    task_ids = json_data.keys()
-        # Run the node command
+    # split the task_ids into num_parallel groups
-        cmd = f"node main.js --task_path {task_path} --task_id {task_id}"
+    task_ids = list(task_ids)
-        try:
+    task_ids_split = [task_ids[i::num_parallel] for i in range(num_parallel)]
            subprocess.run(cmd, shell=True, check=True)
        except subprocess.CalledProcessError as e:
            print(f"Error running experiment: {e}")
            continue
-        # Check if task was successful
+    servers = create_server_files("../server_data/", num_parallel)
-        success = check_task_completion(agents)
+    date_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
-        if success:
+    experiments_folder = f"experiments/{exp_name}_{date_time}"
-            success_count += 1
+    exp_name = f"{exp_name}_{date_time}"
-            print(f"Experiment {exp_num + 1} successful")
+
    # start wandb
    os.makedirs(experiments_folder, exist_ok=True)
    for i, server in enumerate(servers):
        launch_server_experiment(task_path, task_ids_split[i], num_exp, server, experiments_folder, exp_name)
        time.sleep(5)
 def launch_server_experiment(task_path, 
                             task_ids, 
                             num_exp, 
                             server, 
                             experiments_folder,
                             exp_name="exp", 
                             num_agents=2, 
                             model="gpt-4o"):
    """
    Launch a Minecraft server and run experiments on it.
    @param task_path: Path to the task file
    @param task_ids: IDs of the tasks to run
    @param num_exp: Number of experiments to run
    @param server: Tuple containing server path and port
    @param experiments_folder: Folder to store experiment results
    @param exp_name: Name of the experiment for wandb dataset
    @param num_agents: Number of agents to run
    @param model: Model to use for the agents
    """
    server_path, server_port = server
    edit_file(os.path.join(server_path, "server.properties"), {"server-port": server_port})
    mindserver_port = server_port - 55916 + 8080
    # set up server and agents 
    session_name = str(server_port - 55916)
    if num_agents == 2:
        agent_names = [f"andy_{session_name}", f"jill_{session_name}"]
        models = [model] * 2
    else:
-            print(f"Experiment {exp_num + 1} failed")
+        agent_names = [f"andy_{session_name}", f"jill_{session_name}", f"bob_{session_name}"]
        models = [model] * 3
    make_profiles(agent_names, models)
-        end_time = time.time()
+    # edit_file("settings.js", {"profiles": [f"./{agent}.json" for agent in agent_names]})
-        time_taken = end_time - start_time
+    agent_profiles = [f"./{agent}.json" for agent in agent_names]
    agent_profiles_str = f"\'[\"{agent_profiles[0]}\", \"{agent_profiles[1]}\"]\'"
    print(agent_profiles_str)
    launch_world(server_path, session_name="server_" + session_name, agent_names=agent_names)
-        # Store individual experiment result
+    subprocess.run(['tmux', 'new-session', '-d', '-s', session_name], check=True) 
        experiment_results.append({
            'success': success,
            'time_taken': time_taken
        })
-        # Update results file after each experiment using the constant filename
+    # set environment variables
-        update_results_file(task_id, success_count, exp_num + 1, time_taken, experiment_results, results_filename)
+    set_environment_variable_tmux_session(session_name, "MINECRAFT_PORT", server_port)
    set_environment_variable_tmux_session(session_name, "MINDSERVER_PORT", mindserver_port)
    set_environment_variable_tmux_session(session_name, "PROFILES", agent_profiles_str)
-        # Small delay between experiments
+    script_content = ""
-        time.sleep(1)
+    for task_id in task_ids:
        cmd = f"node main.js --task_path {task_path} --task_id {task_id}"
        cp_cmd = f"cp {agent_names[0]}.json {server_path}bots/{agent_names[0]}/profile.json"
        for _ in range(num_exp):
            script_content += f"{cmd}\n"
            script_content += "sleep 2\n"
            for agent in agent_names:
                cp_cmd = f"cp bots/{agent}/memory.json {experiments_folder}/{task_id}_{agent}_{_}.json"
                script_content += f"{cp_cmd}\n"
                script_content += "sleep 1\n"
                script_content += f"echo 'Uploading {experiments_folder}/{task_id}_{agent}_{_}.json to wandb'\n"
                wandb_cmd = f"wandb artifact put {experiments_folder}/{task_id}_{agent}_{_}.json --name {exp_name}_{task_id}_{agent}_{_} --type dataset"
                script_content += f"echo '{wandb_cmd}'\n"
                script_content += f"{wandb_cmd}\n"
                script_content += "sleep 1\n"
            script_content += "sleep 1\n"
-    final_ratio = success_count / num_exp
+    # Create a temporary shell script file
-    print(f"\nExperiments completed. Final success ratio: {final_ratio:.2f}")
+    script_file = f"./tmp/experiment_script_{session_name}.sh"
    script_dir = os.path.dirname(script_file)
    os.makedirs(script_dir, exist_ok=True)
    # Call the function before writing the script file
    with open(script_file, 'w') as f:
        f.write(script_content)
    script_file_run = "bash " + script_file
    # Execute the shell script using subprocess
    subprocess.run(["tmux", "send-keys", "-t", session_name, script_file_run, "C-m"])
    # subprocess.run(["tmux", "send-keys", "-t", session_name, f"/op {agent_names[0]}", "C-m"])
 def make_profiles(agent_names, models):
    """Write one JSON profile file per agent into the current directory.

    Each file is named ``<agent_name>.json`` and holds the agent's name, its
    model, and a ``modes`` mapping with ``hunting`` switched off.

    @param agent_names: Names of the agents; also used as the file stems
    @param models: Model identifiers, one per agent, in the same order
    """
    assert len(agent_names) == len(models)
    for agent_name, model_name in zip(agent_names, models):
        profile = {
            "name": agent_name,
            "model": model_name,
            "modes": {"hunting": False},
        }
        with open(f"{agent_name}.json", 'w') as profile_file:
            json.dump(profile, profile_file)
 def create_server_files(source_path, num_copies):
    """Clone the server directory once per parallel experiment.

    Copy number ``i`` is placed in ``../server_data_<i>/`` and its
    ``server.properties`` is pointed at port ``55916 + i``.

    @param source_path: Directory holding the template server files
    @param num_copies: How many copies to create
    @return: List of ``(server_path, port)`` tuples, one per copy
    """
    print("Creating server files...")
    print(num_copies)
    created = []
    for copy_index in range(num_copies):
        port = 55916 + copy_index
        target_dir = f"../server_data_{copy_index}/"
        copy_server_files(source_path, target_dir)
        print(target_dir)
        # target_dir ends with a slash, so plain concatenation yields a valid path.
        properties_file = target_dir + "server.properties"
        edit_file(properties_file, {"server-port": port})
        created.append((target_dir, port))
    return created
 def edit_file(file, content_dict):
    """Rewrite ``key=value`` lines of a text file in place.

    Every line that starts with one of the keys in ``content_dict`` is
    replaced by ``<key>=<value>``; every other line is written back
    unchanged. Each input line produces exactly one output line, however
    many keys are supplied (including none).

    Errors are printed rather than raised, so a failed edit does not abort
    the caller.

    @param file: Path of the file to edit
    @param content_dict: Mapping of line prefixes (keys) to replacement values
    """
    try:
        with open(file, 'r') as f:
            lines = f.readlines()
        # Resolve the full new content before reopening for writing, so the
        # file is only truncated once every line has been decided.
        new_lines = []
        for line in lines:
            # First matching key wins. Without the break/else pairing a line
            # would be emitted once per key in content_dict.
            for key, value in content_dict.items():
                if line.startswith(key):
                    new_lines.append(f"{key}={value}\n")
                    break
            else:
                new_lines.append(line)
        with open(file, 'w') as f:
            f.writelines(new_lines)
        print(f"{file} updated with {content_dict}")
    except Exception as e:
        # Deliberately best-effort: report and carry on.
        print(f"Error editing file {file}: {e}")
 def clean_up_server_files(num_copies):
    """Remove the per-experiment server copies ``../server_data_<i>/``.

    @param num_copies: Number of copies to remove, indexed from 0
    """
    stale_dirs = (f"../server_data_{copy_index}/" for copy_index in range(num_copies))
    for stale_dir in stale_dirs:
        delete_server_files(stale_dir)
 def copy_server_files(source_path, dest_path):
    """Recursively copy the server directory tree to ``dest_path``.

    The outcome is printed either way; a failed copy is reported, not raised.

    @param source_path: Directory to copy from
    @param dest_path: Directory to create and copy into
    """
    try:
        shutil.copytree(source_path, dest_path)
    except Exception as copy_error:
        print(f"Error copying server files: {copy_error}")
    else:
        print(f"Server files copied to {dest_path}")
 def delete_server_files(dest_path):
    """Recursively delete the server directory at ``dest_path``.

    The outcome is printed either way; a failed delete (for example a
    missing directory) is reported, not raised.

    @param dest_path: Directory to remove
    """
    try:
        shutil.rmtree(dest_path)
    except Exception as delete_error:
        print(f"Error deleting server files: {delete_error}")
    else:
        print(f"Server files deleted from {dest_path}")
 def launch_world(server_path="../server_data/", agent_names=["andy", "jill"], session_name="server"):
    """Start the Minecraft server inside a new detached tmux session.

    Creates the tmux session, types the ``java -jar server.jar`` command into
    it, queues one ``/op <agent>`` line per agent, and then waits five
    seconds before returning.

    @param server_path: Directory containing ``server.jar``
    @param agent_names: Agents to grant operator rights (only read, never mutated)
    @param session_name: Name of the tmux session to create
    """
    print(server_path)
    start_cmd = f"cd {server_path} && java -jar server.jar"
    send_keys = ["tmux", "send-keys", "-t", session_name]
    subprocess.run(['tmux', 'new-session', '-d', '-s', session_name], check=True)
    subprocess.run(send_keys + [start_cmd, "C-m"])
    for agent_name in agent_names:
        subprocess.run(send_keys + [f"/op {agent_name}", "C-m"])
    time.sleep(5)
 def kill_world(session_name="server"):
    """Stop the Minecraft server and tear down its tmux session.

    Types ``stop`` into the session, waits five seconds, then kills the
    session.

    @param session_name: Name of the tmux session running the server
    """
    stop_keys = ["tmux", "send-keys", "-t", session_name, "stop", "C-m"]
    subprocess.run(stop_keys)
    time.sleep(5)
    kill_session = ["tmux", "kill-session", "-t", session_name]
    subprocess.run(kill_session)
 def detach_process(command):
    """
    Launch a subprocess and detach from it, allowing it to run independently.

    The child gets its own process group (Windows) or its own session
    (POSIX), so signals intended for the parent are not delivered to it. Its
    standard streams are connected to the null device: nothing in this
    function ever reads from the child, and an unread pipe would block the
    child as soon as the pipe buffer filled up.

    Args:
        command: A list of strings representing the command to execute, e.g., ['python', 'my_script.py'].

    Returns:
        The PID of the launched process, or None if it could not be started.
    """
    kwargs = {}
    if sys.platform == 'win32':
        kwargs.update(creationflags=subprocess.CREATE_NEW_PROCESS_GROUP)  # Windows specific
    else:
        # POSIX counterpart: run the child in a new session.
        kwargs.update(start_new_session=True)
    try:
        process = subprocess.Popen(command,
                                   stdin=subprocess.DEVNULL,   # child must not wait on our stdin
                                   stdout=subprocess.DEVNULL,  # output is discarded, never buffered
                                   stderr=subprocess.DEVNULL,
                                   close_fds=True,  # Close open file descriptors
                                   **kwargs)
    except FileNotFoundError:
        print(f"Error: Command not found: {command}")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None
    print(f"Process launched with PID: {process.pid}")
    return process.pid  # Return the PID of the detached process
 def main():
    """Parse command-line options, reset tmux and the server copies, then launch experiments.

    Optionally initialises wandb, kills the tmux server, deletes the
    ``num_parallel`` server copies, and calls ``launch_parallel_experiments``
    when no ``--task_id`` was supplied.
    """
    # edit_settings("settings.js", {"profiles": ["./andy.json", "./jill.json"], "port": 55917})
    # edit_server_properties_file("../server_data/", 55917)
    parser = argparse.ArgumentParser(description='Run Minecraft AI agent experiments')
-    parser.add_argument('task_path', help='Path to the task file')
+    parser.add_argument('--task_path', default="multiagent_crafting_tasks.json", help='Path to the task file')
-    parser.add_argument('task_id', help='ID of the task to run')
+    parser.add_argument('--task_id', default=None, help='ID of the task to run')
-    parser.add_argument('num_exp', type=int, help='Number of experiments to run')
+    parser.add_argument('--num_exp', default=1, type=int, help='Number of experiments to run')
    parser.add_argument('--num_parallel', default=1, type=int, help='Number of parallel servers to run')
    parser.add_argument('--exp_name', default="exp", help='Name of the experiment')
    parser.add_argument('--wandb', action='store_true', help='Whether to use wandb')
    parser.add_argument('--wandb-project', default="minecraft_experiments", help='wandb project name')
    args = parser.parse_args()
-    run_experiment(args.task_path, args.task_id, args.num_exp)
    # wandb is imported lazily so it is only required when --wandb is passed.
+    if args.wandb:
        import wandb
        wandb.init(project=args.wandb_project, name=args.exp_name)
    # kill all tmux session before starting
    # NOTE(review): `tmux kill-server` ends every tmux session on this server, not only
    # sessions created by this script, and the bare `except:` also hides a missing tmux
    # binary behind the "No tmux session to kill" message — confirm both are intended.
    try: 
        subprocess.run(['tmux', 'kill-server'], check=True)
    except: 
        print("No tmux session to kill")
    # delete all server files
    clean_up_server_files(args.num_parallel)
    # NOTE(review): only the task_id-is-None path is wired up; when --task_id is given
    # nothing is launched (the single-task code below is commented out).
    if args.task_id is None:
        launch_parallel_experiments(args.task_path, num_exp=args.num_exp, exp_name=args.exp_name, num_parallel=args.num_parallel)
    # servers = create_server_files("../server_data/", args.num_parallel)
    # date_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    # experiments_folder = f"{args.exp_name}_{date_time}"
    # os.makedirs(experiments_folder, exist_ok=True)
    # for server in servers:
    #     launch_server_experiment(args.task_path, [args.task_id], args.num_exp, server, experiments_folder)
    #     time.sleep(5)
    # run_experiment(args.task_path, args.task_id, args.num_exp)
 if __name__ == "__main__":
    main()
--- a/example_tasks.json
+++ b/example_tasks.json
@ -59,6 +59,23 @@
        "type": "techtree",
        "timeout": 300
    }, 
    "multiagent_techtree_1_shears": {
        "goal": "Collaborate with other agents to build a shear.",
        "conversation": "Let's collaborate to build a shear.",
        "agent_count": 2,
        "initial_inventory": {
            "0": {
                "iron_ingot": 1
            },
            "1": {
                "iron_ingot": 1
            }
        },
        "target": "shears",
        "number_of_target": 1,
        "type": "techtree",
        "timeout": 60
    },
    "smelt_ingot": {
        "goal": "Smelt 1 iron ingot and 1 copper ingot",
        "agent_count": 1,
--- a/multiagent_crafting_tasks.json
+++ b/multiagent_crafting_tasks.json
@ -0,0 +1,43 @@
 {
    "multiagent_techtree_1_stone_pickaxe": {
        "conversation": "Let's collaborate to build a stone pickaxe",
        "agent_count": 2,
        "initial_inventory": {
            "0": {
                "wooden_pickaxe": 1
            },
            "1": {
                "wooden_axe": 1
            }
        },
        "blocked_actions": {
            "0": [],
            "1": []
        },
        "target": "stone_pickaxe",
        "number_of_target": 1,
        "type": "techtree",
        "timeout": 20
    }, 
    "multiagent_techtree_1_shears": {
        "goal": "Collaborate with other agents to build a shear.",
        "conversation": "Let's collaborate to build a shear.",
        "agent_count": 2,
        "initial_inventory": {
            "0": {
                "iron_ingot": 1
            },
            "1": {
                "iron_ingot": 1
            }
        },
        "blocked_actions": {
            "0": [],
            "1": []
        },
        "target": "shears",
        "number_of_target": 1,
        "type": "techtree",
        "timeout": 20
    }
 }
--- a/package.json
+++ b/package.json
@ -8,7 +8,7 @@
        "canvas": "^3.1.0",
        "express": "^4.18.2",
        "google-translate-api-x": "^10.7.1",
-        "groq-sdk": "^0.5.0",
+        "groq-sdk": "^0.15.0",
        "minecraft-data": "^3.78.0",
        "mineflayer": "^4.23.0",
        "mineflayer-armor-manager": "^2.0.1",
--- a/settings.js
+++ b/settings.js
@ -2,17 +2,17 @@ export default
 {
    "minecraft_version": "1.20.4", // supports up to 1.21.1
    "host": "127.0.0.1", // or "localhost", "your.ip.address.here"
-    "port": 55916,
+    "port": process.env.MINECRAFT_PORT || 55916,
    "auth": "offline", // or "microsoft"
    // the mindserver manages all agents and hosts the UI
    "host_mindserver": true, // if true, the mindserver will be hosted on this machine. otherwise, specify a public IP address
    "mindserver_host": "localhost",
-    "mindserver_port": 8080,
+    "mindserver_port": process.env.MINDSERVER_PORT || 8080,
    // the base profile is shared by all bots for default prompts/examples/modes
    "base_profile": "./profiles/defaults/survival.json", // also see creative.json, god_mode.json
-    "profiles": [
+    "profiles": ((process.env.PROFILES) && JSON.parse(process.env.PROFILES)) || [
        "./andy.json",
        // "./profiles/gpt.json",
        // "./profiles/claude.json",
--- a/src/agent/tasks.js
+++ b/src/agent/tasks.js
@ -36,7 +36,6 @@ export class TaskValidator {
    }
 }
 export class Task {
    constructor(agent, task_path, task_id) {
        this.agent = agent;
@ -50,7 +49,11 @@ export class Task {
            this.taskTimeout = this.data.timeout || 300;
            this.taskStartTime = Date.now();
            this.validator = new TaskValidator(this.data, this.agent);
-            this.blocked_actions = this.data.blocked_actions || [];
+            if (this.data.blocked_actions) {
                this.blocked_actions = this.data.blocked_actions[this.agent.count_id.toString()] || [];
            } else {
                this.blocked_actions = [];
            }
            this.restrict_to_inventory = !!this.data.restrict_to_inventory;
            if (this.data.goal)
                this.blocked_actions.push('!endGoal');
@ -81,11 +84,6 @@ export class Task {
    isDone() {
        if (this.validator && this.validator.validate())
            return {"message": 'Task successful', "code": 2};
        // TODO check for other terminal conditions
        // if (this.task.goal && !this.self_prompter.isActive())
        //     return {"message": 'Agent ended goal', "code": 3};
        // if (this.task.conversation && !inConversation())
        //     return {"message": 'Agent ended conversation', "code": 3};
        if (this.taskTimeout) {
            const elapsedTime = (Date.now() - this.taskStartTime) / 1000;
            if (elapsedTime >= this.taskTimeout) {
@ -105,6 +103,10 @@ export class Task {
        bot.chat(`/clear ${name}`);
        console.log(`Cleared ${name}'s inventory.`);
        //kill all drops
        if (this.agent.count_id === 0) {
            bot.chat(`/kill @e[type=item]`);
        }
        //wait for a bit so inventory is cleared
        await new Promise((resolve) => setTimeout(resolve, 500));
        let initial_inventory = null;
--- a/src/models/groq.js
+++ b/src/models/groq.js
@ -1,53 +1,84 @@
 import Groq from 'groq-sdk'
 import { getKey } from '../utils/keys.js';
 // THIS API IS NOT TO BE CONFUSED WITH GROK!
 // Go to grok.js for that. :)
-// Umbrella class for Mixtral, LLama, Gemma...
+// Umbrella class for everything under the sun... That GroqCloud provides, that is.
 export class GroqCloudAPI {
    constructor(model_name, url, params) {
        this.model_name = model_name;
        this.url = url;
        this.params = params || {};
        // ReplicateAPI theft :3
        if (this.url) {
        // Remove any mention of "tools" from params:
        if (this.params.tools)
            delete this.params.tools;
        // This is just a bit of future-proofing in case we drag Mindcraft in that direction.
        // I'm going to do a sneaky ReplicateAPI theft for a lot of this, aren't I?
        if (this.url)
            console.warn("Groq Cloud has no implementation for custom URLs. Ignoring provided URL.");
-        }
+
        this.groq = new Groq({ apiKey: getKey('GROQCLOUD_API_KEY') });
    }
    async sendRequest(turns, systemMessage, stop_seq=null) {
-        let messages = [{"role": "system", "content": systemMessage}].concat(turns);
+
        let messages = [{"role": "system", "content": systemMessage}].concat(turns); // The standard for GroqCloud is just appending to a messages array starting with the system prompt, but
                                                                                     // this is perfectly acceptable too, and I recommend it. 
                                                                                     // I still feel as though I should note it for any future revisions of MindCraft, though.
        // These variables look odd, but they're for the future. Please keep them intact.
        let raw_res = null;
        let res = null;
        let tool_calls = null;
        try {
            console.log("Awaiting Groq response...");
-            if (!this.params.max_tokens) {
+
-                this.params.max_tokens = 16384;
+            if (this.params.max_tokens) {
                console.warn("GROQCLOUD WARNING: A profile is using `max_tokens`. This is deprecated. Please move to `max_completion_tokens`.");
                this.params.max_completion_tokens = this.params.max_tokens;
                delete this.params.max_tokens;
            }
            if (!this.params.max_completion_tokens) {
                this.params.max_completion_tokens = 8000; // Set it lower. This is a common theme.
            }
            let completion = await this.groq.chat.completions.create({
                "messages": messages,
-                "model": this.model_name || "mixtral-8x7b-32768",
+                "model": this.model_name || "llama-3.3-70b-versatile",
-                "stream": true,
+                "stream": false,
                "stop": stop_seq,
                ...(this.params || {})
            });
-            let temp_res = "";
+            raw_res = completion.choices[0].message;
-            for await (const chunk of completion) {
+            res = raw_res.content;
                temp_res += chunk.choices[0]?.delta?.content || '';
            }
            res = temp_res;
        }
        catch(err) {
            console.log(err);
            res = "My brain just kinda stopped working. Try again.";
        }
        return res;
    }
-    async embed(text) {
+    async embed(_) {
        throw new Error('Embeddings are not supported by Groq.');
    }
 }