From 84d8ab0c5eeacb5e44d4454c543095208a85f5f1 Mon Sep 17 00:00:00 2001 From: Isadora White Date: Mon, 21 Apr 2025 16:20:35 -0700 Subject: [PATCH] fixed task paths --- .gitignore | 1 + minecollab.md | 2 + tasks/evaluation_script.py | 436 +++++++++++++------------------------ 3 files changed, 157 insertions(+), 282 deletions(-) diff --git a/.gitignore b/.gitignore index 3610774..7a1b80f 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,4 @@ tasks/construction_tasks/train_multiagent_construction_tasks.json tasks/construction_tasks/test/** tasks/construction_tasks/train/** server_data* +.DS_Store \ No newline at end of file diff --git a/minecollab.md b/minecollab.md index 5d65e39..37aa779 100644 --- a/minecollab.md +++ b/minecollab.md @@ -1,5 +1,7 @@ # MineCollab +MineCollab is a versatile benchmark for assessing the embodied and collaborative communication abilities of agents. + ## Tasks ### Cooking diff --git a/tasks/evaluation_script.py b/tasks/evaluation_script.py index 22523f9..1111a63 100644 --- a/tasks/evaluation_script.py +++ b/tasks/evaluation_script.py @@ -16,11 +16,6 @@ import socket from tqdm import tqdm import boto3 -# Calculate project root directory -project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -# Define tasks directory -tasks_dir = os.path.dirname(os.path.abspath(__file__)) - BLOCKED_ACTIONS_COOKING = [ '!activate', '!attackPlayer', '!checkBlueprint', '!checkBlueprintLevel', '!clearChat', '!clearFurnace', '!consume', '!craftable', '!discard', @@ -202,10 +197,6 @@ def check_folder_results(folder_path): def read_settings(file_path): """Read and parse the settings.js file to get agent profiles.""" - # Ensure file_path is absolute or relative to project_root - if not os.path.isabs(file_path): - file_path = os.path.join(project_root, file_path) - with open(file_path, 'r', encoding='utf-8') as file: content = file.read() @@ -233,10 +224,7 @@ def read_settings(file_path): def update_keys_json(): """Update the keys.json file with the specified key-value pair.""" - keys_example_path = os.path.join(project_root, "keys.example.json") - keys_path = os.path.join(project_root, "keys.json") - - with open(keys_example_path, 'r', encoding='utf-8') as file: + with open("keys.example.json", 'r', encoding='utf-8') as file: content = file.read() data = json.loads(content) @@ -246,7 +234,7 @@ def update_keys_json(): if env_value: # If the variable exists, update it data[key] = env_value - with open(keys_path, 'w', encoding='utf-8') as file: + with open("keys.json", 'w', encoding='utf-8') as file: json.dump(data, file, indent=4) def set_environment_variable_tmux_session(session_name, key, value): @@ -271,14 +259,6 @@ def launch_parallel_experiments(task_path, block_conversation=False, run_in_tmux=True): - # Resolve relative template_profile path - if not os.path.isabs(template_profile): - template_profile = os.path.join(project_root, template_profile) - - # Resolve relative task_path path - if not os.path.isabs(task_path): - task_path = os.path.join(project_root, task_path) - with open(task_path, 'r', encoding='utf-8') as file: content = file.read() json_data = json.loads(content) @@ -298,9 +278,9 @@ def launch_parallel_experiments(task_path, world_name = "Superflat" if run_in_tmux: - servers = create_server_files("./server_data/", num_parallel, world_name=world_name) + servers = create_server_files("./tasks/server_data/", num_parallel, world_name=world_name) else: - servers = [(f"./server_data_{i}/", 55916 + i) for i in range(num_parallel)] + servers = [(f"./tasks/server_data_{i}/", 55916 + i) for i in range(num_parallel)] date_time = datetime.now().strftime("%m-%d_%H-%M") experiments_folder = f"experiments/{exp_name}_{date_time}" exp_name = f"{exp_name}_{date_time}" @@ -387,16 +367,19 @@ def launch_server_experiment(task_path, block_conversation=False, run_in_tmux=True): - # Resolve relative template_profile path - if not os.path.isabs(template_profile): - template_profile = os.path.join(project_root, template_profile) - - # Resolve relative task_path path - if not os.path.isabs(task_path): - task_path = os.path.join(project_root, task_path) - - experiments_folder = os.path.join(project_root, experiments_folder) - + """ + Launch a Minecraft server and run experiments on it. + @param task_path: Path to the task file + @param task_ids: IDs of the tasks to run + @param num_exp: Number of experiments to run + @param server: Tuple containing server path and port + @param experiments_folder: Folder to store experiment results + @param exp_name: Name of the experiment for wandb dataset + @param num_agents: Number of agents to run + @param model: Model to use for the agents + @param s3: Boolean flag to enable S3 upload + @param bucket_name: Name of the S3 bucket + """ server_path, server_port = server edit_file(os.path.join(server_path, "server.properties"), {"server-port": server_port}) mindserver_port = server_port - 55916 + 8080 @@ -473,59 +456,6 @@ def launch_server_experiment(task_path, session_name=session_name, run_in_tmux=run_in_tmux) - # add the bots as op - # op_script_content = "sleep 5\n\op @p" * 20 - # op_script_file = f"./tmp/op_script_{session_name}.sh" - # make_script_file_and_run(op_script_content, "server_" + session_name, op_script_file) - # blocked_actions = [] - # if not no_pruning: - # if task_type == "cooking": - # blocked_actions = BLOCKED_ACTIONS_COOKING - # elif task_type == "techtree": - # blocked_actions = BLOCKED_ACTIONS_CRAFTING - # elif task_type == "construction": - # blocked_actions = BLOCKED_ACTIONS_CONSTRUCTION - # if block_conversation: - # blocked_actions += ["!endConversation", "!startConversation"] - # set_environment_variable_tmux_session(session_name, "BLOCKED_ACTIONS", blocked_actions) - - - - # script_content = "" - # for task_id in task_ids: - # # Create a separate folder for each task_id - # task_folder = os.path.join(experiments_folder, str(task_id)) - # os.makedirs(task_folder, exist_ok=True) - # assert os.path.exists(task_folder), f"Directory {task_folder} was not created" - # print(f"Created directory: {task_folder}") - - # cmd = f"node main.js --task_path \'{task_path}\' --task_id {task_id}" - # cp_cmd = f"cp {agent_names[0]}.json {server_path}bots/{agent_names[0]}/profile.json" - # for _ in range(num_exp): - # script_content += f"{cmd}\n" - # script_content += "sleep 2\n" - # for agent in agent_names: - # agent_file_path = os.path.join(task_folder, f"{agent}_{_}.json") - # script_content += f"echo 'Saving to {agent_file_path}'\n" - # cp_cmd = f"cp bots/{agent}/memory.json {agent_file_path}" - # script_content += f"echo '{cp_cmd}'\n" - # script_content += f"{cp_cmd}\n" - # script_content += "sleep 1\n" - # if s3: - # s3_cmd = f"aws s3 cp {agent_file_path} s3://{s3_path}/{task_id}/{agent}_{_}.json" - # script_content += f"echo 'Uploading {agent_file_path} to S3'\n" - # script_content += f"echo '{s3_cmd}'\n" - # script_content += f"{s3_cmd}\n" - # script_content += "sleep 1\n" - # script_content += f"sleep 10\n" - # if s3: - # for agent in agent_names: - # script_content += f"aws s3 cp bots/{agent} s3://{s3_path}/bots/{agent} --recursive\n" - - # # Create a temporary shell script file - # script_file = f"./tmp/experiment_script_{session_name}.sh" - # make_script_file_and_run(script_content, script_file, session_name=session_name, run_in_tmux=True) - def run_script(task_path, task_ids, num_exp, @@ -536,66 +466,55 @@ def run_script(task_path, s3_path="mindcraft-experiments", session_name="0", run_in_tmux=True,): - - # Resolve relative task_path path - if not os.path.isabs(task_path): - task_path = os.path.join(project_root, task_path) - - # Resolve relative experiments_folder path - if not os.path.isabs(experiments_folder): - experiments_folder = os.path.join(project_root, experiments_folder) - - # Resolve relative server_path path - if not os.path.isabs(server_path): - server_path = os.path.join(project_root, server_path) + script_content = "" + for task_id in task_ids: + # Create a separate folder for each task_id + task_folder = os.path.join(experiments_folder, str(task_id)) + os.makedirs(task_folder, exist_ok=True) + assert os.path.exists(task_folder), f"Directory {task_folder} was not created" + print(f"Created directory: {task_folder}") - # Construct command (assuming main.js is in root) - main_js_path = os.path.join(project_root, "main.js") - - for exp in range(num_exp): - for task_id in task_ids: - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - exp_folder = os.path.join(experiments_folder, f"{task_id}_{exp}_{timestamp}") - - # Need to create the folder first if using subprocess and cwd - os.makedirs(exp_folder, exist_ok=True) + cmd = f"node main.js --task_path \'{task_path}\' --task_id {task_id}" + cp_cmd = f"cp {agent_names[0]}.json {server_path}bots/{agent_names[0]}/profile.json" + for _ in range(num_exp): + script_content += f"{cmd}\n" + script_content += "sleep 2\n" + for agent in agent_names: + agent_file_path = os.path.join(task_folder, f"{agent}_{_}.json") + script_content += f"echo 'Saving to {agent_file_path}'\n" + cp_cmd = f"cp bots/{agent}/memory.json {agent_file_path}" + script_content += f"echo '{cp_cmd}'\n" + script_content += f"{cp_cmd}\n" + script_content += "sleep 1\n" + if s3: + s3_cmd = f"aws s3 cp {agent_file_path} s3://{s3_path}/{task_id}/{agent}_{_}.json" + script_content += f"echo 'Uploading {agent_file_path} to S3'\n" + script_content += f"echo '{s3_cmd}'\n" + script_content += f"{s3_cmd}\n" + script_content += "sleep 1\n" + script_content += f"sleep 10\n" + if s3: + for agent in agent_names: + script_content += f"aws s3 cp bots/{agent} s3://{s3_path}/bots/{agent} --recursive\n" + + # Create a temporary shell script file + script_file = f"./tmp/experiment_script_{session_name}.sh" + make_script_file_and_run(script_content, script_file, session_name=session_name, run_in_tmux=run_in_tmux) - cmd = [ - "node", main_js_path, - "--task_path", task_path, - "--task_id", task_id, - "--agent_name", agent_names[0], - "--agent_name", agent_names[1], - "--server", server_path, - "--logs_path", exp_folder, # Ensure logs_path is absolute or handled by main.js relative to root - ] - - if s3: - cmd.extend(["--s3", "--s3_path", s3_path]) - - script_content = " ".join(cmd) - make_script_file_and_run(script_content, file_name=f"exp_{exp}_{task_id}_{timestamp}.sh", session_name=session_name, run_in_tmux=run_in_tmux) - - print(f"Launched Experiment {exp+1}/{num_exp} for Task {task_id}") - time.sleep(1) # Stagger launches def make_ops(agent_names, session_name): """Make the agents operators in the Minecraft world.""" print('Making agents operators...') - # Construct path to example tasks relative to project_root - example_task_path = os.path.join(project_root, "tasks/example_tasks.json") - cmd = f"node {os.path.join(project_root, 'main.js')} --task_path {example_task_path} --task_id debug_{len(agent_names)}_agent_timeout" + cmd = f"node main.js --task_path tasks/example_tasks.json --task_id debug_{len(agent_names)}_agent_timeout" - subprocess.run(["tmux", "send-keys", "-t", session_name, cmd, "C-m"], cwd=project_root) + subprocess.run(["tmux", "send-keys", "-t", session_name, cmd, "C-m"]) time.sleep(30) subprocess.run(["tmux", "send-keys", "-t", "server_" + session_name, f"/op @a", "C-m"]) - # Check ops file inside the correct tasks/server_data/X directory - ops_file_path = os.path.join(tasks_dir, "server_data", session_name, "ops.json") - agents_op = check_agent_ops(agent_names, ops_file=ops_file_path) + agents_op = check_agent_ops(agent_names, ops_file=f"./tasks/server_data_{session_name}/ops.json") if agents_op: print("Agents are operators! You are good to go :D") else: @@ -603,15 +522,6 @@ def make_ops(agent_names, session_name): make_ops(agent_names, session_name) def check_agent_ops(agent_names, ops_file="ops.json"): - """Check if agents are OPs on the server.""" - # ops_file path is now provided absolute by caller (make_ops) - # if not os.path.isabs(ops_file): - # ops_file = os.path.join(project_root, ops_file) # OLD LOGIC - - if not os.path.exists(ops_file): - print(f"Error: ops.json file not found: {ops_file}") - return False - with open(ops_file, "r") as f: ops_data = json.load(f) @@ -626,39 +536,26 @@ def make_script_file_and_run(script_content, file_name, session_name="0", run_in_tmux=True): - # Create script inside tasks/tmp/ - script_base_dir = os.path.join(tasks_dir, "tmp") - os.makedirs(script_base_dir, exist_ok=True) - script_abs_path = os.path.join(script_base_dir, file_name) - - script_dir = os.path.dirname(script_abs_path) - # os.makedirs(script_dir, exist_ok=True) # Already handled by script_base_dir creation + script_dir = os.path.dirname(file_name) + os.makedirs(script_dir, exist_ok=True) assert os.path.exists(script_dir), f"Script directory {script_dir} was not created" print(f"Created script directory: {script_dir}") # Call the function before writing the script file - with open(script_abs_path, 'w') as f: + with open(file_name, 'w') as f: f.write(script_content) - assert os.path.exists(script_abs_path), f"Script file {script_abs_path} was not created" + assert os.path.exists(file_name), f"Script file {file_name} was not created" - script_file_run = "bash " + script_abs_path + script_file_run = "bash " + file_name # Execute the shell script using subprocess - # Run subprocess from project_root so node main.js etc work if run_in_tmux: - subprocess.run(["tmux", "send-keys", "-t", session_name, script_file_run, "C-m"], cwd=project_root) + subprocess.run(["tmux", "send-keys", "-t", session_name, script_file_run, "C-m"]) else: - subprocess.run(script_file_run.split(), cwd=project_root) + subprocess.run(script_file_run.split()) def make_profiles(agent_names, models, apis, template_profile="profiles/collab_profile.json", url="http://127.0.0.1:8000/v1"): - """Generate profile JSON files for each agent.""" - - # Resolve relative template_profile path relative to project_root - if template_profile.startswith("profiles/") and not os.path.isabs(template_profile): - template_profile = os.path.join(project_root, template_profile) - elif not os.path.isabs(template_profile): - # Assume relative to tasks dir if not in profiles/ structure - template_profile = os.path.join(tasks_dir, template_profile) + assert len(agent_names) == len(models) with open(template_profile, 'r') as f: content = f.read() @@ -682,34 +579,19 @@ def make_profiles(agent_names, models, apis, template_profile="profiles/collab_p else: profile["model"] = models[index] - # Save profiles inside tasks/profiles/ - profiles_output_dir = os.path.join(tasks_dir, "profiles") - os.makedirs(profiles_output_dir, exist_ok=True) - profile_name = f"{agent_names[index]}.json" - profile_path = os.path.join(profiles_output_dir, profile_name) - - with open(profile_path, 'w', encoding='utf-8') as outfile: - json.dump(profile, outfile, indent=4) + with open(f"{agent_names[index]}.json", 'w') as f: + json.dump(profile, f, indent=4) def create_server_files(source_path, num_copies, world_name="Forest"): - """Create multiple copies of the server files inside tasks/server_data.""" - servers = [] # Define servers list - # Ensure source_path is relative to project_root if not absolute - if not os.path.isabs(source_path): - source_path = os.path.join(project_root, source_path) - - # Base dir inside tasks/ - server_base_dir = os.path.join(tasks_dir, "server_data") - os.makedirs(server_base_dir, exist_ok=True) - + """Create multiple copies of server files for parallel experiments.""" + print("Creating server files...") + print(num_copies) + servers = [] for i in range(num_copies): - # Server copies go into tasks/server_data/0/, tasks/server_data/1/, etc. - dest_path = os.path.join(server_base_dir, str(i)) + dest_path = f"./tasks/server_data_{i}/" copy_server_files(source_path, dest_path) print(dest_path) - # Adjust path for edit_file - server_prop_path = os.path.join(dest_path, "server.properties") - edit_file(server_prop_path, {"server-port": 55916 + i, + edit_file(dest_path + "server.properties", {"server-port": 55916 + i, "level-name": world_name}) # edit_server_properties_file(dest_path, 55916 + i) servers.append((dest_path, 55916 + i)) @@ -731,24 +613,13 @@ def edit_file(file, content_dict): print(f"Error editing file {file}: {e}") def clean_up_server_files(num_copies): - """Delete server files from multiple locations within tasks/server_data.""" - server_base_dir = os.path.join(tasks_dir, "server_data") + """Delete server files from multiple locations.""" for i in range(num_copies): - # Target paths like tasks/server_data/0/ - dest_path = os.path.join(server_base_dir, str(i)) + dest_path = f"./tasks/server_data_{i}/" delete_server_files(dest_path) def copy_server_files(source_path, dest_path): - """Copy server files from source to destination (dest assumed relative to tasks_dir if not absolute).""" - # Ensure source_path is relative to project_root if not absolute - if not os.path.isabs(source_path): - source_path = os.path.join(project_root, source_path) - # Destination path is now expected inside tasks/server_data/, handled by caller (create_server_files) - # if not os.path.isabs(dest_path): - # dest_path = os.path.join(project_root, dest_path) # OLD LOGIC - - if os.path.exists(dest_path): - shutil.rmtree(dest_path) + """Copy server files to the specified location.""" try: shutil.copytree(source_path, dest_path) print(f"Server files copied to {dest_path}") @@ -773,13 +644,12 @@ def check_same_files(d1, d2): return True def delete_server_files(dest_path): - """Delete server files at the destination path (assumed relative to tasks_dir if not absolute).""" - # Path is now expected inside tasks/server_data/, handled by callers - # if not os.path.isabs(dest_path): - # dest_path = os.path.join(project_root, dest_path) # OLD LOGIC - - if os.path.exists(dest_path): + """Delete server files from the specified location.""" + try: shutil.rmtree(dest_path) + print(f"Server files deleted from {dest_path}") + except Exception as e: + print(f"Error deleting server files: {e}") if not os.path.exists(dest_path): print("Server files deleted successfully.") # else: @@ -787,26 +657,16 @@ def delete_server_files(dest_path): # delete_server_files(dest_path) -def launch_world(server_path="./server_data/", agent_names=["andy", "jill"], session_name="server", port=55916): - """Launch the Minecraft server world (server assumed inside tasks/server_data).""" - # Ensure path is relative to tasks_dir if not absolute (expecting tasks/server_data/X) - if not os.path.isabs(server_path): - server_path = os.path.join(tasks_dir, server_path) - - ops_file = os.path.join(server_path, "ops.json") # ops.json inside specific server dir - check_agent_ops(agent_names, ops_file=ops_file) - - # Launch server using tmux (cwd should be the server_path itself) - java_cmd = f"java -jar server.jar nogui" - # Create tmux session for the server +def launch_world(server_path="./tasks/server_data/", agent_names=["andy", "jill"], session_name="server", port=55916): + """Launch the Minecraft world.""" + print(f"Launching Minecraft world with port {port}...") + cmd = f"cd {server_path} && java -jar server.jar" subprocess.run(['tmux', 'new-session', '-d', '-s', session_name], check=True) - # Send command to the server session, running from its directory - subprocess.run(["tmux", "send-keys", "-t", session_name, java_cmd, "C-m"], cwd=server_path) - print(f"Launched Minecraft world in session {session_name} from {server_path} on port {port}...") - # Add a delay and check if server started - time.sleep(20) # Increased delay + subprocess.run(["tmux", "send-keys", "-t", session_name, cmd, "C-m"]) + time.sleep(10) if not test_server_running(port): - print(f"Warning: Server on port {port} didn't seem to start correctly after launch.") + print("Server failed to start. Retrying...") + launch_world(server_path, agent_names, session_name, port) def test_server_running(port=55916): host = 'localhost' @@ -827,69 +687,81 @@ def kill_world(session_name="server"): subprocess.run(["tmux", "kill-session", "-t", session_name]) def detach_process(command): - """Detach a process using tmux.""" - # Assume commands are run from project root if needed elsewhere - process = subprocess.Popen(command, shell=True, preexec_fn=os.setsid) # Example, might need cwd + """ + Launches a subprocess and detaches from it, allowing it to run independently. + + Args: + command: A list of strings representing the command to execute, e.g., ['python', 'my_script.py']. + """ + + try: + # Create a new process group so the child doesn't get signals intended for the parent. + # This is crucial for proper detachment. + kwargs = {} + if sys.platform == 'win32': + kwargs.update(creationflags=subprocess.CREATE_NEW_PROCESS_GROUP) # Windows specific + + process = subprocess.Popen(command, + stdin=subprocess.PIPE, # Prevent stdin blocking + stdout=subprocess.PIPE, # Redirect stdout + stderr=subprocess.PIPE, # Redirect stderr + close_fds=True, # Close open file descriptors + **kwargs) + + print(f"Process launched with PID: {process.pid}") + return process.pid # Return the PID of the detached process + + except FileNotFoundError: + print(f"Error: Command not found: {command}") + return None + except Exception as e: + print(f"An error occurred: {e}") + return None def main(): - parser = argparse.ArgumentParser(description="Evaluate MindCraft tasks") - parser.add_argument("--task_path", type=str, default="tasks/example_tasks.json", help="Path to the task file or directory (relative to project root)") - parser.add_argument("--task_ids", type=str, nargs="+", default=None, help="Specific task IDs to run") - parser.add_argument("--num_exp", type=int, default=1, help="Number of experiments per task") - parser.add_argument("--num_agents", type=int, default=2, help="Number of agents") - parser.add_argument("--model", type=str, default="gpt-4o-mini", help="Model name") - parser.add_argument("--api", type=str, default="openai", help="API provider") - parser.add_argument("--num_parallel", type=int, default=1, help="Number of parallel experiments") - parser.add_argument("--s3", action="store_true", help="Use S3 for storage") - parser.add_argument("--bucket_name", type=str, default="mindcraft-experiments", help="S3 bucket name") - parser.add_argument("--template_profile", type=str, default="profiles/tasks/collab_profile.json", help="Template profile path") - parser.add_argument("--insecure_coding", action="store_true", help="Allow insecure coding practices") - parser.add_argument("--url", type=str, default="http://127.0.0.1:8000/v1", help="API URL") - parser.add_argument("--check_results", action="store_true", help="Only check results in the specified folder") - parser.add_argument("--servers", type=str, nargs="+", default=["local"], help="List of server directories (e.g., 0 1 2 for server_data/0, server_data/1, etc.) or 'local' for parallel local runs") - parser.add_argument("--exp_name", type=str, default="exp", help="Experiment name prefix") - parser.add_argument("--s3_path", type=str, default="", help="S3 path prefix") - parser.add_argument("--max_messages", type=int, default=15, help="Maximum messages per agent") - parser.add_argument("--num_examples", type=int, default=2, help="Number of examples for few-shot learning") - parser.add_argument("--no_pruning", action="store_true", help="Disable pruning") - parser.add_argument("--block_conversation", action="store_true", help="Block agent conversation actions") - parser.add_argument("--run_in_tmux", action="store_false", help="Run experiment directly without tmux") # Default is True + # edit_settings("settings.js", {"profiles": ["./andy.json", "./jill.json"], "port": 55917}) + # edit_server_properties_file("../server_data/", 55917) + + parser = argparse.ArgumentParser(description='Run Minecraft AI agent experiments') + parser.add_argument('--no_launch_world', action='store_true', help='Do not launch the Minecraft world') + parser.add_argument('--task_path', default="tasks/multiagent_crafting_tasks.json", help='Path to the task file') + parser.add_argument('--num_agents', default=2, type=int, help='Number of agents to run') + parser.add_argument('--num_exp', default=1, type=int, help='Number of experiments to run') + parser.add_argument('--num_parallel', default=1, type=int, help='Number of parallel servers to run') + parser.add_argument('--exp_name', default="exp", help='Name of the experiment') + parser.add_argument('--s3', action='store_true', help='Whether to upload to s3') + parser.add_argument('--bucket_name', default="mindcraft-experiments", help='Name of the s3 bucket') + parser.add_argument('--add_keys', action='store_true', help='Create the keys.json to match the environment variables') + parser.add_argument('--template_profile', default="profiles/tasks/collab_profile.json", help='Model to use for the agents') + parser.add_argument('--model', default="gpt-4o-mini", help='Model to use for the agents') + parser.add_argument('--api', default="openai", help='API to use for the agents') + # parser.add_argument('--world_name', default="Forest", help='Name of the world') + parser.add_argument('--insecure_coding', action='store_true', help='Enable insecure coding') + parser.add_argument('--url', default="http://127.0.0.1:8000/v1") + parser.add_argument('--max_messages', default=15, type=int, help='Maximum number of messages before summarizing') + parser.add_argument('--num_examples', default=2, type=int, help='Maximum number of turns before summarizing') + parser.add_argument('--no-pruning', action='store_true', help='Disable pruning of the actions') + parser.add_argument('--block_conversation', action='store_true', help='Block conversation actions') + parser.add_argument('--check', metavar='FOLDER_PATH', help='Check and evaluate results in the specified folder without running experiments') args = parser.parse_args() - - # Resolve relative paths provided as arguments or defaults (relative to project root) - if not os.path.isabs(args.task_path): - args.task_path = os.path.join(project_root, args.task_path) - if not os.path.isabs(args.template_profile): - # Special handling for default profile path relative to project root - if args.template_profile.startswith("profiles/"): - args.template_profile = os.path.join(project_root, args.template_profile) - else: # Assume relative to tasks dir otherwise - args.template_profile = os.path.join(tasks_dir, args.template_profile) - - if args.check_results: - # Hardcode check_folder_results to read from project_root/experiments - check_dir = os.path.join(project_root, "experiments") - check_folder_results(check_dir) + print(args) + + # If --check flag is provided, evaluate results in the specified folder and exit + if args.check: + check_folder_results(args.check) return - # Default server source path relative to project_root - default_server_source = os.path.join(project_root, "server_data") - if not args.run_in_tmux: # Assuming this corresponds to needing server files - # Pass default_server_source to create_server_files - servers = create_server_files(default_server_source, args.num_parallel, world_name="Forest") # Example world name - # The rest of the logic might need adjustment if not using tmux - else: - # Logic for when run_in_tmux is True (perhaps no server creation needed here?) - # Or maybe create_server_files should always run? Adjusting based on original logic - # Let's assume server files are always needed for parallel runs - servers = create_server_files(default_server_source, args.num_parallel, world_name="Forest") # Example world name + if not args.no_launch_world: + try: + subprocess.run(['tmux', 'kill-server'], check=True) + except: + print("No tmux session to kill") - # delete all server files (now inside tasks/server_data) - # The clean_up_server_files function now uses the correct base path - clean_up_server_files(args.num_parallel) - - if hasattr(args, 'add_keys') and args.add_keys: # Check if arg exists before using + # delete all server files + if not args.no_launch_world: + clean_up_server_files(args.num_parallel) + if args.add_keys: update_keys_json() launch_parallel_experiments(args.task_path, @@ -908,7 +780,7 @@ def main(): num_examples=args.num_examples, no_pruning=args.no_pruning, block_conversation=args.block_conversation, - run_in_tmux=not args.run_in_tmux) + run_in_tmux=not args.no_launch_world) if __name__ == "__main__": main() \ No newline at end of file