diff --git a/tasks/crafting_tasks/test_tasks/filtered_tasks_3_agents.json b/tasks/crafting_tasks/test_tasks/filtered_tasks_3_agents.json index 49ce490..26df64f 100644 --- a/tasks/crafting_tasks/test_tasks/filtered_tasks_3_agents.json +++ b/tasks/crafting_tasks/test_tasks/filtered_tasks_3_agents.json @@ -155,7 +155,7 @@ "missing_items": [], "requires_crafting_table": true }, - "multiagent_crafting_requires_ctable_blue_banner_2_with_plan__depth_0_num_agents_3": { + "multiagent_crafting_requires_ctable_blue_banner_2_with_partial_plan__depth_0_num_agents_3": { "goal": "Collaborate with other agents to craft an blue_banner", "conversation": "Let's work together to craft an blue_banner.", "initial_inventory": { @@ -190,7 +190,7 @@ "missing_items": [], "requires_crafting_table": true }, - "multiagent_crafting_requires_ctable_cyan_bed_1_with_plan__depth_0_num_agents_3": { + "multiagent_crafting_requires_ctable_cyan_bed_1_with_partial_plan__depth_0_num_agents_3": { "goal": "Collaborate with other agents to craft an cyan_bed", "conversation": "Let's work together to craft an cyan_bed.", "initial_inventory": { diff --git a/tasks/evaluation_script.py b/tasks/evaluation_script.py index a57af0a..cb380f5 100644 --- a/tasks/evaluation_script.py +++ b/tasks/evaluation_script.py @@ -13,7 +13,6 @@ import json import glob import socket -from tqdm import tqdm import boto3 BLOCKED_ACTIONS_COOKING = [ @@ -119,7 +118,7 @@ def aggregate_results(local_folders): elif "construction" in task_type: task_type = "construction" - for folder_path in tqdm(local_folders): + for folder_path in local_folders: folder_name = os.path.basename(folder_path) try: diff --git a/tasks/experiment_script.sh b/tasks/experiment_script.sh index 083fb52..f5cd34e 100644 --- a/tasks/experiment_script.sh +++ b/tasks/experiment_script.sh @@ -1,2 +1,9 @@ +python3 tasks/evaluation_script.py --model claude-3-5-sonnet-latest --num_parallel 1 --num_exp 1 --exp_name "claude_2_agent_block_recipe" --template_profile ./profiles/tasks/cooking_profile.json --task_path tasks/cooking_tasks/require_collab_test_2_items/2_agent_hells_kitchen_full.json --num_agents 2 sleep 360 -python3 tasks/evaluation_script.py --model gpt-4o --num_parallel 1 --num_exp 1 --exp_name "4o_1_agent_crafting" --template_profile ./profiles/tasks/crafting_profile.json --task_path tasks/crafting_tasks/test_tasks/1_agent.json --num_agents 1 \ No newline at end of file +python3 tasks/evaluation_script.py --model claude-3-5-sonnet-latest --num_parallel 1 --num_exp 1 --exp_name "claude_2_agent_block_recipe" --template_profile ./profiles/tasks/cooking_profile.json --task_path tasks/cooking_tasks/require_collab_test_2_items/2_agent_full.json --num_agents 2 +sleep 360 +python3 tasks/evaluation_script.py --model gpt-4o --num_parallel 1 --num_exp 1 --exp_name "4o_2_agent_block_recipe" --template_profile ./profiles/tasks/cooking_profile.json --task_path tasks/cooking_tasks/require_collab_test_2_items/2_agent_hells_kitchen_full.json --num_agents 2 +sleep 360 +python3 tasks/evaluation_script.py --model gpt-4o --num_parallel 1 --num_exp 1 --exp_name "4o_2_agent_block_recipe" --template_profile ./profiles/tasks/cooking_profile.json --task_path tasks/cooking_tasks/require_collab_test_2_items/2_agent_full.json --num_agents 2 +sleep 360 +python3 tasks/evaluation_script.py --model gpt-4o --num_parallel 1 --num_exp 1 --exp_name "claude_2_agent_block_recipe" --template_profile ./profiles/tasks/cooking_profile.json --task_path tasks/cooking_tasks/require_collab_test_2_items/2_agent_block_recipe_full.json --num_agents 2 \ No newline at end of file