small changes

This commit is contained in:
Isadora White 2025-05-14 14:27:38 -07:00
parent 87e56092bf
commit a1bd99dc43
3 changed files with 11 additions and 5 deletions

View file

@@ -155,7 +155,7 @@
"missing_items": [],
"requires_crafting_table": true
},
"multiagent_crafting_requires_ctable_blue_banner_2_with_plan__depth_0_num_agents_3": {
"multiagent_crafting_requires_ctable_blue_banner_2_with_partial_plan__depth_0_num_agents_3": {
"goal": "Collaborate with other agents to craft an blue_banner",
"conversation": "Let's work together to craft an blue_banner.",
"initial_inventory": {
@@ -190,7 +190,7 @@
"missing_items": [],
"requires_crafting_table": true
},
"multiagent_crafting_requires_ctable_cyan_bed_1_with_plan__depth_0_num_agents_3": {
"multiagent_crafting_requires_ctable_cyan_bed_1_with_partial_plan__depth_0_num_agents_3": {
"goal": "Collaborate with other agents to craft an cyan_bed",
"conversation": "Let's work together to craft an cyan_bed.",
"initial_inventory": {

View file

@@ -13,7 +13,6 @@ import json
import glob
import socket
from tqdm import tqdm
import boto3
BLOCKED_ACTIONS_COOKING = [
@@ -119,7 +118,7 @@ def aggregate_results(local_folders):
elif "construction" in task_type:
task_type = "construction"
for folder_path in tqdm(local_folders):
for folder_path in local_folders:
folder_name = os.path.basename(folder_path)
try:

View file

@@ -1,2 +1,9 @@
# Sequential batch of evaluation runs. Each command invokes
# tasks/evaluation_script.py once with a single model, template profile, task
# file, and agent count; `sleep 360` pauses 360 seconds between runs.
# NOTE(review): this listing comes from a diff hunk (2 lines before, 9 after,
# 10 shown), so one of the lines below is presumably the removed version of
# the script rather than part of the current file -- confirm against the repo.
# NOTE(review): several runs reuse the same --exp_name with different
# --task_path values; verify that their outputs do not overwrite each other.
python3 tasks/evaluation_script.py --model claude-3-5-sonnet-latest --num_parallel 1 --num_exp 1 --exp_name "claude_2_agent_block_recipe" --template_profile ./profiles/tasks/cooking_profile.json --task_path tasks/cooking_tasks/require_collab_test_2_items/2_agent_hells_kitchen_full.json --num_agents 2
sleep 360
# NOTE(review): unlike the other runs, no `sleep` follows this command before
# the next one starts -- confirm that is intentional.
python3 tasks/evaluation_script.py --model gpt-4o --num_parallel 1 --num_exp 1 --exp_name "4o_1_agent_crafting" --template_profile ./profiles/tasks/crafting_profile.json --task_path tasks/crafting_tasks/test_tasks/1_agent.json --num_agents 1
python3 tasks/evaluation_script.py --model claude-3-5-sonnet-latest --num_parallel 1 --num_exp 1 --exp_name "claude_2_agent_block_recipe" --template_profile ./profiles/tasks/cooking_profile.json --task_path tasks/cooking_tasks/require_collab_test_2_items/2_agent_full.json --num_agents 2
sleep 360
python3 tasks/evaluation_script.py --model gpt-4o --num_parallel 1 --num_exp 1 --exp_name "4o_2_agent_block_recipe" --template_profile ./profiles/tasks/cooking_profile.json --task_path tasks/cooking_tasks/require_collab_test_2_items/2_agent_hells_kitchen_full.json --num_agents 2
sleep 360
python3 tasks/evaluation_script.py --model gpt-4o --num_parallel 1 --num_exp 1 --exp_name "4o_2_agent_block_recipe" --template_profile ./profiles/tasks/cooking_profile.json --task_path tasks/cooking_tasks/require_collab_test_2_items/2_agent_full.json --num_agents 2
sleep 360
# NOTE(review): --model is gpt-4o but --exp_name says "claude_..."; one of the
# two looks like a copy-paste leftover -- confirm which model this run targets.
python3 tasks/evaluation_script.py --model gpt-4o --num_parallel 1 --num_exp 1 --exp_name "claude_2_agent_block_recipe" --template_profile ./profiles/tasks/cooking_profile.json --task_path tasks/cooking_tasks/require_collab_test_2_items/2_agent_block_recipe_full.json --num_agents 2