This commit is contained in:
Isadora White 2025-03-23 11:31:08 -05:00
commit db962d722a
5 changed files with 2356 additions and 33 deletions

View file

@ -1,39 +1,89 @@
import argparse
import glob
import json
import os
import re
from collections import defaultdict

import boto3
from botocore.exceptions import ClientError
from prettytable import PrettyTable
from tqdm import tqdm
def extract_success_scores(root_dir):
    """Collect per-task success scores from experiment logs and print summaries.

    Walks every task folder under root_dir, reads its JSON log files, pulls the
    final "Task ended with score: X" system message, and prints a per-task
    score table plus average-score tables grouped by the material and room
    numbers encoded in each folder name.

    Args:
        root_dir (str): Directory whose sub-folders each hold one task's logs.
    """
    task_scores = {}  # task folder name -> success score
    material_groups = defaultdict(list)
    room_groups = defaultdict(list)

    # Folder names look like 'materials_<M>_rooms_<R>...'; capture M and R.
    pattern = re.compile(r"materials_(\d+)_rooms_(\d+)")

    # Iterate through each task folder.
    for task_folder in os.listdir(root_dir):
        task_path = os.path.join(root_dir, task_folder)
        if os.path.isdir(task_path):
            logs_found = False  # Flag to track if logs exist

            for file_name in os.listdir(task_path):
                if file_name.endswith(".json"):
                    logs_found = True  # JSON file exists
                    file_path = os.path.join(task_path, file_name)
                    try:
                        with open(file_path, 'r') as file:
                            data = json.load(file)

                        # Extract the success score from the last matching
                        # system message (scan turns from the end).
                        for turn in reversed(data.get("turns", [])):
                            if turn["role"] == "system" and "Task ended with score" in turn["content"]:
                                score = float(turn["content"].split(":")[-1].strip())
                                task_scores[task_folder] = score  # Store per-task score
                                break  # Stop searching if found

                        # Stop checking other files in the folder once a
                        # score has been recorded.
                        if task_folder in task_scores:
                            break
                    except Exception as e:
                        print(f"Error reading {file_path}: {e}")

            # If no logs were found, print a message.
            if not logs_found:
                print(f"No log files found in {task_folder}")

    # Group scores by material and room number.
    for task, score in task_scores.items():
        match = pattern.search(task)
        if match:
            material = int(match.group(1))  # Extract material number
            room = int(match.group(2))      # Extract room number
            material_groups[material].append(score)
            room_groups[room].append(score)
        else:
            print(f"Warning: Task folder '{task}' does not match expected format.")

    def calculate_average(group):
        # Mean score per key; every list is non-empty because keys are only
        # created when a score is appended.
        return {key: sum(values) / len(values) for key, values in group.items()}

    avg_material_scores = calculate_average(material_groups)
    avg_room_scores = calculate_average(room_groups)

    def display_table(title, data):
        # Render a two-column table of key -> average score.
        table = PrettyTable(["Category", "Average Score"])
        for key, value in sorted(data.items()):
            table.add_row([key, round(value, 2)])
        print(f"\n{title}")
        print(table)

    def display_task_scores():
        table = PrettyTable(["Task", "Success Score"])
        for task, score in sorted(task_scores.items()):
            table.add_row([task, round(score, 2)])
        print("\nTask-wise Success Scores")
        print(table)

    # Print all tables.
    display_task_scores()
    display_table("Average Success Score by Material (Grouped by Number)", avg_material_scores)
    display_table("Average Success Score by Room (Grouped by Number)", avg_room_scores)


# Example usage (replace 'root_directory' with actual path)
root_directory = "experiments/exp_03-22_19-29"
extract_success_scores(root_directory)

View file

@ -1,7 +1,7 @@
{ {
"name": "andy", "name": "andy",
"model": "gpt-4o", "model": "gpt-4o-mini",
"conversing": "You are a task-focused Minecraft bot named $NAME. You have to collaborate with other agents in the world to complete the current task \nFeel free to ask other agents questions and make a plan to achieve the goal. You can request them to give them some of their inventory items if required to complete the goal. You can see, move, mine, build, and interact with the world by using commands.\n$SELF_PROMPT Act human-like as if you were a typical Minecraft player, rather than an AI. Be very brief in your responses, don't apologize constantly, don't give instructions or make lists unless asked, and don't refuse requests. Don't pretend to act, use commands immediately when requested. Do NOT say this: 'Sure, I've stopped.', instead say this: 'Sure, I'll stop. !stop'. Do NOT say this: 'On my way! Give me a moment.', instead say this: 'On my way! !goToPlayer(\"playername\", 3)'. Respond only as $NAME, never output '(FROM OTHER BOT)' or pretend to be someone else. If you have nothing to say or do, respond with an just a tab '\t'. Share resources and information with other bots! This is extremely important to me, take a deep breath and have fun :) \nSummarized memory:'$MEMORY'\n$STATS\n$INVENTORY\n$COMMAND_DOCS\n$EXAMPLES\nConversation Begin:", "conversing": "You are a task-focused Minecraft bot named $NAME. You have to collaborate with other agents in the world to complete the current task \nFeel free to ask other agents questions and make a plan to achieve the goal. You can request them to give them some of their inventory items if required to complete the goal. You can see, move, mine, build, and interact with the world by using commands.\n$SELF_PROMPT Act human-like as if you were a typical Minecraft player, rather than an AI. Be very brief in your responses, don't apologize constantly, don't give instructions or make lists unless asked, and don't refuse requests. Don't pretend to act, use commands immediately when requested. 
Do NOT say this: 'Sure, I've stopped.', instead say this: 'Sure, I'll stop. !stop'. Do NOT say this: 'On my way! Give me a moment.', instead say this: 'On my way! !goToPlayer(\"playername\", 3)'. Respond only as $NAME, never output '(FROM OTHER BOT)' or pretend to be someone else. If you have nothing to say or do, respond with an just a tab '\t'. Share resources and information with other bots! This is extremely important to me, take a deep breath and have fun :) \nSummarized memory:'$MEMORY'\n$STATS\n$INVENTORY\n$COMMAND_DOCS\n$EXAMPLES\nConversation Begin:",

View file

@ -0,0 +1,241 @@
import json
import re
import statistics
import random
import os
def extract_difficulty(task_name):
    """Parse (materials, rooms, windows, carpets) out of a task name.

    Returns (0, 0, 0, 0) when the name does not follow the
    'materials_M_rooms_R_window_W_carpet_C_variant_V' naming convention,
    i.e. unmatched names are treated as lowest difficulty.
    """
    found = re.search(r'materials_(\d+)_rooms_(\d+)_window_(\d+)_carpet_(\d+)_variant_\d+', task_name)
    if found is None:
        return (0, 0, 0, 0)
    return tuple(int(part) for part in found.groups())
def calculate_difficulty_score(task_name, task, alpha=1.0, beta=3.0):
    """Compute a scalar difficulty score for a task (higher = harder).

    The score is a weighted sum of the parameters encoded in the task name:
    materials*4 + rooms*10 + windows*2 + carpets*1.

    Args:
        task_name (str): Task name containing the difficulty parameters.
        task (dict): Task details; currently unused by the formula.
        alpha (float): Kept for interface compatibility; unused.
        beta (float): Kept for interface compatibility; unused.

    Returns:
        int: The weighted difficulty score.
    """
    m, r, w, c = extract_difficulty(task_name)
    # NOTE(review): an earlier version also factored in the blueprint level
    # count and the alpha/beta weights; the current formula uses neither, so
    # the unused level-count computation has been removed.
    return m * 4 + r * 10 + w * 2 + c * 1
def process_json(file_path, output_path, alpha=1.0, beta=3.0):
    """Count tasks, score their difficulty, and save the 30 easiest to a file.

    Tasks whose blueprint has fewer than 3 levels are discarded. The rest are
    scored with calculate_difficulty_score, sorted ascending by score, and the
    30 easiest (or all remaining, if fewer) are written to output_path as a
    JSON object mapping task name -> task details.

    Args:
        file_path (str): Input JSON mapping task name -> task details.
        output_path (str): Destination file for the easiest tasks.
        alpha (float): Forwarded to calculate_difficulty_score.
        beta (float): Forwarded to calculate_difficulty_score.
    """
    with open(file_path, 'r') as f:
        data = json.load(f)

    # Count total tasks.
    total_tasks = len(data)
    print(f"Total tasks: {total_tasks}")

    def _describe(scores):
        # Summary statistics for a NON-EMPTY list of scores.
        return {
            "mean": statistics.mean(scores),
            "median": statistics.median(scores),
            "min": min(scores),
            "max": max(scores),
        }

    # Compute difficulty scores for tasks with at least 3 levels.
    task_difficulties = []
    filtered_out = 0
    for task_name, task_details in data.items():
        num_levels = len(task_details.get("blueprint", {}).get("levels", []))
        if num_levels < 3:
            filtered_out += 1
            continue
        score = calculate_difficulty_score(task_name, task_details, alpha, beta)
        task_difficulties.append((task_name, score, task_details))

    print(f"Filtered out {filtered_out} tasks with fewer than 3 levels")
    print(f"Remaining tasks after filtering: {len(task_difficulties)}")

    # Calculate statistics on the filtered tasks.
    if task_difficulties:
        stats = _describe([score for _, score, _ in task_difficulties])
        print(f"Difficulty Statistics for Overall Tasks: {stats}")
    else:
        stats = {"mean": 0, "median": 0, "min": 0, "max": 0}
        print("No tasks remaining after filtering!")

    # Sort tasks by difficulty (ascending) and take the easiest 30 (or all).
    task_difficulties.sort(key=lambda x: x[1])
    num_tasks_to_select = min(30, len(task_difficulties))
    selected = task_difficulties[:num_tasks_to_select]
    easiest_tasks = {name: details for name, _, details in selected}

    # BUG FIX: the original called statistics.mean on the selection without
    # checking for emptiness, raising StatisticsError when every task was
    # filtered out; guard the same way as the overall stats above.
    if selected:
        easiest_stats = _describe([score for _, score, _ in selected])
    else:
        easiest_stats = {"mean": 0, "median": 0, "min": 0, "max": 0}
    print(f"Difficulty Statistics for Easiest Tasks: {easiest_stats}")

    # Count how many of the selected tasks fall in each (m, r, w, c) bucket.
    unique_difficulties = {}
    for task_name, _, _ in selected:
        key = extract_difficulty(task_name)
        unique_difficulties[key] = unique_difficulties.get(key, 0) + 1
    print(f"Unique (m, r, w, c) combinations in the easiest tasks:")
    for difficulty, count in unique_difficulties.items():
        print(f" {difficulty}: {count} tasks")

    output_data = easiest_tasks

    # Save to output file.
    with open(output_path, 'w') as f:
        json.dump(output_data, f, indent=4)
    print(f"Saved {num_tasks_to_select} easiest tasks with statistics to {output_path}")
def sample_tasks_with_distribution(file_path, output_path):
    """
    Sample tasks with a specific distribution:
    - 3 tasks for each of the 9 possibilities of (m,r) where 0 <= m <= 2 and 0 <= r <= 2
    - Random (w,c) between 0 and 1 for the above tasks
    - 2 additional tasks from (m,r,w,c) = (0,0,0,0)
    - 1 additional task from (m,r,w,c) = (1,0,0,0)

    Reads the task-definition JSON at file_path, keeps only tasks whose
    blueprint has at least 3 levels, samples according to the distribution
    above, prints a summary, and writes the sampled tasks to output_path.
    """
    with open(file_path, 'r') as f:
        data = json.load(f)
    # Filter tasks with at least 3 levels
    valid_tasks = {}
    for task_name, task_details in data.items():
        num_levels = len(task_details.get("blueprint", {}).get("levels", []))
        if num_levels >= 3:
            valid_tasks[task_name] = task_details
    # print(f"Total valid tasks: {len(valid_tasks)}")
    # Categorize tasks by their (m,r,w,c) values
    tasks_by_params = {}
    for task_name, task_details in valid_tasks.items():
        m, r, w, c = extract_difficulty(task_name)
        key = (m, r, w, c)
        if key not in tasks_by_params:
            tasks_by_params[key] = []
        tasks_by_params[key].append((task_name, task_details))
    # # Print available combinations
    # print("Available (m,r,w,c) combinations:")
    # for params, tasks in tasks_by_params.items():
    #     print(f" {params}: {len(tasks)} tasks")
    # Sample tasks according to the distribution
    sampled_tasks = {}
    already_sampled = set()  # guards against selecting the same task twice
    # 1. Sample 3 tasks for each (m,r) where 0 <= m <= 2 and 0 <= r <= 2
    for m in range(3):
        for r in range(3):
            # Find all tasks with the current (m,r) and w,c between 0 and 1
            candidates = []
            for params, tasks in tasks_by_params.items():
                if params[0] == m and params[1] == r and params[2] <= 1 and params[3] <= 1:
                    candidates.extend(tasks)
            # Sample 3 tasks if possible
            if len(candidates) >= 3:
                sampled = random.sample(candidates, 3)
                for task_name, task_details in sampled:
                    if task_name not in already_sampled:
                        sampled_tasks[task_name] = task_details
                        already_sampled.add(task_name)
            else:
                print(f"Warning: Not enough tasks for (m={m}, r={r}) with w,c <= 1. Found {len(candidates)}.")
                # Add all available
                for task_name, task_details in candidates:
                    if task_name not in already_sampled:
                        sampled_tasks[task_name] = task_details
                        already_sampled.add(task_name)
    # 2. Add 2 tasks with (m,r,w,c) = (0,0,0,0)
    zero_zero_zero_zero = tasks_by_params.get((0,0,0,0), [])
    zero_zero_zero_zero = [t for t in zero_zero_zero_zero if t[0] not in already_sampled]
    if len(zero_zero_zero_zero) >= 2:
        additional = random.sample(zero_zero_zero_zero, 2)
        for task_name, task_details in additional:
            sampled_tasks[task_name] = task_details
            already_sampled.add(task_name)
    else:
        # Fewer than 2 remain: take whatever is left.
        print(f"Warning: Not enough tasks for (0,0,0,0). Found {len(zero_zero_zero_zero)}.")
        for task_name, task_details in zero_zero_zero_zero:
            sampled_tasks[task_name] = task_details
            already_sampled.add(task_name)
    # 3. Add 1 task with (m,r,w,c) = (1,0,0,0)
    one_zero_zero_zero = tasks_by_params.get((1,0,0,0), [])
    one_zero_zero_zero = [t for t in one_zero_zero_zero if t[0] not in already_sampled]
    if len(one_zero_zero_zero) >= 1:
        additional = random.sample(one_zero_zero_zero, 1)
        for task_name, task_details in additional:
            sampled_tasks[task_name] = task_details
            already_sampled.add(task_name)
    else:
        print(f"Warning: Not enough tasks for (1,0,0,0). Found {len(one_zero_zero_zero)}.")
        for task_name, task_details in one_zero_zero_zero:
            sampled_tasks[task_name] = task_details
            already_sampled.add(task_name)
    # Print summary of sampled tasks
    print(f"\nTotal sampled tasks: {len(sampled_tasks)}")
    # Count tasks by their (m,r) values
    distribution = {}
    for task_name in sampled_tasks:
        m, r, w, c = extract_difficulty(task_name)
        key = (m, r)
        if key not in distribution:
            distribution[key] = []
        distribution[key].append((w, c))
    print("\nDistribution of sampled tasks:")
    for mr, wc_list in distribution.items():
        print(f" (m={mr[0]}, r={mr[1]}): {len(wc_list)} tasks")
        for wc in wc_list:
            print(f" (w={wc[0]}, c={wc[1]})")
    # Check for duplicates in sampled tasks
    # NOTE(review): dict keys are unique by construction, so this branch can
    # never fire; it is kept as a sanity check.
    if len(sampled_tasks) != len(set(sampled_tasks.keys())):
        print("\nWARNING: Duplicate tasks detected!")
        # Find the duplicates
        task_counts = {}
        for task_name in sampled_tasks.keys():
            task_counts[task_name] = task_counts.get(task_name, 0) + 1
        duplicates = [task for task, count in task_counts.items() if count > 1]
        print(f"Duplicate tasks: {duplicates}")
    else:
        print("\nVerification: No duplicates found in the sampled tasks.")
    # Save to output file
    with open(output_path, 'w') as f:
        json.dump(sampled_tasks, f, indent=4)
    print(f"\nSaved {len(sampled_tasks)} distributed tasks to {output_path}")
# Example usage:
# process_json('test/2agents.json', 'test/2_agents_easiest_tasks.json', alpha=1.0, beta=3.0)

# Build a distributed sample for every '*agents.json' file in the tasks folder.
tasks_dir = 'test'
for filename in os.listdir(tasks_dir):
    if filename.endswith('agents.json'):
        input_path = os.path.join(tasks_dir, filename)
        # Create output filename by replacing .json with _distributed_tasks.json
        output_filename = filename.replace('.json', '_distributed_tasks.json')
        output_path = os.path.join(tasks_dir, output_filename)
        # BUG FIX: the f-string printed a literal placeholder rather than the
        # file currently being processed.
        print(f"\nProcessing {filename}...")
        sample_tasks_with_distribution(input_path, output_path)

View file

@ -0,0 +1,87 @@
import json
import re
import random
import os
from collections import defaultdict
def extract_difficulty(task_name):
    """Return the (materials, rooms, windows, carpets) tuple for a task name.

    Names are expected to follow the pattern
    'materials_M_rooms_R_window_W_carpet_C_variant_V'; anything else maps to
    the default tuple (0, 0, 0, 0).
    """
    m = re.search(r'materials_(\d+)_rooms_(\d+)_window_(\d+)_carpet_(\d+)_variant_\d+', task_name)
    return tuple(map(int, m.groups())) if m else (0, 0, 0, 0)
def filter_and_sample_tasks(file_path, output_path):
    """Filter, sample, and save up to 500 tasks from a task-definition file.

    Keeps only tasks whose blueprint has at least 3 levels, walks the
    (m, r, w, c) difficulty buckets in increasing order taking tasks until
    500 are collected, shuffles the selection, and writes it to output_path
    as a JSON object mapping task name -> task details.

    Args:
        file_path (str): Input JSON mapping task name -> task details.
        output_path (str): Destination JSON file for the sampled tasks.
    """
    with open(file_path, 'r') as f:
        data = json.load(f)

    total_tasks = len(data)
    print(f"\nProcessing file: {file_path}")
    print(f"Total available tasks: {total_tasks}")

    # Keep only tasks with at least 3 blueprint levels.
    valid_tasks = {}
    for task_name, task_details in data.items():
        num_levels = len(task_details.get("blueprint", {}).get("levels", []))
        if num_levels >= 3:
            valid_tasks[task_name] = task_details
    print(f"Tasks with at least 3 levels: {len(valid_tasks)}")

    # Bucket tasks by their (m, r, w, c) difficulty tuple.
    tasks_by_params = defaultdict(list)
    for task_name, task_details in valid_tasks.items():
        tasks_by_params[extract_difficulty(task_name)].append((task_name, task_details))

    sampled_tasks = {}
    sampled_task_counts = defaultdict(int)

    # Walk buckets easiest-first until 500 tasks are collected.
    # (The original also re-tested `key in tasks_by_params` while iterating
    # that dict's own sorted keys — always true, so dropped; the separate
    # total_selected counter duplicated len(sampled_tasks) and is gone too.)
    for key in sorted(tasks_by_params.keys()):
        if len(sampled_tasks) >= 500:
            break
        for task_name, task_details in tasks_by_params[key]:
            if len(sampled_tasks) >= 500:
                break
            sampled_tasks[task_name] = task_details
            sampled_task_counts[key] += 1  # Keep the key as a tuple

    print(f"\nTotal sampled tasks: {len(sampled_tasks)}")

    # Print task count per (m, r, w, c) tuple.
    print("\nTask count per (m, r, w, c):")
    for key, count in sorted(sampled_task_counts.items()):
        print(f"{key}: {count}")

    # Shuffle so downstream consumers do not see difficulty-ordered tasks.
    shuffled_tasks = list(sampled_tasks.items())
    random.shuffle(shuffled_tasks)
    final_tasks = dict(shuffled_tasks)

    # Save sampled tasks to JSON.
    with open(output_path, 'w') as f:
        json.dump(final_tasks, f, indent=4)
    print(f"\nSaved {len(final_tasks)} tasks to {output_path}")
# Process all relevant files: every '*agents.json' task file in the training
# directory gets a sampled companion file written alongside it.
tasks_dir = 'train'
all_filenames = [f for f in os.listdir(tasks_dir) if f.endswith('agents.json')]
all_filenames.sort()
# NOTE(review): the original used enumerate() here but never used the index.
for filename in all_filenames:
    input_path = os.path.join(tasks_dir, filename)
    output_filename = filename.replace('.json', '_sampled_tasks_for_training.json')
    output_path = os.path.join(tasks_dir, output_filename)
    filter_and_sample_tasks(input_path, output_path)

File diff suppressed because it is too large Load diff