From d39b254a06317fe6b6fac33f181302f0c68dc868 Mon Sep 17 00:00:00 2001
From: Ayush Maniar
Date: Fri, 28 Mar 2025 13:13:16 -0700
Subject: [PATCH] Updates on construction and cooking tasks, prettyTable,
 flexibility to enter multiple folders, ....

---
 analyze_construction_tasks.py |  52 +++---
 analyze_cooking_tasks.py      | 338 +++++++++++++++++-----------------
 2 files changed, 200 insertions(+), 190 deletions(-)

diff --git a/analyze_construction_tasks.py b/analyze_construction_tasks.py
index 7593d1a..4c1f94f 100644
--- a/analyze_construction_tasks.py
+++ b/analyze_construction_tasks.py
@@ -9,11 +9,11 @@ def extract_success_scores(folders, model_names):
     all_task_scores = defaultdict(dict) # Stores task-wise scores per model
     zero_score_tasks = defaultdict(list) # Stores tasks with 0 score per model
-    null_score_tasks = defaultdict(list) # Stores tasks with null score per model
     material_groups = defaultdict(lambda: defaultdict(list))
     room_groups = defaultdict(lambda: defaultdict(list))
     material_room_groups = defaultdict(lambda: defaultdict(list))
     overall_scores = defaultdict(list) # New dict to store all scores for each model
+    skipped_tasks = defaultdict(list) # Stores tasks with no score message per model
 
     pattern = re.compile(r"materials_(\d+)_rooms_(\d+)")
 
@@ -50,22 +50,22 @@ def extract_success_scores(folders, model_names):
                     print(f"Error reading {file_path}: {e}")
 
             if logs_found and not score_found:
-                # Score not found but logs exist - mark as null
-                all_task_scores[task_folder][model_name] = None
-                null_score_tasks[model_name].append(task_folder)
+                # Score not found but logs exist - skip this task
+                skipped_tasks[model_name].append(task_folder)
+                print(f"Error: No score message found for task '{task_folder}' with model '{model_name}'. Skipping this task.")
 
             if not logs_found:
                 print(f"No log files found in {task_folder}")
 
-    # Calculate model completion rates (ignore null scores)
+    # Calculate model completion rates (only consider tasks with scores)
     model_completion_rates = {}
     for model_name in model_names:
-        valid_tasks = [task for task in all_task_scores.keys() if model_name in all_task_scores[task] and all_task_scores[task][model_name] is not None]
+        valid_tasks = [task for task in all_task_scores.keys() if model_name in all_task_scores[task]]
         total_tasks = len(valid_tasks)
         completed_tasks = len([task for task in valid_tasks if all_task_scores[task][model_name] > 0])
         model_completion_rates[model_name] = (completed_tasks / total_tasks) if total_tasks > 0 else 0
 
-    # Process task scores into groups (ignore null and 0 scores)
+    # Process task scores into groups (ignore 0 scores)
     for task, model_scores in all_task_scores.items():
         match = pattern.search(task)
         if match:
@@ -73,7 +73,7 @@ def extract_success_scores(folders, model_names):
             room = int(match.group(2))
 
             for model, score in model_scores.items():
-                if score is not None and score > 0: # Ignore null and 0 scores
+                if score > 0: # Ignore 0 scores
                     material_groups[material][model].append(score)
                     room_groups[room][model].append(score)
                     material_room_groups[(material, room)][model].append(score)
@@ -102,14 +102,14 @@ def extract_success_scores(folders, model_names):
             for model in model_names:
                 score = all_task_scores[task].get(model)
                 if score is None:
-                    row.append("null")
+                    row.append("-")
                 else:
                     row.append(round(score, 2))
             table.add_row(row)
         print("\nTask-wise Success Scores")
         print(table)
 
-    def display_zero_and_null_score_tasks():
+    def display_zero_and_skipped_tasks():
         for model in model_names:
             if zero_score_tasks[model]:
                 table = PrettyTable([f"{model} - Tasks with 0 Score"])
@@ -118,28 +118,28 @@ def extract_success_scores(folders, model_names):
                 print(f"\n{model} - Tasks with 0 Success Score")
                 print(table)
 
-            if null_score_tasks[model]:
-                table = PrettyTable([f"{model} - Tasks with Null Score"])
-                for task in null_score_tasks[model]:
+            if skipped_tasks[model]:
+                table = PrettyTable([f"{model} - Skipped Tasks (No Score Message)"])
+                for task in skipped_tasks[model]:
                     table.add_row([task])
-                print(f"\n{model} - Tasks with Null Success Score")
+                print(f"\n{model} - Skipped Tasks (No Score Message)")
                 print(table)
 
     def display_overall_averages():
         table = PrettyTable(["Metric"] + model_names)
 
-        # Overall average score (including zeros, excluding nulls)
+        # Overall average score (including zeros)
         row_with_zeros = ["Average Score (All Tasks)"]
         for model in model_names:
-            valid_scores = [s for s in overall_scores[model] if s is not None]
+            valid_scores = overall_scores[model]
             avg = sum(valid_scores) / len(valid_scores) if valid_scores else 0
             row_with_zeros.append(round(avg, 2))
         table.add_row(row_with_zeros)
 
-        # Overall average score (excluding zeros and nulls)
+        # Overall average score (excluding zeros)
        row_without_zeros = ["Average Score (Completed Tasks)"]
        for model in model_names:
-            completed_scores = [s for s in overall_scores[model] if s is not None and s > 0]
+            completed_scores = [s for s in overall_scores[model] if s > 0]
             avg = sum(completed_scores) / len(completed_scores) if completed_scores else 0
             row_without_zeros.append(round(avg, 2))
         table.add_row(row_without_zeros)
@@ -150,24 +150,30 @@ def extract_success_scores(folders, model_names):
             completion_row.append(round(model_completion_rates[model] * 100, 2))
         table.add_row(completion_row)
 
-        # Total number of tasks (excluding nulls)
+        # Total number of tasks
         task_count_row = ["Total Tasks"]
         for model in model_names:
-            valid_tasks = [task for task in all_task_scores.keys() if model in all_task_scores[task] and all_task_scores[task][model] is not None]
+            valid_tasks = [task for task in all_task_scores.keys() if model in all_task_scores[task]]
             task_count_row.append(len(valid_tasks))
         table.add_row(task_count_row)
 
+        # Number of skipped tasks
+        skipped_count_row = ["Skipped Tasks"]
+        for model in model_names:
+            skipped_count_row.append(len(skipped_tasks[model]))
+        table.add_row(skipped_count_row)
+
         print("\nOverall Performance Metrics")
         print(table)
 
     display_overall_averages() # Display overall averages first
     display_task_scores()
-    display_zero_and_null_score_tasks()
+    display_zero_and_skipped_tasks()
     display_table("Average Success Score by Material", avg_material_scores)
     display_table("Average Success Score by Room", avg_room_scores)
     display_table("Average Success Score by (Material, Room) Tuples", avg_material_room_scores, tuple_keys=True)
 
 # Example usage
-folders = ["experiments/gpt-4o_construction_tasks", "experiments/exp_03-23_12-31"]
-model_names = ["GPT-4o","Claude 3.5 sonnet"]
+folders = ["experiments/gpt-4o_construction_tasks", "experiments/claude-3-5-sonnet-latest_construction_tasks"]
+model_names = ["GPT-4o", "Claude 3.5 sonnet"]
 extract_success_scores(folders, model_names)
\ No newline at end of file
diff --git a/analyze_cooking_tasks.py b/analyze_cooking_tasks.py
index d33be22..7575d3c 100644
--- a/analyze_cooking_tasks.py
+++ b/analyze_cooking_tasks.py
@@ -2,6 +2,7 @@ import os
 import json
 import re
 from collections import defaultdict
+from prettytable import PrettyTable
 
 def extract_cooking_items(exp_dir):
     """Extract cooking items from experiment directory name."""
@@ -36,8 +37,8 @@ def analyze_experiments(root_dir, model_name):
     # Keep track of all unique cooking items
     all_cooking_items = set()
 
-    # Track skipped experiments
-    skipped_experiments = []
+    # Keep track of ignored tasks
+    ignored_tasks = []
 
     # Get a list of all experiment directories
     experiment_dirs = [d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))
@@ -78,18 +79,18 @@ def analyze_experiments(root_dir, model_name):
                 with open(agent_file_path, 'r') as f:
                     agent_data = json.load(f)
 
-                # Check for success in the turns data
+                # Check for score information in the turns data
                 if "turns" in agent_data:
                     for turn in agent_data["turns"]:
                         if turn.get("role") == "system" and "content" in turn:
-                            if isinstance(turn["content"], str) and "Task ended with score" in turn["content"]:
+                            if isinstance(turn["content"], str) and "Task ended with score : " in turn["content"]:
                                 score_found = True
                                 if "Task ended with score : 1" in turn["content"]:
                                     is_successful = True
-                                    break
+                                break
 
-                # If we found score information, no need to check other files
-                if score_found:
+                # If we found success, no need to check other files
+                if is_successful:
                     break
 
             except (json.JSONDecodeError, IOError) as e:
@@ -97,10 +98,9 @@ def analyze_experiments(root_dir, model_name):
                 # Continue to check other agent files instead of failing
                 continue
 
-        # Skip experiments with no score information
+        # If no score information was found in any agent file, ignore this task
        if not score_found:
-            skipped_experiments.append(exp_dir)
-            print(f"Warning: No task score found in experiment {exp_dir} - skipping")
+            ignored_tasks.append(exp_dir)
             continue
 
         # Update cooking item results
@@ -114,178 +114,195 @@ def analyze_experiments(root_dir, model_name):
             if is_successful:
                 blocked_access_results[blocked_key]["success"] += 1
 
-    return blocked_access_results, cooking_item_results, all_cooking_items, skipped_experiments
+    # Print information about ignored tasks
+    if ignored_tasks:
+        print(f"\n{model_name}: Ignored {len(ignored_tasks)} tasks with no score information:")
+        for task in ignored_tasks:
+            print(f" - {task}")
+
+    return blocked_access_results, cooking_item_results, all_cooking_items, ignored_tasks
 
 def print_model_comparison_blocked(models_results):
     print("\nModel Comparison by Number of Agents with Blocked Access:")
     print("=" * 100)
-
+
     # Get all possible blocked access keys
     all_blocked_keys = set()
     for model_results in models_results.values():
         all_blocked_keys.update(model_results.keys())
-
+
     # Sort the keys
     sorted_keys = sorted(all_blocked_keys, key=lambda x: int(x.split()[0]))
-
-    # Create the header
-    header = f"{'Blocked Agents':<15} | "
-    for model_name in models_results.keys():
-        header += f"{model_name+' Success Rate':<20} | {model_name+' Success/Total':<20} | "
-    print(header)
-    print("-" * 100)
-
-    # Calculate and print the results for each blocked key
+
+    # Create the table
+    table = PrettyTable()
+    table.field_names = ["Blocked Agents"] + [
+        f"{model_name} (Success Rate | Success/Total)" for model_name in models_results.keys()
+    ]
+
+    # Calculate and add rows for each blocked key
     model_totals = {model: {"success": 0, "total": 0} for model in models_results.keys()}
-
+
     for key in sorted_keys:
-        row = f"{key:<15} | "
-
+        row = [key]
+
         for model_name, model_results in models_results.items():
             if key in model_results:
                 success = model_results[key]["success"]
                 total = model_results[key]["total"]
-
+
                 model_totals[model_name]["success"] += success
                 model_totals[model_name]["total"] += total
-
+
                 success_rate = (success / total * 100) if total > 0 else 0
-                row += f"{success_rate:>6.2f}%{'':<12} | {success}/{total}{'':<12} | "
+                row.append(f"{success_rate:.2f}% | {success}/{total}")
             else:
-                row += f"{'N/A':<19} | {'N/A':<19} | "
-
-        print(row)
-
+                row.append("N/A")
+
+        table.add_row(row)
+
+    # Print the table
+    print(table)
+
     # Print the overall results
-    print("-" * 100)
-    row = f"{'Overall':<15} | "
-
+    overall_row = ["Overall"]
     for model_name, totals in model_totals.items():
         success = totals["success"]
         total = totals["total"]
         success_rate = (success / total * 100) if total > 0 else 0
-        row += f"{success_rate:>6.2f}%{'':<12} | {success}/{total}{'':<12} | "
-
-    print(row)
+        overall_row.append(f"{success_rate:.2f}% | {success}/{total}")
+
+    table.add_row(overall_row)
+    print(table)
 
 def print_model_comparison_items(models_item_results, all_cooking_items):
     print("\nModel Comparison by Cooking Item:")
     print("=" * 100)
-
-    # Create the header
-    header = f"{'Cooking Item':<20} | "
-    for model_name in models_item_results.keys():
-        header += f"{model_name+' Success Rate':<20} | {model_name+' Success/Total':<20} | "
-    print(header)
-    print("-" * 100)
-
-    # Calculate and print the results for each cooking item
+
+    # Create the table
+    table = PrettyTable()
+    table.field_names = ["Cooking Item"] + [
+        f"{model_name} (Success Rate | Success/Total)" for model_name in models_item_results.keys()
+    ]
+
+    # Calculate and add rows for each cooking item
     model_totals = {model: {"success": 0, "total": 0} for model in models_item_results.keys()}
-
+
     for item in sorted(all_cooking_items):
-        row = f"{item:<20} | "
-
+        row = [item]
+
         for model_name, model_results in models_item_results.items():
             if item in model_results:
                 success = model_results[item]["success"]
                 total = model_results[item]["total"]
-
+
                 model_totals[model_name]["success"] += success
                 model_totals[model_name]["total"] += total
-
+
                 success_rate = (success / total * 100) if total > 0 else 0
-                row += f"{success_rate:>6.2f}%{'':<12} | {success}/{total}{'':<12} | "
+                row.append(f"{success_rate:.2f}% | {success}/{total}")
             else:
-                row += f"{'N/A':<19} | {'N/A':<19} | "
-
-        print(row)
-
+                row.append("N/A")
+
+        table.add_row(row)
+
+    # Print the table
+    print(table)
+
     # Print the overall results
-    print("-" * 100)
-    row = f"{'Overall':<20} | "
-
+    overall_row = ["Overall"]
     for model_name, totals in model_totals.items():
         success = totals["success"]
         total = totals["total"]
         success_rate = (success / total * 100) if total > 0 else 0
-        row += f"{success_rate:>6.2f}%{'':<12} | {success}/{total}{'':<12} | "
-
-    print(row)
+        overall_row.append(f"{success_rate:.2f}% | {success}/{total}")
+
+    table.add_row(overall_row)
+    print(table)
 
 def print_model_comparison_items_by_blocked(models_data, all_cooking_items):
     print("\nDetailed Model Comparison by Cooking Item and Blocked Agent Count:")
     print("=" * 120)
-
+
     # For each cooking item, create a comparison table by blocked agent count
     for item in sorted(all_cooking_items):
         print(f"\nResults for cooking item: {item}")
         print("-" * 100)
-
-        # Create the header
-        header = f"{'Blocked Agents':<15} | "
-        for model_name in models_data.keys():
-            header += f"{model_name+' Success Rate':<20} | {model_name+' Success/Total':<20} | "
-        print(header)
-        print("-" * 100)
-
+
+        # Create the table
+        table = PrettyTable()
+        table.field_names = ["Blocked Agents"] + [
+            f"{model_name} Success Rate" for model_name in models_data.keys()
+        ] + [
+            f"{model_name} Success/Total" for model_name in models_data.keys()
+        ]
+
         # Get all possible blocked agent counts
         all_blocked_keys = set()
         for model_name, model_data in models_data.items():
             _, _, item_blocked_data = model_data
             for blocked_key in item_blocked_data.get(item, {}).keys():
                 all_blocked_keys.add(blocked_key)
-
+
         # Sort the keys
         sorted_keys = sorted(all_blocked_keys, key=lambda x: int(x.split()[0]))
-
-        # Print each row
+
+        # Add rows for each blocked key
         for blocked_key in sorted_keys:
-            row = f"{blocked_key:<15} | "
-
+            row = [blocked_key]
+
             for model_name, model_data in models_data.items():
                 _, _, item_blocked_data = model_data
-
+
                 if item in item_blocked_data and blocked_key in item_blocked_data[item]:
                     success = item_blocked_data[item][blocked_key]["success"]
                     total = item_blocked_data[item][blocked_key]["total"]
-
+
                     if total > 0:
                         success_rate = (success / total * 100)
-                        row += f"{success_rate:>6.2f}%{'':<12} | {success}/{total}{'':<12} | "
+                        row.append(f"{success_rate:.2f}%")
+                        row.append(f"{success}/{total}")
                     else:
-                        row += f"{'N/A':<19} | {'0/0':<19} | "
+                        row.append("N/A")
+                        row.append("0/0")
                 else:
-                    row += f"{'N/A':<19} | {'N/A':<19} | "
-
-            print(row)
-
+                    row.append("N/A")
+                    row.append("N/A")
+
+            table.add_row(row)
+
+        # Print the table
+        print(table)
+
         # Print item summary for each model
-        print("-" * 100)
-        row = f"{'Overall':<15} | "
-
+        overall_row = ["Overall"]
         for model_name, model_data in models_data.items():
             _, item_results, _ = model_data
-
+
             if item in item_results:
                 success = item_results[item]["success"]
                 total = item_results[item]["total"]
-
+
                 if total > 0:
                     success_rate = (success / total * 100)
-                    row += f"{success_rate:>6.2f}%{'':<12} | {success}/{total}{'':<12} | "
+                    overall_row.append(f"{success_rate:.2f}%")
+                    overall_row.append(f"{success}/{total}")
                 else:
-                    row += f"{'N/A':<19} | {'0/0':<19} | "
+                    overall_row.append("N/A")
+                    overall_row.append("0/0")
             else:
-                row += f"{'N/A':<19} | {'N/A':<19} | "
-
-        print(row)
+                overall_row.append("N/A")
+                overall_row.append("N/A")
+
+        table.add_row(overall_row)
+        print(table)
 
 def generate_item_blocked_data(experiments_root):
     # Organize data by item and blocked agent count
     item_blocked_data = defaultdict(lambda: defaultdict(lambda: {"success": 0, "total": 0}))
 
-    # Track skipped experiments
-    skipped_experiments = []
+    # Keep track of ignored tasks
+    ignored_tasks = []
 
     # Populate the data structure
     for exp_dir in os.listdir(experiments_root):
@@ -304,7 +321,7 @@ def generate_item_blocked_data(experiments_root):
         else:
             blocked_key = "0 agent(s)"
 
-        # Check if the task was successful
+        # Check if the task was successful and if score information exists
         is_successful = False
         score_found = False
         full_exp_path = os.path.join(experiments_root, exp_dir)
@@ -318,103 +335,90 @@ def generate_item_blocked_data(experiments_root):
                     if "turns" in agent_data:
                         for turn in agent_data["turns"]:
                             if turn.get("role") == "system" and "content" in turn:
-                                if isinstance(turn["content"], str) and "Task ended with score" in turn["content"]:
+                                if isinstance(turn["content"], str) and "Task ended with score : " in turn["content"]:
                                     score_found = True
                                     if "Task ended with score : 1" in turn["content"]:
                                         is_successful = True
-                                        break
+                                    break
 
-                if score_found:
+                if is_successful:
                     break
             except:
                 continue
 
-        # Skip experiments with no score information
+        # If no score information was found, skip this task
         if not score_found:
-            skipped_experiments.append(exp_dir)
+            ignored_tasks.append(exp_dir)
             continue
-
+
         # Update the item-blocked data
         for item in cooking_items:
             item_blocked_data[item][blocked_key]["total"] += 1
             if is_successful:
                 item_blocked_data[item][blocked_key]["success"] += 1
 
-    return item_blocked_data, skipped_experiments
+    return item_blocked_data, ignored_tasks
 
 def main():
-    base_dir = "experiments"
-
-    # Get the model directories
-    all_model_dirs = [d for d in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, d))]
-    gpt_dirs = [d for d in all_model_dirs if d.startswith("gpt-4o_30_cooking_tasks")]
-    claude_dirs = [d for d in all_model_dirs if d.startswith("claude-3-5-sonnet-latest_30_cooking_tasks")]
-
-    if not gpt_dirs or not claude_dirs:
-        print("Error: Could not find both model directories. Please check your paths.")
+    # Define lists for model directories and corresponding model names
+    model_dirs = [
+        "experiments/gpt-4o_2agent_NEW_cooking_tasks",
+        # "experiments/claude-3-5-sonnet_2agent_NEW_cooking_tasks",
+        # "experiments/claude-3-5-sonnet_3agent_NEW_cooking_tasks",
+        "experiments/gpt-4o_3agent_NEW_cooking_tasks",
+        # "experiments/1_claude-3-5-sonnet_4agents_NEW_cooking_tasks",
+        "experiments/gpt-4o_4agents_NEW_cooking_tasks",
+        "experiments/gpt-4o_5agents_NEW_cooking_tasks",
+        # "experiments/"
+    ]
+    model_names = [
+        "GPT-4o-2agent",
+        # "Claude-3.5-2agent",
+        "GPT-4o-3agent",
+        # "Claude-3.5-3agent",
+        # "Claude-3.5-4agent",
+        "GPT-4o-4agent",
+        "GPT-4o-5agent",
+        # "Another-Model"
+    ]
+
+    # Ensure both lists are of the same size
+    if len(model_dirs) != len(model_names):
+        print("Error: The number of model directories and model names must be the same.")
         return
-
-    # Use the first directory found for each model
-    gpt_dir = os.path.join(base_dir, gpt_dirs[0])
-    claude_dir = os.path.join(base_dir, claude_dirs[0])
-
-    print(f"Analyzing GPT-4o experiments in: {gpt_dir}")
-    print(f"Analyzing Claude-3.5-Sonnet experiments in: {claude_dir}")
-
+
     # Analyze each model directory
-    gpt_blocked_results, gpt_item_results, gpt_unique_items, gpt_skipped = analyze_experiments(gpt_dir, "GPT-4o")
-    claude_blocked_results, claude_item_results, claude_unique_items, claude_skipped = analyze_experiments(claude_dir, "Claude-3.5")
-
-    # Combine unique cooking items
-    all_cooking_items = gpt_unique_items.union(claude_unique_items)
-
-    # Generate item-blocked data for each model
-    gpt_item_blocked_data, gpt_skipped_detailed = generate_item_blocked_data(gpt_dir)
-    claude_item_blocked_data, claude_skipped_detailed = generate_item_blocked_data(claude_dir)
-
-    # Create model comparison data structures
-    models_blocked_results = {
-        "GPT-4o": gpt_blocked_results,
-        "Claude-3.5": claude_blocked_results
-    }
-
-    models_item_results = {
-        "GPT-4o": gpt_item_results,
-        "Claude-3.5": claude_item_results
-    }
-
-    models_data = {
-        "GPT-4o": (gpt_blocked_results, gpt_item_results, gpt_item_blocked_data),
-        "Claude-3.5": (claude_blocked_results, claude_item_results, claude_item_blocked_data)
-    }
-
+    models_blocked_results = {}
+    models_item_results = {}
+    all_cooking_items = set()
+    total_ignored_tasks = 0
+
+    for model_dir, model_name in zip(model_dirs, model_names):
+        print(f"Analyzing {model_name} experiments in: {model_dir}")
+
+        blocked_results, item_results, unique_items, ignored_tasks = analyze_experiments(model_dir, model_name)
+
+        models_blocked_results[model_name] = blocked_results
+        models_item_results[model_name] = item_results
+        all_cooking_items.update(unique_items)
+        total_ignored_tasks += len(ignored_tasks)
+
+        if ignored_tasks:
+            print(f" - {model_name}: Ignored {len(ignored_tasks)} tasks with no score information.")
+
+    # Print summary of ignored tasks
+    if total_ignored_tasks > 0:
+        print(f"\nTotal ignored tasks (missing score information): {total_ignored_tasks}")
+
     # Print the comparison tables
     print_model_comparison_blocked(models_blocked_results)
     print_model_comparison_items(models_item_results, all_cooking_items)
-    print_model_comparison_items_by_blocked(models_data, all_cooking_items)
-
+
     # Print overall statistics
     print("\nUnique Cooking Items Found:")
     print("=" * 60)
     print(", ".join(sorted(all_cooking_items)))
     print(f"Total unique items: {len(all_cooking_items)}")
-
-    # Print skipped experiment information
-    print("\nSkipped Experiments (No Score Information):")
-    print("=" * 60)
-    print(f"GPT-4o: {len(gpt_skipped)} experiments skipped")
-    print(f"Claude-3.5: {len(claude_skipped)} experiments skipped")
-
-    if gpt_skipped or claude_skipped:
-        print("\nSkipped experiment directories:")
-        if gpt_skipped:
-            print("GPT-4o:")
-            for exp in gpt_skipped:
-                print(f" - {exp}")
-        if claude_skipped:
-            print("Claude-3.5:")
-            for exp in claude_skipped:
-                print(f" - {exp}")
 
 if __name__ == "__main__":
     main()
\ No newline at end of file
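
For reference, a minimal sketch of the score-extraction convention that both patched scripts rely on: each agent's JSON log is scanned for a system turn containing "Task ended with score : X", a task whose logs contain no such message is ignored/skipped, and a score of 1 counts as success. The helper name, the .json filename filter, and the flat one-folder-per-experiment layout below are illustrative assumptions, not part of the patch.

# Illustrative sketch only (not part of the patch); mirrors the convention above.
import json
import os

SCORE_MARKER = "Task ended with score : "

def read_task_outcome(exp_path):
    """Return (score_found, is_successful) for one experiment folder.

    Assumes each agent log is a JSON object with a "turns" list, as in the
    patched scripts; (False, _) means the task would be ignored/skipped.
    """
    score_found = False
    is_successful = False
    for name in os.listdir(exp_path):
        if not name.endswith(".json"):
            continue
        try:
            with open(os.path.join(exp_path, name), "r") as f:
                agent_data = json.load(f)
        except (json.JSONDecodeError, IOError):
            continue  # unreadable log: fall back to the other agents' files
        for turn in agent_data.get("turns", []):
            content = turn.get("content")
            if turn.get("role") == "system" and isinstance(content, str) and SCORE_MARKER in content:
                score_found = True
                if SCORE_MARKER + "1" in content:
                    is_successful = True
                break
        if is_successful:
            break
    return score_found, is_successful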