diff --git a/evaluation_script.py b/evaluation_script.py
index c461a12..e44ff8e 100644
--- a/evaluation_script.py
+++ b/evaluation_script.py
@@ -57,7 +57,6 @@ def analyze_json_file(file_path):
                 for turn in data["turns"]:
                     if turn.get("role") == "system" and "content" in turn:
                         if isinstance(turn["content"], str) and "Task ended with score : " in turn["content"]:
-                            score_found = True
                             if "Task ended with score : 1" in turn["content"]:
                                 return 1
                             elif "Task ended with score : 0" in turn["content"]:
@@ -66,7 +65,8 @@ def analyze_json_file(file_path):
                                 score = float(turn["content"].split(":")[-1].strip())
                                 return score
                             
-        return False
+                            
+        return None
     except FileNotFoundError:
         print(f"Error: File not found: {file_path}")
         return None
@@ -86,11 +86,14 @@ def extract_result(folder_path):
         return None
     else: 
         score = None
+        curr_score = 0
         for json_file in json_files:
             score = analyze_json_file(json_file)
             if score is not None:
-                return score
-        return 0
+                max_score = max(score, curr_score)
+                curr_score = max_score
+
+        return curr_score
     
 def aggregate_results(local_folders):
     """
@@ -106,22 +109,77 @@ def aggregate_results(local_folders):
 
     total = 0
     successful = 0
+    successful_tasks = []
     for folder_path in tqdm(local_folders):
         folder_name = os.path.basename(folder_path)
 
         try: 
             result = extract_result(folder_path)
+            
+            if result == 1:
+                successful_tasks.append(folder_name)
             if result is not None:
                 total += 1
                 successful += result
         except Exception as e:
             print(f"Error processing {folder_name}: {e}")
+
+    successful_tasks.sort()
+
+    for i in successful_tasks:
+        print(f"Successful task: {i}")
     
     return {
         "total": total,
         "successful": successful,
     }
 
+def check_folder_results(folder_path):
+    """
+    Evaluate the results stored under a folder, print a summary, and return the success metrics.
+    
+    Args:
+        folder_path (str): Folder to check; its immediate subfolders are aggregated, or the folder itself if it has none.
+        
+    Returns:
+        dict: "total", "successful" and "success_rate" (0.0 when total is 0); None if the path is missing or not a directory.
+    """
+    print(f"Checking results in folder: {folder_path}")
+    
+    # Bail out early (returning None) when the path does not exist at all
+    if not os.path.exists(folder_path):
+        print(f"Error: Folder not found: {folder_path}")
+        return None
+    
+    # Collect only the immediate subdirectories (task IDs) of the given folder; plain files are ignored
+    if os.path.isdir(folder_path):
+        subfolders = [f for f in glob.glob(os.path.join(folder_path, "*")) if os.path.isdir(f)]
+        if subfolders:
+            # Each subfolder is handed to aggregate_results as a separate entry
+            print(f"Found {len(subfolders)} subfolders to evaluate")
+            results = aggregate_results(subfolders)
+        else:
+            # No subdirectories: aggregate over a one-element list holding the folder itself
+            print("No subfolders found, evaluating the folder itself")
+            results = aggregate_results([folder_path])
+            
+        # success_rate = successful / total; fall back to 0.0 when nothing was counted to avoid dividing by zero
+        if results["total"] > 0:
+            results["success_rate"] = results["successful"] / results["total"]
+        else:
+            results["success_rate"] = 0.0
+            
+        # Print a human-readable summary to stdout (success rate is formatted as a percentage)
+        print("\n=== Evaluation Results ===")
+        print(f"Total tasks evaluated: {results['total']}")
+        print(f"Successful tasks: {results['successful']}")
+        print(f"Success rate: {results['success_rate']:.2%}")
+        
+        return results
+    else:
+        print(f"Error: {folder_path} is not a directory")
+        return None
+
 def read_settings(file_path):
     """Read and parse the settings.js file to get agent profiles."""
     with open(file_path, 'r', encoding='utf-8') as file:
@@ -722,9 +780,16 @@ def main():
     parser.add_argument('--num_examples', default=2, type=int, help='Maximum number of turns before summarizing')
     parser.add_argument('--no-pruning', action='store_true', help='Disable pruning of the actions')
     parser.add_argument('--block_conversation', action='store_true', help='Block conversation actions')
+    parser.add_argument('--check', metavar='FOLDER_PATH', help='Check and evaluate results in the specified folder without running experiments')
 
     args = parser.parse_args()
     print(args)
+    
+    # If --check flag is provided, evaluate results in the specified folder and exit
+    if args.check:
+        check_folder_results(args.check)
+        return
+    
     if not args.no_launch_world:
         try: 
             subprocess.run(['tmux', 'kill-server'], check=True)