import argparse import json import subprocess import time from datetime import datetime import re def read_settings(file_path): """Read and parse the settings.js file to get agent profiles.""" with open(file_path, 'r', encoding='utf-8') as file: content = file.read() # Remove `export default` and trailing commas content = re.sub(r'export\s+default', '', content) content = re.sub(r',\s*(?=[}\]])', '', content) # Remove JavaScript comments content = re.sub(r'//.*', '', content) # Remove trailing commas (e.g., before } or ]) content = re.sub(r',\s*(?=[}\]])', '', content) # Strip leading and trailing whitespace content = content.strip() json_data = json.loads(content) profiles = json_data['profiles'] ## profiles is a list of strings like "./andy.json" and "./bob.json" agent_names = [profile.split('/')[-1].split('.')[0] for profile in profiles] return agent_names def check_task_completion(agents): """Check memory.json files of all agents to determine task success/failure.""" for agent in agents: memory_path = f"bots/{agent}/memory.json" try: with open(memory_path, 'r') as f: memory = json.load(f) # Check the last system message in turns for turn in reversed(memory['turns']): if turn['role'] == 'system' and 'code' in turn['content']: # Extract completion code if 'code : 2' in turn['content']: return True # Task successful elif 'code : 4' in turn['content']: return False # Task failed except (FileNotFoundError, json.JSONDecodeError) as e: print(f"Error reading memory for agent {agent}: {e}") continue return False # Default to failure if no conclusive result found def update_results_file(task_id, success_count, total_count, time_taken, experiment_results, results_filename): """Update the results file with current success ratio and time taken.""" success_ratio = success_count / total_count with open(results_filename, 'w') as f: # 'w' mode overwrites the file each time f.write(f"Task ID: {task_id}\n") f.write(f"Experiments completed: {total_count}\n") f.write(f"Successful experiments: {success_count}\n") f.write(f"Success ratio: {success_ratio:.2f}\n") f.write(f"Time taken for last experiment: {time_taken:.2f} seconds\n") # Write individual experiment results for i, result in enumerate(experiment_results, 1): f.write(f"Experiment {i}: {'Success' if result['success'] else 'Failure'}, Time taken: {result['time_taken']:.2f} seconds\n") # Write aggregated metrics total_time = sum(result['time_taken'] for result in experiment_results) f.write(f"\nAggregated metrics:\n") f.write(f"Total experiments: {total_count}\n") f.write(f"Total successful experiments: {success_count}\n") f.write(f"Overall success ratio: {success_ratio:.2f}\n") f.write(f"Total time taken: {total_time:.2f} seconds\n") f.write(f"Average time per experiment: {total_time / total_count:.2f} seconds\n") f.write(f"Last updated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n") def run_experiment(task_path, task_id, num_exp): """Run the specified number of experiments and track results.""" # Read agent profiles from settings.js agents = read_settings(file_path="settings.js") print(f"Detected agents: {agents}") # Generate timestamp at the start of experiments timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') results_filename = f"results_{task_id}_{timestamp}.txt" print(f"Results will be saved to: {results_filename}") success_count = 0 experiment_results = [] for exp_num in range(num_exp): print(f"\nRunning experiment {exp_num + 1}/{num_exp}") start_time = time.time() # Run the node command cmd = f"node main.js --task_path {task_path} --task_id {task_id}" try: subprocess.run(cmd, shell=True, check=True) except subprocess.CalledProcessError as e: print(f"Error running experiment: {e}") continue # Check if task was successful success = check_task_completion(agents) if success: success_count += 1 print(f"Experiment {exp_num + 1} successful") else: print(f"Experiment {exp_num + 1} failed") end_time = time.time() time_taken = end_time - start_time # Store individual experiment result experiment_results.append({ 'success': success, 'time_taken': time_taken }) # Update results file after each experiment using the constant filename update_results_file(task_id, success_count, exp_num + 1, time_taken, experiment_results, results_filename) # Small delay between experiments time.sleep(1) final_ratio = success_count / num_exp print(f"\nExperiments completed. Final success ratio: {final_ratio:.2f}") def main(): parser = argparse.ArgumentParser(description='Run Minecraft AI agent experiments') parser.add_argument('task_path', help='Path to the task file') parser.add_argument('task_id', help='ID of the task to run') parser.add_argument('num_exp', type=int, help='Number of experiments to run') args = parser.parse_args() run_experiment(args.task_path, args.task_id, args.num_exp) if __name__ == "__main__": main()