Mirror of https://github.com/kolbytn/mindcraft.git (synced 2025-07-26 09:55:26 +02:00)
fix: Resolve API naming inconsistency in analyse_results module
- Re-export enhanced function as 'aggregate_results' for backward compatibility
- Users can now import aggregate_results and get the enhanced functionality
- Updated architecture documentation to reflect the corrected API
- Maintains intuitive API while providing enhanced model extraction features
This commit is contained in:
parent f7947ec3c2
commit 18eca2f5d9

2 changed files with 10 additions and 7 deletions
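For context, a minimal usage sketch of the API this commit settles on: callers import `aggregate_results` from `tasks.analyse_results` and get the enhanced, model-aware aggregation, while the base implementation stays available in `tasks.evaluation`. The folder paths and the empty task-definition placeholder below are assumptions for illustration, not values from the repository.

```python
# Sketch only: assumes the module layout shown in the diff below
# (tasks/evaluation.py and tasks/analyse_results.py).
from tasks.evaluation import aggregate_results as base_aggregate_results
from tasks.analyse_results import aggregate_results  # enhanced re-export

folders = ["experiments/run_01", "experiments/run_02"]  # hypothetical paths
task_definitions = {}  # hypothetical task definitions

# Enhanced version: base aggregation plus model-name extraction.
df = aggregate_results(folders, task_definitions)

# Base version from the centralized evaluation module, without model extraction.
base_df = base_aggregate_results(folders, task_definitions)
```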
@@ -39,7 +39,7 @@ graph TD
end

A -- "Calls" --> E
B -- "Calls" --> E
B -- "Calls" --> F
C -- "Calls" --> E

E -- "Iterates over agent logs, calls" --> D
@@ -155,7 +155,7 @@ def aggregate_results_to_dataframe(task_outcomes: List[Dict[str, Any]]) -> pd.Da
    * After the loop, it will call `evaluation.aggregate_results_to_dataframe()` to get the final DataFrame.
    * All analysis (e.g., calculating overall success rate) will be done using the resulting DataFrame.
3. **Refactor `tasks/analyse_results.py`:**
    * This script will follow the same refactoring pattern as `evaluation_script.py`.
    * It calls `aggregate_results`, an enhanced version of the `aggregate_results` function from `evaluation.py` that adds model name extraction.
    * The complex, name-based categorization (`is_base`, `base_without_plan`) will be entirely replaced by simple Pandas `groupby()` operations on the DataFrame's columns (e.g., `df.groupby('task_type').success_rate.mean()`).
4. **Refactor `tasks/analyze_cooking_tasks.py`:**
    * This script will also be refactored to use the new `evaluation` module.
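A minimal sketch of the groupby-based analysis described in the documentation hunk above, replacing the name-based categorization. The column names (`task_type`, `model_name`, `success`) and the sample rows are illustrative assumptions, not the confirmed DataFrame schema.

```python
import pandas as pd

# Hypothetical results in the shape the refactor aims for: one row per task run.
results_df = pd.DataFrame(
    {
        "task_id": ["cooking_1", "cooking_2", "crafting_1", "crafting_2"],
        "task_type": ["cooking", "cooking", "crafting", "crafting"],
        "model_name": ["model_a", "model_b", "model_a", "model_b"],
        "success": [True, False, True, True],
    }
)

# Name-based categorization (is_base, base_without_plan, ...) collapses into
# plain groupby aggregations over explicit columns; the mean of a boolean
# success column is a success rate.
success_by_task_type = results_df.groupby("task_type")["success"].mean()
success_by_model = results_df.groupby("model_name")["success"].mean()
print(success_by_task_type)
print(success_by_model)
```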
@@ -13,9 +13,7 @@ import concurrent.futures
# Set up basic logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

-from tasks.evaluation import (
-    aggregate_results,
-)
+from tasks.evaluation import aggregate_results as original_aggregate_results

# --- Constants and Setup ---
# Calculate project root directory to allow for absolute path resolution
@@ -115,7 +113,7 @@ def analyze_results_with_model_extraction(local_folders: List[str], task_definit
        pd.DataFrame: A DataFrame containing the detailed evaluation results with model names.
    """
    # Use the centralized function with progress bar enabled
-    results_df = aggregate_results(local_folders, task_definitions, use_tqdm=True)
+    results_df = original_aggregate_results(local_folders, task_definitions, use_tqdm=True)

    # Extract model names from folder paths if possible
    if not results_df.empty and 'task_id' in results_df.columns:
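The "extract model names from folder paths" step is only hinted at in the hunk above. A hedged sketch of one way such extraction could look; the `extract_model_name` helper and the `<model>_<suffix>` folder naming convention are assumptions for illustration, not the repository's actual logic.

```python
import os

def extract_model_name(folder_path: str) -> str:
    """Hypothetical helper: read a model name out of a run folder's name.

    Assumes a '<model>_<suffix>' naming convention; the real extraction
    logic in analyse_results.py may differ.
    """
    base = os.path.basename(os.path.normpath(folder_path))
    return base.split("_")[0] if "_" in base else base

# Hypothetical run folders:
print(extract_model_name("experiments/gpt-4o_run1"))      # -> gpt-4o
print(extract_model_name("experiments/claude_baseline"))  # -> claude
```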
@@ -139,6 +137,11 @@ def analyze_results_with_model_extraction(local_folders: List[str], task_definit

    return results_df

+
+# Re-export the enhanced function under the name `aggregate_results`
+aggregate_results = analyze_results_with_model_extraction
+
+
def get_immediate_subdirectories(a_dir: str) -> List[str]:
    """
    Gets a list of immediate subdirectories within a given directory.
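Taken together, the Python hunks leave the module with an import-rename plus module-level alias pattern. A condensed sketch of that shape, with the function body trimmed and the task-definition parameter left unannotated because its type is truncated in the hunk header; this is a reconstruction, not the full file.

```python
# Condensed sketch of the post-commit module shape.
from typing import List

import pandas as pd

from tasks.evaluation import aggregate_results as original_aggregate_results

def analyze_results_with_model_extraction(
    local_folders: List[str], task_definitions  # full signature trimmed in the diff
) -> pd.DataFrame:
    # Delegate the heavy lifting to the centralized evaluation module...
    results_df = original_aggregate_results(local_folders, task_definitions, use_tqdm=True)
    # ...then layer model-name extraction on top (details elided here).
    return results_df

# Re-export the enhanced function under the public name, so callers that
# import `aggregate_results` transparently get the enhanced behavior.
aggregate_results = analyze_results_with_model_extraction
```

The alias only preserves backward compatibility because the wrapper accepts the same `(folders, task_definitions)` call shape as the original, which is exactly how `main()` invokes it in the next hunk.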
@@ -203,7 +206,7 @@ def main() -> None:
        return

    # --- Step 3: Aggregate Results into a DataFrame ---
-    results_df = analyze_results_with_model_extraction(folders_to_analyze, task_definitions)
+    results_df = aggregate_results(folders_to_analyze, task_definitions)

    if results_df.empty:
        logging.warning("Analysis generated no results. Exiting.")
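For orientation, a hedged sketch of the call flow `main()` ends up with after this change. How folders are discovered and how task definitions are loaded is not shown in the diff, so those parts are placeholders; only the aggregation call, the empty check, and the warning mirror the hunk above.

```python
import logging
from typing import List

# Names below mirror the diff context; folder discovery and task-definition
# loading are placeholders, not the repository's real code.
from tasks.analyse_results import aggregate_results, get_immediate_subdirectories

def run_analysis(results_root: str, task_definitions) -> None:
    folders_to_analyze: List[str] = get_immediate_subdirectories(results_root)
    if not folders_to_analyze:
        logging.warning("No experiment folders found. Exiting.")
        return

    # Step 3 in main(): aggregate per-folder outcomes into one DataFrame.
    results_df = aggregate_results(folders_to_analyze, task_definitions)

    if results_df.empty:
        logging.warning("Analysis generated no results. Exiting.")
        return

    logging.info("Aggregated %d result rows.", len(results_df))
```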