Spaces:
Running
Running
Regan Huff
committed on
Bump agenteval version in leaderboard code (#20)
Browse files
- requirements.txt +3 -3
- submission.py +4 -4
requirements.txt
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
agent-eval==0.1.
|
| 2 |
aiobotocore==2.22.0
|
| 3 |
aiofiles==24.1.0
|
| 4 |
aiohappyeyeballs==2.6.1
|
|
@@ -43,7 +43,7 @@ huggingface-hub==0.30.2
|
|
| 43 |
idna==3.10
|
| 44 |
ijson==3.3.0
|
| 45 |
importlib_metadata==8.7.0
|
| 46 |
-
inspect_ai==0.3.
|
| 47 |
isort==6.0.1
|
| 48 |
itsdangerous==2.2.0
|
| 49 |
Jinja2==3.1.6
|
|
@@ -110,7 +110,7 @@ sniffio==1.3.1
|
|
| 110 |
soupsieve==2.7
|
| 111 |
starlette==0.46.2
|
| 112 |
tenacity==9.1.2
|
| 113 |
-
textual
|
| 114 |
tiktoken==0.9.0
|
| 115 |
tokenizers==0.21.1
|
| 116 |
tomli==2.2.1
|
|
|
|
| 1 |
+
agent-eval==0.1.24
|
| 2 |
aiobotocore==2.22.0
|
| 3 |
aiofiles==24.1.0
|
| 4 |
aiohappyeyeballs==2.6.1
|
|
|
|
| 43 |
idna==3.10
|
| 44 |
ijson==3.3.0
|
| 45 |
importlib_metadata==8.7.0
|
| 46 |
+
inspect_ai==0.3.104
|
| 47 |
isort==6.0.1
|
| 48 |
itsdangerous==2.2.0
|
| 49 |
Jinja2==3.1.6
|
|
|
|
| 110 |
soupsieve==2.7
|
| 111 |
starlette==0.46.2
|
| 112 |
tenacity==9.1.2
|
| 113 |
+
textual<3.0.0
|
| 114 |
tiktoken==0.9.0
|
| 115 |
tokenizers==0.21.1
|
| 116 |
tomli==2.2.1
|
submission.py
CHANGED
|
@@ -16,7 +16,7 @@ from agenteval import (
|
|
| 16 |
upload_folder_to_hf,
|
| 17 |
upload_summary_to_hf,
|
| 18 |
)
|
| 19 |
-
from agenteval.models import
|
| 20 |
from agenteval.leaderboard.upload import sanitize_path_component
|
| 21 |
from datasets import Dataset, DatasetDict, VerificationMode, load_dataset
|
| 22 |
from datasets.data_files import EmptyDatasetError
|
|
@@ -58,7 +58,7 @@ os.makedirs(EXTRACTED_DATA_DIR, exist_ok=True)
|
|
| 58 |
CACHED_VIEWERS = {}
|
| 59 |
CACHED_TAG_MAPS = {}
|
| 60 |
|
| 61 |
-
# --- Submission Logic (largely unchanged from original, ensure
|
| 62 |
def try_load_dataset_submission(*args, **kwargs) -> DatasetDict: # Renamed to avoid conflict if LV has one
|
| 63 |
try:
|
| 64 |
return load_dataset(*args, **kwargs)
|
|
@@ -224,7 +224,7 @@ def add_new_eval(
|
|
| 224 |
if not json_path.exists():
|
| 225 |
return format_error(f"Missing manifest {AGENTEVAL_MANIFEST_NAME} in submission.")
|
| 226 |
|
| 227 |
-
eval_result_obj =
|
| 228 |
if eval_result_obj.suite_config.version != CONFIG_NAME:
|
| 229 |
return format_error(f"Suite version mismatch: expected {CONFIG_NAME}, got {eval_result_obj.suite_config.version}.")
|
| 230 |
if eval_result_obj.split != val_or_test:
|
|
@@ -250,7 +250,7 @@ def add_new_eval(
|
|
| 250 |
else: print("mock uploaded scored submission", flush=True)
|
| 251 |
|
| 252 |
|
| 253 |
-
# Update
|
| 254 |
eval_result_obj.submission.agent_name = agent_name
|
| 255 |
eval_result_obj.submission.agent_description = agent_description
|
| 256 |
eval_result_obj.submission.agent_url = agent_url
|
|
|
|
| 16 |
upload_folder_to_hf,
|
| 17 |
upload_summary_to_hf,
|
| 18 |
)
|
| 19 |
+
from agenteval.leaderboard.models import LeaderboardSubmission
|
| 20 |
from agenteval.leaderboard.upload import sanitize_path_component
|
| 21 |
from datasets import Dataset, DatasetDict, VerificationMode, load_dataset
|
| 22 |
from datasets.data_files import EmptyDatasetError
|
|
|
|
| 58 |
CACHED_VIEWERS = {}
|
| 59 |
CACHED_TAG_MAPS = {}
|
| 60 |
|
| 61 |
+
# --- Submission Logic (largely unchanged from original, ensure LeaderboardSubmission and other deps are fine) ---
|
| 62 |
def try_load_dataset_submission(*args, **kwargs) -> DatasetDict: # Renamed to avoid conflict if LV has one
|
| 63 |
try:
|
| 64 |
return load_dataset(*args, **kwargs)
|
|
|
|
| 224 |
if not json_path.exists():
|
| 225 |
return format_error(f"Missing manifest {AGENTEVAL_MANIFEST_NAME} in submission.")
|
| 226 |
|
| 227 |
+
eval_result_obj = LeaderboardSubmission.model_validate_json(json_path.read_text(encoding="utf-8"))
|
| 228 |
if eval_result_obj.suite_config.version != CONFIG_NAME:
|
| 229 |
return format_error(f"Suite version mismatch: expected {CONFIG_NAME}, got {eval_result_obj.suite_config.version}.")
|
| 230 |
if eval_result_obj.split != val_or_test:
|
|
|
|
| 250 |
else: print("mock uploaded scored submission", flush=True)
|
| 251 |
|
| 252 |
|
| 253 |
+
# Update LeaderboardSubmission with submission details
|
| 254 |
eval_result_obj.submission.agent_name = agent_name
|
| 255 |
eval_result_obj.submission.agent_description = agent_description
|
| 256 |
eval_result_obj.submission.agent_url = agent_url
|