Regan Huff committed on
Commit
18f8616
·
unverified ·
1 Parent(s): aca1950

Bump agenteval version in leaderboard code (#20)

Browse files
Files changed (2) hide show
  1. requirements.txt +3 -3
  2. submission.py +4 -4
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- agent-eval==0.1.13
2
  aiobotocore==2.22.0
3
  aiofiles==24.1.0
4
  aiohappyeyeballs==2.6.1
@@ -43,7 +43,7 @@ huggingface-hub==0.30.2
43
  idna==3.10
44
  ijson==3.3.0
45
  importlib_metadata==8.7.0
46
- inspect_ai==0.3.94
47
  isort==6.0.1
48
  itsdangerous==2.2.0
49
  Jinja2==3.1.6
@@ -110,7 +110,7 @@ sniffio==1.3.1
110
  soupsieve==2.7
111
  starlette==0.46.2
112
  tenacity==9.1.2
113
- textual==3.2.0
114
  tiktoken==0.9.0
115
  tokenizers==0.21.1
116
  tomli==2.2.1
 
1
+ agent-eval==0.1.24
2
  aiobotocore==2.22.0
3
  aiofiles==24.1.0
4
  aiohappyeyeballs==2.6.1
 
43
  idna==3.10
44
  ijson==3.3.0
45
  importlib_metadata==8.7.0
46
+ inspect_ai==0.3.104
47
  isort==6.0.1
48
  itsdangerous==2.2.0
49
  Jinja2==3.1.6
 
110
  soupsieve==2.7
111
  starlette==0.46.2
112
  tenacity==9.1.2
113
+ textual<3.0.0
114
  tiktoken==0.9.0
115
  tokenizers==0.21.1
116
  tomli==2.2.1
submission.py CHANGED
@@ -16,7 +16,7 @@ from agenteval import (
16
  upload_folder_to_hf,
17
  upload_summary_to_hf,
18
  )
19
- from agenteval.models import EvalResult
20
  from agenteval.leaderboard.upload import sanitize_path_component
21
  from datasets import Dataset, DatasetDict, VerificationMode, load_dataset
22
  from datasets.data_files import EmptyDatasetError
@@ -58,7 +58,7 @@ os.makedirs(EXTRACTED_DATA_DIR, exist_ok=True)
58
  CACHED_VIEWERS = {}
59
  CACHED_TAG_MAPS = {}
60
 
61
- # --- Submission Logic (largely unchanged from original, ensure EvalResult and other deps are fine) ---
62
  def try_load_dataset_submission(*args, **kwargs) -> DatasetDict: # Renamed to avoid conflict if LV has one
63
  try:
64
  return load_dataset(*args, **kwargs)
@@ -224,7 +224,7 @@ def add_new_eval(
224
  if not json_path.exists():
225
  return format_error(f"Missing manifest {AGENTEVAL_MANIFEST_NAME} in submission.")
226
 
227
- eval_result_obj = EvalResult.model_validate_json(json_path.read_text(encoding="utf-8"))
228
  if eval_result_obj.suite_config.version != CONFIG_NAME:
229
  return format_error(f"Suite version mismatch: expected {CONFIG_NAME}, got {eval_result_obj.suite_config.version}.")
230
  if eval_result_obj.split != val_or_test:
@@ -250,7 +250,7 @@ def add_new_eval(
250
  else: print("mock uploaded scored submission", flush=True)
251
 
252
 
253
- # Update EvalResult with submission details
254
  eval_result_obj.submission.agent_name = agent_name
255
  eval_result_obj.submission.agent_description = agent_description
256
  eval_result_obj.submission.agent_url = agent_url
 
16
  upload_folder_to_hf,
17
  upload_summary_to_hf,
18
  )
19
+ from agenteval.leaderboard.models import LeaderboardSubmission
20
  from agenteval.leaderboard.upload import sanitize_path_component
21
  from datasets import Dataset, DatasetDict, VerificationMode, load_dataset
22
  from datasets.data_files import EmptyDatasetError
 
58
  CACHED_VIEWERS = {}
59
  CACHED_TAG_MAPS = {}
60
 
61
+ # --- Submission Logic (largely unchanged from original, ensure LeaderboardSubmission and other deps are fine) ---
62
  def try_load_dataset_submission(*args, **kwargs) -> DatasetDict: # Renamed to avoid conflict if LV has one
63
  try:
64
  return load_dataset(*args, **kwargs)
 
224
  if not json_path.exists():
225
  return format_error(f"Missing manifest {AGENTEVAL_MANIFEST_NAME} in submission.")
226
 
227
+ eval_result_obj = LeaderboardSubmission.model_validate_json(json_path.read_text(encoding="utf-8"))
228
  if eval_result_obj.suite_config.version != CONFIG_NAME:
229
  return format_error(f"Suite version mismatch: expected {CONFIG_NAME}, got {eval_result_obj.suite_config.version}.")
230
  if eval_result_obj.split != val_or_test:
 
250
  else: print("mock uploaded scored submission", flush=True)
251
 
252
 
253
+ # Update LeaderboardSubmission with submission details
254
  eval_result_obj.submission.agent_name = agent_name
255
  eval_result_obj.submission.agent_description = agent_description
256
  eval_result_obj.submission.agent_url = agent_url