"""Based on https://huggingface.co/spaces/demo-leaderboard-backend/leaderboard/blob/main/src/leaderboard/read_evals.py
Enhanced with Pydantic models.
"""
import glob
import json
import os
import warnings
from pathlib import Path
from typing import Any

from loguru import logger
from pydantic_core import from_json

from src.schemas.eval_result import EvalResult
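
# Expected interface of EvalResult, inferred from how it is used below (the schema
# itself lives in src.schemas.eval_result and is not reproduced here):
#   - EvalResult.init_from_dict(data)            -> EvalResult
#   - eval_result.update_with_request_file(path) -> enriches the result with request metadata
#   - eval_result.eval_name / eval_result.results
#   - eval_result.to_dict()                      -> raises KeyError when eval values are missing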


def get_request_file_for_model(requests_path: str, model_name: str, precision: str) -> str:
    """Select the request file for a given model and precision, keeping only runs tagged as FINISHED."""
    request_files = os.path.join(
        requests_path,
        f"{model_name}_eval_request_*.json",
    )
    request_files = glob.glob(request_files)

    # Select the correct request file (matching precision, FINISHED status only).
    request_file = ""
    request_files = sorted(request_files, reverse=True)
    for tmp_request_file in request_files:
        with open(tmp_request_file) as f:
            req_content = json.load(f)
            if req_content["status"] in ["FINISHED"] and req_content["precision"] == precision.split(".")[-1]:
                request_file = tmp_request_file
    return request_file
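
# Illustrative call (the directory and file names are hypothetical; request files
# are assumed to follow the upstream "<model>_eval_request_*.json" naming scheme):
#
#   get_request_file_for_model(
#       requests_path="eval-queue",
#       model_name="org/model",
#       precision="Precision.float16",
#   )
#   # -> path of a FINISHED request whose "precision" field equals "float16",
#   #    or "" when no matching file exists.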


def get_raw_eval_results(results_versions_dir: Path, requests_path: Path, *, results_version: str) -> list[EvalResult]:
    """From the results folder root, build the list of EvalResult objects.

    Entries sharing the same eval name are merged; incomplete results are skipped.
    """
    versioned_result_file = results_versions_dir / f"bench_{results_version}.json"
    if not versioned_result_file.exists():
        raise FileNotFoundError(
            f"version={results_version!r} results file not found: {versioned_result_file.as_posix()!r}"
        )
    logger.info(f"Loading results from: {versioned_result_file.as_posix()!r}")
    raw_results_model: dict[str, dict[str, Any]] = from_json(versioned_result_file.read_bytes())
    logger.info(f"Loaded {len(raw_results_model)} results")

    eval_results: dict[str, EvalResult] = {}
    for _model_key, model_data in raw_results_model.items():
        # Create the result and enrich it with its request metadata.
        eval_result = EvalResult.init_from_dict(model_data)
        eval_result.update_with_request_file(requests_path)

        # Store results of the same eval together, merging non-null values.
        eval_name = eval_result.eval_name
        if eval_name in eval_results:
            results_loaded = {k: v for k, v in eval_result.results.items() if v is not None}
            eval_results[eval_name].results.update(results_loaded)
        else:
            eval_results[eval_name] = eval_result

    results: list[EvalResult] = []
    for v in eval_results.values():
        try:
            v.to_dict()  # check that the dict version is complete
            results.append(v)
        except KeyError as e:  # not all eval values present
            warnings.warn(f"Not all eval values present for {v.eval_name}: {e}", stacklevel=2)
            continue
    return results
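

if __name__ == "__main__":
    # Minimal smoke-test sketch. The directory names and version string below are
    # placeholders (not defined anywhere in this module); adjust them to the actual
    # data layout before running.
    demo_results = get_raw_eval_results(
        Path("data/results_versions"),
        Path("data/requests"),
        results_version="v1",
    )
    logger.info(f"Parsed {len(demo_results)} complete eval results")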