yangzhitao committed on
Commit f84dfbe · 1 Parent(s): 13edd99

fix: normalize model titles in MetaToml and update full_model retrieval in EvalResult

Files changed (2)
  1. src/leaderboard/read_evals.py +6 -2
  2. src/prepare.py +6 -5
src/leaderboard/read_evals.py CHANGED
@@ -90,7 +90,8 @@ class EvalResult(BaseModel):
 
         meta_toml = load_meta_toml()
         # update full_model from meta_toml if it exists
-        full_model = meta_toml.model_title_to_repo_id.get(full_model, full_model)
+        if "/" not in full_model:
+            full_model = meta_toml.model_title_to_repo_id.get(full_model, full_model)
 
         still_on_hub, _, model_config = is_model_on_hub(
             full_model, config.model_sha or "main", trust_remote_code=True, test_tokenizer=False
@@ -106,7 +107,10 @@ class EvalResult(BaseModel):
         for task in BENCHMARKS:
             # We average all scores of a given metric (not all metrics are present in all files)
             # TODO: support multiple metrics
-            accs = np.array([v.get("acc", None) for k, v in data.results.items() if task.key == k])
+            metric_keys = ["caa"]
+            accs = np.array([
+                v.get(metric_key, None) for k, v in data.results.items() if task.key == k for metric_key in metric_keys
+            ])
             if accs.size == 0 or any(acc is None for acc in accs):
                 continue
 
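Note on the read_evals.py change: meta.toml titles contain no "/", while Hub repo ids always do, so the new guard only remaps bare titles and leaves real repo ids untouched; the score loop now reads the keys listed in metric_keys (currently just "caa") instead of a hard-coded "acc". A minimal, self-contained sketch of that behaviour; resolve_full_model, collect_scores, and the sample mapping below are illustrative stand-ins, not the project's actual code:

import numpy as np

# Hypothetical title -> repo id mapping (stands in for meta_toml.model_title_to_repo_id).
model_title_to_repo_id = {"my model": "org/my-model"}

def resolve_full_model(full_model: str) -> str:
    # Only bare titles (no "/") are remapped; real repo ids pass through unchanged.
    if "/" not in full_model:
        full_model = model_title_to_repo_id.get(full_model, full_model)
    return full_model

def collect_scores(results: dict[str, dict[str, float]], task_key: str):
    # Mirrors the updated aggregation: read every key in metric_keys and skip
    # the task if any of them is missing (v.get(...) returns None).
    metric_keys = ["caa"]
    accs = np.array([
        v.get(metric_key, None) for k, v in results.items() if k == task_key for metric_key in metric_keys
    ])
    if accs.size == 0 or any(acc is None for acc in accs):
        return None
    return accs

print(resolve_full_model("my model"))         # -> "org/my-model"
print(resolve_full_model("org/other-model"))  # -> unchanged
print(collect_scores({"task_a": {"caa": 0.8}}, "task_a"))  # -> [0.8]
print(collect_scores({"task_a": {"acc": 0.8}}, "task_a"))  # -> None (no "caa" key)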
src/prepare.py CHANGED
@@ -45,7 +45,7 @@ def prepare_space():
             repo_type="dataset",
             tqdm_class=None,
             etag_timeout=30,
-            allow_patterns=["leaderboard/*.toml", "leaderboard/**/*.json"],
+            # allow_patterns=["leaderboard/*.toml", "leaderboard/**/*.json"],
             token=settings.HF_TOKEN.get_secret_value(),
         )
     except Exception as e:
@@ -65,11 +65,11 @@ class MetaToml(BaseModel):
 
     @cached_property
    def model_title_to_key(self) -> dict[str, str]:
-        return {model.title: model.key for model in self.models}
+        return {model.title.lower(): model.key for model in self.models}
 
     @cached_property
     def benchmark_title_to_key(self) -> dict[str, str]:
-        return {benchmark.title: benchmark.key for benchmark in self.benchmarks}
+        return {benchmark.title.lower(): benchmark.key for benchmark in self.benchmarks}
 
     @cached_property
     def model_key_to_repo_id(self) -> dict[str, str]:
@@ -79,11 +79,12 @@
     def model_title_to_repo_id(self) -> dict[str, str]:
         mapping: dict[str, str] = {}
         for model in self.models:
-            model_key = self.model_title_to_key.get(model.title)
+            model_title = model.title.lower()
+            model_key = self.model_title_to_key.get(model_title)
             if model_key:
                 model_repo_id = self.model_key_to_repo_id.get(model_key)
                 if model_repo_id:
-                    mapping[model.title] = model_repo_id
+                    mapping[model_title] = model_repo_id
         return mapping
 
 
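Note on the prepare.py change: MetaToml now lowercases titles on both the build and the lookup side, so title matching is case-insensitive and the keys of model_title_to_repo_id are lowercased titles; commenting out allow_patterns presumably makes the download fetch the whole dataset repo rather than only the TOML/JSON files. A minimal sketch of the lowercased lookup under that assumption; the Model fields and the MetaTomlSketch class below are simplified stand-ins for the real meta.toml structures:

from functools import cached_property
from pydantic import BaseModel

class Model(BaseModel):
    title: str
    key: str
    repo_id: str

class MetaTomlSketch(BaseModel):
    models: list[Model]

    @cached_property
    def model_title_to_key(self) -> dict[str, str]:
        # Titles are lowercased when the mapping is built ...
        return {model.title.lower(): model.key for model in self.models}

    @cached_property
    def model_key_to_repo_id(self) -> dict[str, str]:
        return {model.key: model.repo_id for model in self.models}

    @cached_property
    def model_title_to_repo_id(self) -> dict[str, str]:
        # ... and lowercased again before lookup, so mixed-case titles still resolve.
        mapping: dict[str, str] = {}
        for model in self.models:
            model_title = model.title.lower()
            model_key = self.model_title_to_key.get(model_title)
            if model_key:
                model_repo_id = self.model_key_to_repo_id.get(model_key)
                if model_repo_id:
                    mapping[model_title] = model_repo_id
        return mapping

meta = MetaTomlSketch(models=[Model(title="My Model", key="my-model", repo_id="org/my-model")])
print(meta.model_title_to_repo_id.get("my model"))  # -> "org/my-model" (lowercased key)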