Spaces:

lmms-lab-si
/

EASI-Leaderboard

Running

yangzhitao committed on 10 days ago

Commit

ba366ad

1 Parent(s): 58bbf33

refactor: update benchmark display in submit tab

Files changed (2) hide show

app.py CHANGED Viewed

@@ -466,11 +466,11 @@ def create_submit_tab(tab_id: int, demo: gr.Blocks):
                 for benchmark in benchmarks_list:
                     with gr.Row():
                         benchmark_checkbox = gr.Checkbox(
-                            label=f"{benchmark.title} ({benchmark.key})",
                             value=False,
                         )
                         result_input = gr.Number(
-                            label="Result Value",
                             value=None,
                             interactive=True,
                             visible=False,
@@ -567,8 +567,8 @@ def create_submit_tab(tab_id: int, demo: gr.Blocks):
                 strict=True,
             ):
                 if checkbox_checked:
-                    # TODO: metric
-                    results[benchmark.key] = {"acc": float(result_value) if result_value is not None else None}
             if not results:
                 raise ValueError("At least one benchmark result is required.")

                 for benchmark in benchmarks_list:
                     with gr.Row():
                         benchmark_checkbox = gr.Checkbox(
+                            label=f"{benchmark.title} ({benchmark.default_metric_label})",
                             value=False,
                         )
                         result_input = gr.Number(
+                            label="Result Value (in %, e.g. 42.0 for 42.0%)",
                             value=None,
                             interactive=True,
                             visible=False,
                 strict=True,
             ):
                 if checkbox_checked:
+                    metric_key = benchmark.default_metric
+                    results[benchmark.key] = {metric_key: float(result_value) if result_value is not None else None}
             if not results:
                 raise ValueError("At least one benchmark result is required.")

src/schemas/meta_toml.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from functools import cached_property
-from pydantic import BaseModel, ConfigDict
 from typing_extensions import Self
@@ -84,6 +84,15 @@ class _HashableComparableMixin(BaseModel):
 class MetaToml_Benchmark(_HashableComparableMixin):
     disabled: bool = False
 class MetaToml_Model(_HashableComparableMixin): ...

 from functools import cached_property
+from pydantic import BaseModel, ConfigDict, computed_field
 from typing_extensions import Self
 class MetaToml_Benchmark(_HashableComparableMixin):
     disabled: bool = False
+    @computed_field
+    @property
+    def default_metric(self) -> str:
+        return "caa" if self.key.startswith("site") else "acc"
+    @property
+    def default_metric_label(self) -> str:
+        return "CAA" if self.default_metric == "caa" else "Acc."
 class MetaToml_Model(_HashableComparableMixin): ...