yangzhitao committed
Commit aff7b1f · 1 Parent(s): ecc1bf2
refactor: optimize benchmark retrieval in create_submit_tab function to enhance clarity and error messaging
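The substance of the change, reduced to a standalone sketch: fetch the benchmark list once, reuse it for both the form and the result parsing, and fail loudly on empty input. Benchmark, get_benchmarks, and the sample entries below are hypothetical stand-ins for the app's real objects; only the loop shape and the error message mirror the diff.

from dataclasses import dataclass

@dataclass
class Benchmark:
    title: str
    key: str

def get_benchmarks() -> list[Benchmark]:
    # Hypothetical stand-in for the real registry lookup in app.py.
    return [Benchmark("MMLU", "mmlu"), Benchmark("GSM8K", "gsm8k")]

def build_results(
    benchmarks_list: list[Benchmark],
    checked: list[bool],
    values: list[float | None],
) -> dict[str, dict[str, float]]:
    # One shared list drives both form construction and result parsing, so the
    # three sequences cannot silently drift apart; strict=True enforces it.
    results: dict[str, dict[str, float]] = {}
    for benchmark, is_checked, value in zip(benchmarks_list, checked, values, strict=True):
        if is_checked and value is not None:
            results[benchmark.key] = {"acc": float(value)}
    if not results:
        msg = f"At least one benchmark result is required. benchmarks: {[b.title for b in benchmarks_list]!r}"
        raise ValueError(msg)
    return results

print(build_results(get_benchmarks(), [True, False], [0.71, None]))
# -> {'mmlu': {'acc': 0.71}}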
app.py CHANGED
@@ -2,8 +2,6 @@ import json
 import re
 import sys
 import threading
-from collections import namedtuple
-from functools import partial
 from textwrap import dedent
 
 import gradio as gr
@@ -452,6 +450,7 @@ def create_submit_tab(tab_id: int, demo: gr.Blocks):
     table.select(fn=on_select, inputs=table, outputs=model_name_textbox)
 
     # ========== Section 2: Steps 8-10 (Benchmark Evaluation Results) ==========
+    benchmarks_list = get_benchmarks()
     with gr.Accordion("📊 Benchmark Evaluation Results (Steps 8-10)", open=True):
         gr.Markdown(
             "**8-9. Select benchmarks and fill in evaluation result values**\n\n"
@@ -459,9 +458,8 @@ def create_submit_tab(tab_id: int, demo: gr.Blocks):
         )
 
         # Simple form for benchmark results
-        benchmarks = get_benchmarks()
         benchmark_results_form: list = []
-        for benchmark in benchmarks:
+        for benchmark in benchmarks_list:
             with gr.Row():
                 benchmark_checkbox = gr.Checkbox(
                     label=f"{benchmark.title} ({benchmark.key})",
@@ -555,16 +553,19 @@ def create_submit_tab(tab_id: int, demo: gr.Blocks):
 
         # Build results: {benchmark_key: {metric: value}}
         results = {}
-        benchmarks_list = get_benchmarks()
         for benchmark, checkbox_checked, result_value in zip(
-            benchmarks_list, benchmark_checkbox_values, benchmark_result_values
+            benchmarks_list,
+            benchmark_checkbox_values,
+            benchmark_result_values,
+            strict=True,
         ):
             if checkbox_checked and result_value is not None:
                 # Use "acc" as the default metric (can be extended)
                 results[benchmark.key] = {"acc": float(result_value)}
 
         if not results:
-            raise ValueError("At least one benchmark result is required")
+            msg = f"At least one benchmark result is required. benchmarks: {[b.title for b in benchmarks_list]!r}"
+            raise ValueError(msg)
 
         return json.dumps({"config": config, "results": results}, indent=2, ensure_ascii=False)
 
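A note on the strict=True flag introduced in the last hunk: it is available from Python 3.10, and it makes zip() raise ValueError when its iterables have unequal lengths instead of silently truncating to the shortest. That matters here because the checkbox and result sequences are built positionally from benchmarks_list, so a length mismatch would otherwise drop trailing benchmarks without warning. A quick standalone illustration:

benchmarks = ["mmlu", "gsm8k", "hellaswag"]
values = [0.71, 0.85]  # one value missing

# Plain zip silently drops the unmatched benchmark:
assert list(zip(benchmarks, values)) == [("mmlu", 0.71), ("gsm8k", 0.85)]

# strict=True (Python 3.10+) surfaces the mismatch instead:
try:
    list(zip(benchmarks, values, strict=True))
except ValueError as exc:
    print(exc)  # zip() argument 2 is shorter than argument 1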