Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
refactor: refactor the naming
Browse files- app.py +58 -58
- src/loaders.py +2 -2
- src/models.py +1 -2
app.py
CHANGED
|
@@ -57,13 +57,13 @@ except Exception:
|
|
| 57 |
print("failed to download")
|
| 58 |
restart_space()
|
| 59 |
|
| 60 |
-
global
|
| 61 |
-
|
| 62 |
global datastore
|
| 63 |
-
datastore =
|
| 64 |
|
| 65 |
|
| 66 |
-
def
|
| 67 |
metric: str,
|
| 68 |
domains: list,
|
| 69 |
langs: list,
|
|
@@ -72,6 +72,7 @@ def update_metric_qa(
|
|
| 72 |
show_anonymous: bool,
|
| 73 |
show_revision_and_timestamp: bool,
|
| 74 |
):
|
|
|
|
| 75 |
return update_metric(
|
| 76 |
datastore,
|
| 77 |
"qa",
|
|
@@ -85,7 +86,7 @@ def update_metric_qa(
|
|
| 85 |
)
|
| 86 |
|
| 87 |
|
| 88 |
-
def
|
| 89 |
metric: str,
|
| 90 |
domains: list,
|
| 91 |
langs: list,
|
|
@@ -94,6 +95,7 @@ def update_metric_long_doc(
|
|
| 94 |
show_anonymous: bool,
|
| 95 |
show_revision_and_timestamp,
|
| 96 |
):
|
|
|
|
| 97 |
return update_metric(
|
| 98 |
datastore,
|
| 99 |
"long-doc",
|
|
@@ -107,30 +109,28 @@ def update_metric_long_doc(
|
|
| 107 |
)
|
| 108 |
|
| 109 |
|
| 110 |
-
def
|
| 111 |
global datastore
|
| 112 |
-
global
|
| 113 |
-
datastore =
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
def update_datastore_long_doc(version):
|
| 124 |
global datastore
|
| 125 |
-
global
|
| 126 |
-
datastore =
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
return selected_domains, selected_langs, selected_rerankings, leaderboard_table, hidden_leaderboard_table
|
| 134 |
|
| 135 |
|
| 136 |
demo = gr.Blocks(css=custom_css)
|
|
@@ -142,7 +142,7 @@ with demo:
|
|
| 142 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 143 |
with gr.TabItem("Results", elem_id="results-tab-table"):
|
| 144 |
with gr.Row():
|
| 145 |
-
|
| 146 |
|
| 147 |
with gr.TabItem("QA", elem_id="qa-benchmark-tab-table", id=0):
|
| 148 |
with gr.Row():
|
|
@@ -174,10 +174,10 @@ with demo:
|
|
| 174 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
| 175 |
hidden_lb_table = get_leaderboard_table(datastore.qa_raw_df, datastore.qa_types, visible=False)
|
| 176 |
|
| 177 |
-
|
| 178 |
-
|
| 179 |
[
|
| 180 |
-
|
| 181 |
],
|
| 182 |
[selected_domains, selected_langs, selected_rerankings, lb_table, hidden_lb_table],
|
| 183 |
)
|
|
@@ -187,7 +187,7 @@ with demo:
|
|
| 187 |
lb_table,
|
| 188 |
hidden_lb_table,
|
| 189 |
search_bar,
|
| 190 |
-
|
| 191 |
selected_domains,
|
| 192 |
selected_langs,
|
| 193 |
selected_rerankings,
|
|
@@ -197,7 +197,7 @@ with demo:
|
|
| 197 |
|
| 198 |
# set metric listener
|
| 199 |
selected_metric.change(
|
| 200 |
-
|
| 201 |
[
|
| 202 |
selected_metric,
|
| 203 |
selected_domains,
|
|
@@ -233,10 +233,10 @@ with demo:
|
|
| 233 |
hidden_lb_df_retriever, datastore.qa_types, visible=False
|
| 234 |
)
|
| 235 |
|
| 236 |
-
|
| 237 |
-
|
| 238 |
[
|
| 239 |
-
|
| 240 |
],
|
| 241 |
[
|
| 242 |
selected_domains,
|
|
@@ -252,7 +252,7 @@ with demo:
|
|
| 252 |
lb_table_retriever,
|
| 253 |
hidden_lb_table_retriever,
|
| 254 |
search_bar_retriever,
|
| 255 |
-
|
| 256 |
selected_domains,
|
| 257 |
selected_langs,
|
| 258 |
selected_noreranker,
|
|
@@ -262,7 +262,7 @@ with demo:
|
|
| 262 |
|
| 263 |
# set metric listener
|
| 264 |
selected_metric.change(
|
| 265 |
-
|
| 266 |
[
|
| 267 |
selected_metric,
|
| 268 |
selected_domains,
|
|
@@ -298,10 +298,10 @@ with demo:
|
|
| 298 |
hidden_lb_df_reranker, datastore.qa_types, visible=False
|
| 299 |
)
|
| 300 |
|
| 301 |
-
|
| 302 |
-
|
| 303 |
[
|
| 304 |
-
|
| 305 |
],
|
| 306 |
[
|
| 307 |
selected_domains,
|
|
@@ -317,7 +317,7 @@ with demo:
|
|
| 317 |
lb_table_reranker,
|
| 318 |
hidden_lb_table_reranker,
|
| 319 |
search_bar_reranker,
|
| 320 |
-
|
| 321 |
selected_domains,
|
| 322 |
selected_langs,
|
| 323 |
selected_rerankings_reranker,
|
|
@@ -326,7 +326,7 @@ with demo:
|
|
| 326 |
)
|
| 327 |
# set metric listener
|
| 328 |
selected_metric.change(
|
| 329 |
-
|
| 330 |
[
|
| 331 |
selected_metric,
|
| 332 |
selected_domains,
|
|
@@ -373,10 +373,10 @@ with demo:
|
|
| 373 |
datastore.doc_raw_df, datastore.doc_types, visible=False
|
| 374 |
)
|
| 375 |
|
| 376 |
-
|
| 377 |
-
|
| 378 |
[
|
| 379 |
-
|
| 380 |
],
|
| 381 |
[
|
| 382 |
selected_domains,
|
|
@@ -392,7 +392,7 @@ with demo:
|
|
| 392 |
lb_table_long_doc,
|
| 393 |
hidden_lb_table_long_doc,
|
| 394 |
search_bar,
|
| 395 |
-
|
| 396 |
selected_domains,
|
| 397 |
selected_langs,
|
| 398 |
selected_rerankings,
|
|
@@ -402,7 +402,7 @@ with demo:
|
|
| 402 |
|
| 403 |
# set metric listener
|
| 404 |
selected_metric.change(
|
| 405 |
-
|
| 406 |
[
|
| 407 |
selected_metric,
|
| 408 |
selected_domains,
|
|
@@ -437,10 +437,10 @@ with demo:
|
|
| 437 |
hidden_lb_df_retriever_long_doc, datastore.doc_types, visible=False
|
| 438 |
)
|
| 439 |
|
| 440 |
-
|
| 441 |
-
|
| 442 |
[
|
| 443 |
-
|
| 444 |
],
|
| 445 |
[
|
| 446 |
selected_domains,
|
|
@@ -456,7 +456,7 @@ with demo:
|
|
| 456 |
lb_table_retriever_long_doc,
|
| 457 |
hidden_lb_table_retriever_long_doc,
|
| 458 |
search_bar_retriever,
|
| 459 |
-
|
| 460 |
selected_domains,
|
| 461 |
selected_langs,
|
| 462 |
selected_noreranker,
|
|
@@ -465,7 +465,7 @@ with demo:
|
|
| 465 |
)
|
| 466 |
|
| 467 |
selected_metric.change(
|
| 468 |
-
|
| 469 |
[
|
| 470 |
selected_metric,
|
| 471 |
selected_domains,
|
|
@@ -502,10 +502,10 @@ with demo:
|
|
| 502 |
hidden_lb_df_reranker_ldoc, datastore.doc_types, visible=False
|
| 503 |
)
|
| 504 |
|
| 505 |
-
|
| 506 |
-
|
| 507 |
[
|
| 508 |
-
|
| 509 |
],
|
| 510 |
[
|
| 511 |
selected_domains,
|
|
@@ -521,7 +521,7 @@ with demo:
|
|
| 521 |
lb_table_reranker_ldoc,
|
| 522 |
hidden_lb_table_reranker_ldoc,
|
| 523 |
search_bar_reranker_ldoc,
|
| 524 |
-
|
| 525 |
selected_domains,
|
| 526 |
selected_langs,
|
| 527 |
selected_rerankings_reranker_ldoc,
|
|
@@ -529,7 +529,7 @@ with demo:
|
|
| 529 |
show_revision_and_timestamp,
|
| 530 |
)
|
| 531 |
selected_metric.change(
|
| 532 |
-
|
| 533 |
[
|
| 534 |
selected_metric,
|
| 535 |
selected_domains,
|
|
|
|
| 57 |
print("failed to download")
|
| 58 |
restart_space()
|
| 59 |
|
| 60 |
+
global ds_dict
|
| 61 |
+
ds_dict = load_eval_results(EVAL_RESULTS_PATH)
|
| 62 |
global datastore
|
| 63 |
+
datastore = ds_dict[LATEST_BENCHMARK_VERSION]
|
| 64 |
|
| 65 |
|
| 66 |
+
def update_qa_metric(
|
| 67 |
metric: str,
|
| 68 |
domains: list,
|
| 69 |
langs: list,
|
|
|
|
| 72 |
show_anonymous: bool,
|
| 73 |
show_revision_and_timestamp: bool,
|
| 74 |
):
|
| 75 |
+
global datastore
|
| 76 |
return update_metric(
|
| 77 |
datastore,
|
| 78 |
"qa",
|
|
|
|
| 86 |
)
|
| 87 |
|
| 88 |
|
| 89 |
+
def update_doc_metric(
|
| 90 |
metric: str,
|
| 91 |
domains: list,
|
| 92 |
langs: list,
|
|
|
|
| 95 |
show_anonymous: bool,
|
| 96 |
show_revision_and_timestamp,
|
| 97 |
):
|
| 98 |
+
global datastore
|
| 99 |
return update_metric(
|
| 100 |
datastore,
|
| 101 |
"long-doc",
|
|
|
|
| 109 |
)
|
| 110 |
|
| 111 |
|
| 112 |
+
def update_qa_version(version):
|
| 113 |
global datastore
|
| 114 |
+
global ds_dict
|
| 115 |
+
datastore = ds_dict[version]
|
| 116 |
+
domain_elem = get_domain_dropdown(QABenchmarks[datastore.slug])
|
| 117 |
+
lang_elem = get_language_dropdown(QABenchmarks[datastore.slug])
|
| 118 |
+
model_elem = get_reranking_dropdown(datastore.reranking_models)
|
| 119 |
+
df_elem = get_leaderboard_table(datastore.qa_fmt_df, datastore.qa_types)
|
| 120 |
+
hidden_df_elem = get_leaderboard_table(datastore.qa_raw_df, datastore.qa_types, visible=False)
|
| 121 |
+
return domain_elem, lang_elem, model_elem, df_elem, hidden_df_elem
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def update_doc_version(version):
|
|
|
|
| 125 |
global datastore
|
| 126 |
+
global ds_dict
|
| 127 |
+
datastore = ds_dict[version]
|
| 128 |
+
domain_elem = get_domain_dropdown(LongDocBenchmarks[datastore.slug])
|
| 129 |
+
lang_elem = get_language_dropdown(LongDocBenchmarks[datastore.slug])
|
| 130 |
+
model_elem = get_reranking_dropdown(datastore.reranking_models)
|
| 131 |
+
df_elem = get_leaderboard_table(datastore.doc_fmt_df, datastore.doc_types)
|
| 132 |
+
hidden_df_elem = get_leaderboard_table(datastore.doc_raw_df, datastore.doc_types, visible=False)
|
| 133 |
+
return domain_elem, lang_elem, model_elem, df_elem, hidden_df_elem
|
|
|
|
| 134 |
|
| 135 |
|
| 136 |
demo = gr.Blocks(css=custom_css)
|
|
|
|
| 142 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 143 |
with gr.TabItem("Results", elem_id="results-tab-table"):
|
| 144 |
with gr.Row():
|
| 145 |
+
version = get_version_dropdown()
|
| 146 |
|
| 147 |
with gr.TabItem("QA", elem_id="qa-benchmark-tab-table", id=0):
|
| 148 |
with gr.Row():
|
|
|
|
| 174 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
| 175 |
hidden_lb_table = get_leaderboard_table(datastore.qa_raw_df, datastore.qa_types, visible=False)
|
| 176 |
|
| 177 |
+
version.change(
|
| 178 |
+
update_qa_version,
|
| 179 |
[
|
| 180 |
+
version,
|
| 181 |
],
|
| 182 |
[selected_domains, selected_langs, selected_rerankings, lb_table, hidden_lb_table],
|
| 183 |
)
|
|
|
|
| 187 |
lb_table,
|
| 188 |
hidden_lb_table,
|
| 189 |
search_bar,
|
| 190 |
+
version,
|
| 191 |
selected_domains,
|
| 192 |
selected_langs,
|
| 193 |
selected_rerankings,
|
|
|
|
| 197 |
|
| 198 |
# set metric listener
|
| 199 |
selected_metric.change(
|
| 200 |
+
update_qa_metric,
|
| 201 |
[
|
| 202 |
selected_metric,
|
| 203 |
selected_domains,
|
|
|
|
| 233 |
hidden_lb_df_retriever, datastore.qa_types, visible=False
|
| 234 |
)
|
| 235 |
|
| 236 |
+
version.change(
|
| 237 |
+
update_qa_version,
|
| 238 |
[
|
| 239 |
+
version,
|
| 240 |
],
|
| 241 |
[
|
| 242 |
selected_domains,
|
|
|
|
| 252 |
lb_table_retriever,
|
| 253 |
hidden_lb_table_retriever,
|
| 254 |
search_bar_retriever,
|
| 255 |
+
version,
|
| 256 |
selected_domains,
|
| 257 |
selected_langs,
|
| 258 |
selected_noreranker,
|
|
|
|
| 262 |
|
| 263 |
# set metric listener
|
| 264 |
selected_metric.change(
|
| 265 |
+
update_qa_metric,
|
| 266 |
[
|
| 267 |
selected_metric,
|
| 268 |
selected_domains,
|
|
|
|
| 298 |
hidden_lb_df_reranker, datastore.qa_types, visible=False
|
| 299 |
)
|
| 300 |
|
| 301 |
+
version.change(
|
| 302 |
+
update_qa_version,
|
| 303 |
[
|
| 304 |
+
version,
|
| 305 |
],
|
| 306 |
[
|
| 307 |
selected_domains,
|
|
|
|
| 317 |
lb_table_reranker,
|
| 318 |
hidden_lb_table_reranker,
|
| 319 |
search_bar_reranker,
|
| 320 |
+
version,
|
| 321 |
selected_domains,
|
| 322 |
selected_langs,
|
| 323 |
selected_rerankings_reranker,
|
|
|
|
| 326 |
)
|
| 327 |
# set metric listener
|
| 328 |
selected_metric.change(
|
| 329 |
+
update_qa_metric,
|
| 330 |
[
|
| 331 |
selected_metric,
|
| 332 |
selected_domains,
|
|
|
|
| 373 |
datastore.doc_raw_df, datastore.doc_types, visible=False
|
| 374 |
)
|
| 375 |
|
| 376 |
+
version.change(
|
| 377 |
+
update_doc_version,
|
| 378 |
[
|
| 379 |
+
version,
|
| 380 |
],
|
| 381 |
[
|
| 382 |
selected_domains,
|
|
|
|
| 392 |
lb_table_long_doc,
|
| 393 |
hidden_lb_table_long_doc,
|
| 394 |
search_bar,
|
| 395 |
+
version,
|
| 396 |
selected_domains,
|
| 397 |
selected_langs,
|
| 398 |
selected_rerankings,
|
|
|
|
| 402 |
|
| 403 |
# set metric listener
|
| 404 |
selected_metric.change(
|
| 405 |
+
update_doc_metric,
|
| 406 |
[
|
| 407 |
selected_metric,
|
| 408 |
selected_domains,
|
|
|
|
| 437 |
hidden_lb_df_retriever_long_doc, datastore.doc_types, visible=False
|
| 438 |
)
|
| 439 |
|
| 440 |
+
version.change(
|
| 441 |
+
update_doc_version,
|
| 442 |
[
|
| 443 |
+
version,
|
| 444 |
],
|
| 445 |
[
|
| 446 |
selected_domains,
|
|
|
|
| 456 |
lb_table_retriever_long_doc,
|
| 457 |
hidden_lb_table_retriever_long_doc,
|
| 458 |
search_bar_retriever,
|
| 459 |
+
version,
|
| 460 |
selected_domains,
|
| 461 |
selected_langs,
|
| 462 |
selected_noreranker,
|
|
|
|
| 465 |
)
|
| 466 |
|
| 467 |
selected_metric.change(
|
| 468 |
+
update_doc_metric,
|
| 469 |
[
|
| 470 |
selected_metric,
|
| 471 |
selected_domains,
|
|
|
|
| 502 |
hidden_lb_df_reranker_ldoc, datastore.doc_types, visible=False
|
| 503 |
)
|
| 504 |
|
| 505 |
+
version.change(
|
| 506 |
+
update_doc_version,
|
| 507 |
[
|
| 508 |
+
version,
|
| 509 |
],
|
| 510 |
[
|
| 511 |
selected_domains,
|
|
|
|
| 521 |
lb_table_reranker_ldoc,
|
| 522 |
hidden_lb_table_reranker_ldoc,
|
| 523 |
search_bar_reranker_ldoc,
|
| 524 |
+
version,
|
| 525 |
selected_domains,
|
| 526 |
selected_langs,
|
| 527 |
selected_rerankings_reranker_ldoc,
|
|
|
|
| 529 |
show_revision_and_timestamp,
|
| 530 |
)
|
| 531 |
selected_metric.change(
|
| 532 |
+
update_doc_metric,
|
| 533 |
[
|
| 534 |
selected_metric,
|
| 535 |
selected_domains,
|
src/loaders.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import os.path
|
| 2 |
-
from typing import List
|
| 3 |
|
| 4 |
import pandas as pd
|
| 5 |
|
|
@@ -94,7 +94,7 @@ def load_leaderboard_datastore(file_path, version) -> LeaderboardDataStore:
|
|
| 94 |
return lb_data_store
|
| 95 |
|
| 96 |
|
| 97 |
-
def load_eval_results(file_path: str):
|
| 98 |
output = {}
|
| 99 |
for version in BENCHMARK_VERSION_LIST:
|
| 100 |
fn = f"{file_path}/{version}"
|
|
|
|
| 1 |
import os.path
|
| 2 |
+
from typing import List, Dict
|
| 3 |
|
| 4 |
import pandas as pd
|
| 5 |
|
|
|
|
| 94 |
return lb_data_store
|
| 95 |
|
| 96 |
|
| 97 |
+
def load_eval_results(file_path: str) -> Dict[str, LeaderboardDataStore]:
|
| 98 |
output = {}
|
| 99 |
for version in BENCHMARK_VERSION_LIST:
|
| 100 |
fn = f"{file_path}/{version}"
|
src/models.py
CHANGED
|
@@ -147,5 +147,4 @@ class LeaderboardDataStore:
|
|
| 147 |
doc_fmt_df: Optional[pd.DataFrame]
|
| 148 |
reranking_models: Optional[list]
|
| 149 |
qa_types: Optional[list]
|
| 150 |
-
doc_types: Optional[list]
|
| 151 |
-
# qa_raw_df, docs_raw_df, qa_fmt_df, docs_fmt_df,
|
|
|
|
| 147 |
doc_fmt_df: Optional[pd.DataFrame]
|
| 148 |
reranking_models: Optional[list]
|
| 149 |
qa_types: Optional[list]
|
| 150 |
+
doc_types: Optional[list]
|
|
|