Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
feat: implement versioning for long-doc reranker only
Browse files
app.py
CHANGED
|
@@ -14,7 +14,7 @@ from src.display.css_html_js import custom_css
|
|
| 14 |
from src.envs import (
|
| 15 |
API,
|
| 16 |
EVAL_RESULTS_PATH,
|
| 17 |
-
REPO_ID, DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC, METRIC_LIST, LATEST_BENCHMARK_VERSION, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL, BM25_LINK
|
| 18 |
)
|
| 19 |
from src.loaders import (
|
| 20 |
load_eval_results
|
|
@@ -23,7 +23,7 @@ from src.utils import (
|
|
| 23 |
update_metric,
|
| 24 |
set_listeners,
|
| 25 |
reset_rank,
|
| 26 |
-
remove_html
|
| 27 |
)
|
| 28 |
from src.display.gradio_formatting import (
|
| 29 |
get_version_dropdown,
|
|
@@ -38,6 +38,8 @@ from src.display.gradio_formatting import (
|
|
| 38 |
get_leaderboard_table
|
| 39 |
)
|
| 40 |
|
|
|
|
|
|
|
| 41 |
|
| 42 |
def restart_space():
|
| 43 |
API.restart_space(repo_id=REPO_ID)
|
|
@@ -247,7 +249,12 @@ with demo:
|
|
| 247 |
queue=True
|
| 248 |
)
|
| 249 |
with gr.TabItem("Reranking Only", id=12):
|
| 250 |
-
lb_df_reranker =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
lb_df_reranker = reset_rank(lb_df_reranker)
|
| 252 |
reranking_models_reranker = lb_df_reranker[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
|
| 253 |
with gr.Row():
|
|
@@ -439,10 +446,12 @@ with demo:
|
|
| 439 |
lb_table_retriever_long_doc,
|
| 440 |
queue=True
|
| 441 |
)
|
| 442 |
-
"""
|
| 443 |
with gr.TabItem("Reranking Only", id=22):
|
| 444 |
-
lb_df_reranker_ldoc =
|
| 445 |
-
datastore.leaderboard_df_long_doc[
|
|
|
|
|
|
|
|
|
|
| 446 |
]
|
| 447 |
lb_df_reranker_ldoc = reset_rank(lb_df_reranker_ldoc)
|
| 448 |
reranking_models_reranker_ldoc = lb_df_reranker_ldoc[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
|
|
@@ -458,11 +467,24 @@ with demo:
|
|
| 458 |
hidden_lb_df_reranker_ldoc, datastore.types_long_doc, visible=False
|
| 459 |
)
|
| 460 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 461 |
set_listeners(
|
| 462 |
"long-doc",
|
| 463 |
lb_table_reranker_ldoc,
|
| 464 |
hidden_lb_table_reranker_ldoc,
|
| 465 |
search_bar_reranker_ldoc,
|
|
|
|
| 466 |
selected_domains,
|
| 467 |
selected_langs,
|
| 468 |
selected_rerankings_reranker_ldoc,
|
|
@@ -551,7 +573,6 @@ with demo:
|
|
| 551 |
|
| 552 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
|
| 553 |
gr.Markdown(BENCHMARKS_TEXT, elem_classes="markdown-text")
|
| 554 |
-
"""
|
| 555 |
|
| 556 |
if __name__ == "__main__":
|
| 557 |
scheduler = BackgroundScheduler()
|
|
|
|
| 14 |
from src.envs import (
|
| 15 |
API,
|
| 16 |
EVAL_RESULTS_PATH,
|
| 17 |
+
REPO_ID, DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC, METRIC_LIST, LATEST_BENCHMARK_VERSION, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL, BM25_LINK, BENCHMARK_VERSION_LIST
|
| 18 |
)
|
| 19 |
from src.loaders import (
|
| 20 |
load_eval_results
|
|
|
|
| 23 |
update_metric,
|
| 24 |
set_listeners,
|
| 25 |
reset_rank,
|
| 26 |
+
remove_html, upload_file, submit_results
|
| 27 |
)
|
| 28 |
from src.display.gradio_formatting import (
|
| 29 |
get_version_dropdown,
|
|
|
|
| 38 |
get_leaderboard_table
|
| 39 |
)
|
| 40 |
|
| 41 |
+
from src.about import EVALUATION_QUEUE_TEXT, BENCHMARKS_TEXT
|
| 42 |
+
|
| 43 |
|
| 44 |
def restart_space():
|
| 45 |
API.restart_space(repo_id=REPO_ID)
|
|
|
|
| 249 |
queue=True
|
| 250 |
)
|
| 251 |
with gr.TabItem("Reranking Only", id=12):
|
| 252 |
+
lb_df_reranker = \
|
| 253 |
+
datastore.leaderboard_df_qa[
|
| 254 |
+
datastore.leaderboard_df_qa[
|
| 255 |
+
COL_NAME_RETRIEVAL_MODEL
|
| 256 |
+
] == BM25_LINK
|
| 257 |
+
]
|
| 258 |
lb_df_reranker = reset_rank(lb_df_reranker)
|
| 259 |
reranking_models_reranker = lb_df_reranker[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
|
| 260 |
with gr.Row():
|
|
|
|
| 446 |
lb_table_retriever_long_doc,
|
| 447 |
queue=True
|
| 448 |
)
|
|
|
|
| 449 |
with gr.TabItem("Reranking Only", id=22):
|
| 450 |
+
lb_df_reranker_ldoc = \
|
| 451 |
+
datastore.leaderboard_df_long_doc[
|
| 452 |
+
datastore.leaderboard_df_long_doc[
|
| 453 |
+
COL_NAME_RETRIEVAL_MODEL
|
| 454 |
+
] == BM25_LINK
|
| 455 |
]
|
| 456 |
lb_df_reranker_ldoc = reset_rank(lb_df_reranker_ldoc)
|
| 457 |
reranking_models_reranker_ldoc = lb_df_reranker_ldoc[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
|
|
|
|
| 467 |
hidden_lb_df_reranker_ldoc, datastore.types_long_doc, visible=False
|
| 468 |
)
|
| 469 |
|
| 470 |
+
selected_version.change(
|
| 471 |
+
update_datastore_long_doc,
|
| 472 |
+
[selected_version,],
|
| 473 |
+
[
|
| 474 |
+
selected_domains,
|
| 475 |
+
selected_langs,
|
| 476 |
+
selected_rerankings_reranker_ldoc,
|
| 477 |
+
lb_table_reranker_ldoc,
|
| 478 |
+
hidden_lb_table_reranker_ldoc
|
| 479 |
+
]
|
| 480 |
+
)
|
| 481 |
+
|
| 482 |
set_listeners(
|
| 483 |
"long-doc",
|
| 484 |
lb_table_reranker_ldoc,
|
| 485 |
hidden_lb_table_reranker_ldoc,
|
| 486 |
search_bar_reranker_ldoc,
|
| 487 |
+
selected_version,
|
| 488 |
selected_domains,
|
| 489 |
selected_langs,
|
| 490 |
selected_rerankings_reranker_ldoc,
|
|
|
|
| 573 |
|
| 574 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
|
| 575 |
gr.Markdown(BENCHMARKS_TEXT, elem_classes="markdown-text")
|
|
|
|
| 576 |
|
| 577 |
if __name__ == "__main__":
|
| 578 |
scheduler = BackgroundScheduler()
|