refactor: refactor the naming

- app.py +36 -36
- src/loaders.py +14 -14
- src/models.py +7 -6
app.py
CHANGED

@@ -114,9 +114,9 @@ def update_datastore(version):
     selected_domains = get_domain_dropdown(QABenchmarks[datastore.slug])
     selected_langs = get_language_dropdown(QABenchmarks[datastore.slug])
     selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
-    leaderboard_table = get_leaderboard_table(datastore.
+    leaderboard_table = get_leaderboard_table(datastore.qa_fmt_df, datastore.qa_types)
     hidden_leaderboard_table = \
-        get_leaderboard_table(datastore.
+        get_leaderboard_table(datastore.qa_raw_df, datastore.qa_types, visible=False)
     return selected_domains, selected_langs, selected_rerankings, leaderboard_table, hidden_leaderboard_table


@@ -127,9 +127,9 @@ def update_datastore_long_doc(version):
     selected_domains = get_domain_dropdown(LongDocBenchmarks[datastore.slug])
     selected_langs = get_language_dropdown(LongDocBenchmarks[datastore.slug])
     selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
-    leaderboard_table = get_leaderboard_table(datastore.
+    leaderboard_table = get_leaderboard_table(datastore.doc_fmt_df, datastore.doc_types)
     hidden_leaderboard_table = \
-        get_leaderboard_table(datastore.
+        get_leaderboard_table(datastore.doc_raw_df, datastore.doc_types, visible=False)
     return selected_domains, selected_langs, selected_rerankings, leaderboard_table, hidden_leaderboard_table


@@ -170,9 +170,9 @@ with demo:
             with gr.Column():
                 selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
                 # shown_table
-                lb_table = get_leaderboard_table(datastore.
+                lb_table = get_leaderboard_table(datastore.qa_fmt_df, datastore.qa_types)
                 # Dummy leaderboard for handling the case when the user uses backspace key
-                hidden_lb_table = get_leaderboard_table(datastore.
+                hidden_lb_table = get_leaderboard_table(datastore.qa_raw_df, datastore.qa_types, visible=False)

                 selected_version.change(
                     update_datastore,
@@ -218,19 +218,19 @@ with demo:
                 with gr.Column(scale=1):
                     selected_noreranker = get_noreranking_dropdown()

-                    lb_df_retriever = datastore.
-                        datastore.
+                    lb_df_retriever = datastore.qa_fmt_df[
+                        datastore.qa_fmt_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
                     ]
                     lb_df_retriever = reset_rank(lb_df_retriever)
-                    lb_table_retriever = get_leaderboard_table(lb_df_retriever, datastore.
+                    lb_table_retriever = get_leaderboard_table(lb_df_retriever, datastore.qa_types)

                     # Dummy leaderboard for handling the case when the user uses backspace key
-                    hidden_lb_df_retriever = datastore.
-                        datastore.
-
+                    hidden_lb_df_retriever = datastore.qa_raw_df[
+                        datastore.qa_raw_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
+                    ]
                     hidden_lb_df_retriever = reset_rank(hidden_lb_df_retriever)
                     hidden_lb_table_retriever = get_leaderboard_table(
-                        hidden_lb_df_retriever, datastore.
+                        hidden_lb_df_retriever, datastore.qa_types, visible=False
                     )

                     selected_version.change(
@@ -276,8 +276,8 @@ with demo:
                         queue=True,
                     )
                 with gr.TabItem("Reranking Only", id=12):
-                    lb_df_reranker = datastore.
-                        datastore.
+                    lb_df_reranker = datastore.qa_fmt_df[
+                        datastore.qa_fmt_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
                     ]
                     lb_df_reranker = reset_rank(lb_df_reranker)
                     reranking_models_reranker = (
@@ -288,14 +288,14 @@ with demo:
                     selected_rerankings_reranker = get_reranking_dropdown(reranking_models_reranker)
                 with gr.Column(scale=1):
                     search_bar_reranker = gr.Textbox(show_label=False, visible=False)
-                    lb_table_reranker = get_leaderboard_table(lb_df_reranker, datastore.
+                    lb_table_reranker = get_leaderboard_table(lb_df_reranker, datastore.qa_types)

-                    hidden_lb_df_reranker = datastore.
-                        datastore.
-
+                    hidden_lb_df_reranker = datastore.qa_raw_df[
+                        datastore.qa_raw_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
+                    ]
                     hidden_lb_df_reranker = reset_rank(hidden_lb_df_reranker)
                     hidden_lb_table_reranker = get_leaderboard_table(
-                        hidden_lb_df_reranker, datastore.
+                        hidden_lb_df_reranker, datastore.qa_types, visible=False
                     )

                     selected_version.change(
@@ -365,12 +365,12 @@ with demo:
                 selected_rerankings = get_reranking_dropdown(datastore.reranking_models)

                 lb_table_long_doc = get_leaderboard_table(
-                    datastore.
+                    datastore.doc_fmt_df, datastore.doc_types
                 )

                 # Dummy leaderboard for handling the case when the user uses backspace key
                 hidden_lb_table_long_doc = get_leaderboard_table(
-                    datastore.
+                    datastore.doc_raw_df, datastore.doc_types, visible=False
                 )

                 selected_version.change(
@@ -421,20 +421,20 @@ with demo:
                     search_bar_retriever = get_search_bar()
                 with gr.Column(scale=1):
                     selected_noreranker = get_noreranking_dropdown()
-                    lb_df_retriever_long_doc = datastore.
-                        datastore.
+                    lb_df_retriever_long_doc = datastore.doc_fmt_df[
+                        datastore.doc_fmt_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
                     ]
                     lb_df_retriever_long_doc = reset_rank(lb_df_retriever_long_doc)
                     lb_table_retriever_long_doc = get_leaderboard_table(
-                        lb_df_retriever_long_doc, datastore.
+                        lb_df_retriever_long_doc, datastore.doc_types
                     )

-                    hidden_lb_df_retriever_long_doc = datastore.
-                        datastore.
-
+                    hidden_lb_df_retriever_long_doc = datastore.doc_raw_df[
+                        datastore.doc_raw_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
+                    ]
                     hidden_lb_df_retriever_long_doc = reset_rank(hidden_lb_df_retriever_long_doc)
                     hidden_lb_table_retriever_long_doc = get_leaderboard_table(
-                        hidden_lb_df_retriever_long_doc, datastore.
+                        hidden_lb_df_retriever_long_doc, datastore.doc_types, visible=False
                     )

                     selected_version.change(
@@ -479,8 +479,8 @@ with demo:
                         queue=True,
                     )
                 with gr.TabItem("Reranking Only", id=22):
-                    lb_df_reranker_ldoc = datastore.
-                        datastore.
+                    lb_df_reranker_ldoc = datastore.doc_fmt_df[
+                        datastore.doc_fmt_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
                     ]
                     lb_df_reranker_ldoc = reset_rank(lb_df_reranker_ldoc)
                     reranking_models_reranker_ldoc = (
@@ -493,13 +493,13 @@ with demo:
                     )
                 with gr.Column(scale=1):
                     search_bar_reranker_ldoc = gr.Textbox(show_label=False, visible=False)
-                    lb_table_reranker_ldoc = get_leaderboard_table(lb_df_reranker_ldoc, datastore.
-                    hidden_lb_df_reranker_ldoc = datastore.
-                        datastore.
-
+                    lb_table_reranker_ldoc = get_leaderboard_table(lb_df_reranker_ldoc, datastore.doc_types)
+                    hidden_lb_df_reranker_ldoc = datastore.doc_raw_df[
+                        datastore.doc_raw_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
+                    ]
                     hidden_lb_df_reranker_ldoc = reset_rank(hidden_lb_df_reranker_ldoc)
                     hidden_lb_table_reranker_ldoc = get_leaderboard_table(
-                        hidden_lb_df_reranker_ldoc, datastore.
+                        hidden_lb_df_reranker_ldoc, datastore.doc_types, visible=False
                     )

                     selected_version.change(
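Every "Retriever Only" / "Reranking Only" tab in app.py repeats the same step on the renamed dataframes: filter the rows by reranking or retrieval model, then renumber the ranks. A minimal, self-contained sketch of that pattern (the helper names here are hypothetical, and _reset_rank only approximates what app.py's reset_rank is assumed to do):

import pandas as pd

def _reset_rank(df: pd.DataFrame, rank_col: str = "Rank") -> pd.DataFrame:
    # Stand-in for app.py's reset_rank; assumed behaviour: renumber ranks after filtering.
    df = df.copy()
    df[rank_col] = range(1, len(df) + 1)
    return df

def select_rows(df: pd.DataFrame, column: str, value: str) -> pd.DataFrame:
    """Keep the rows where `column` equals `value`, then renumber the rank column."""
    return _reset_rank(df[df[column] == value])

# Mirrors the tabs above (COL_NAME_* and BM25_LINK are the constants app.py uses):
#   lb_df_retriever = select_rows(datastore.qa_fmt_df, COL_NAME_RERANKING_MODEL, "NoReranker")
#   lb_df_reranker  = select_rows(datastore.qa_fmt_df, COL_NAME_RETRIEVAL_MODEL, BM25_LINK)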
src/loaders.py
CHANGED

@@ -68,25 +68,25 @@ def load_leaderboard_datastore(file_path, version) -> LeaderboardDataStore:
     lb_data_store.raw_data = load_raw_eval_results(file_path)
     print(f"raw data: {len(lb_data_store.raw_data)}")

-    lb_data_store.
-    print(f"QA data loaded: {lb_data_store.
-    lb_data_store.
+    lb_data_store.qa_raw_df = get_leaderboard_df(lb_data_store, task="qa", metric=DEFAULT_METRIC_QA)
+    print(f"QA data loaded: {lb_data_store.qa_raw_df.shape}")
+    lb_data_store.qa_fmt_df = lb_data_store.qa_raw_df.copy()
     shown_columns_qa, types_qa = get_default_cols("qa", lb_data_store.slug, add_fix_cols=True)
-    lb_data_store.
-    lb_data_store.
-        ~lb_data_store.
+    lb_data_store.qa_types = types_qa
+    lb_data_store.qa_fmt_df = lb_data_store.qa_fmt_df[
+        ~lb_data_store.qa_fmt_df[COL_NAME_IS_ANONYMOUS]
     ][shown_columns_qa]
-    lb_data_store.
+    lb_data_store.qa_fmt_df.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)

-    lb_data_store.
-    print(f"Long-Doc data loaded: {len(lb_data_store.
-    lb_data_store.
+    lb_data_store.doc_raw_df = get_leaderboard_df(lb_data_store, task="long-doc", metric=DEFAULT_METRIC_LONG_DOC)
+    print(f"Long-Doc data loaded: {len(lb_data_store.doc_raw_df)}")
+    lb_data_store.doc_fmt_df = lb_data_store.doc_raw_df.copy()
     shown_columns_long_doc, types_long_doc = get_default_cols("long-doc", lb_data_store.slug, add_fix_cols=True)
-    lb_data_store.
-    lb_data_store.
-        ~lb_data_store.
+    lb_data_store.doc_types = types_long_doc
+    lb_data_store.doc_fmt_df = lb_data_store.doc_fmt_df[
+        ~lb_data_store.doc_fmt_df[COL_NAME_IS_ANONYMOUS]
     ][shown_columns_long_doc]
-    lb_data_store.
+    lb_data_store.doc_fmt_df.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)

     lb_data_store.reranking_models = sorted(
         list(frozenset([eval_result.reranking_model for eval_result in lb_data_store.raw_data]))
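The loader now keeps two dataframes per task: the *_raw_df returned by get_leaderboard_df, and a *_fmt_df display copy with anonymous submissions hidden, only the shown columns kept, and the revision/timestamp columns dropped. A sketch of that derivation, with plain string column names standing in for the COL_NAME_* constants used above:

import pandas as pd

def format_for_display(raw_df: pd.DataFrame, shown_columns: list) -> pd.DataFrame:
    # Derive a *_fmt_df from a *_raw_df, mirroring load_leaderboard_datastore above.
    fmt_df = raw_df.copy()
    # Hide anonymous rows and keep only the displayed columns
    # ("is_anonymous" stands in for COL_NAME_IS_ANONYMOUS).
    fmt_df = fmt_df[~fmt_df["is_anonymous"]][shown_columns]
    # Drop bookkeeping columns ("revision"/"timestamp" stand in for
    # COL_NAME_REVISION / COL_NAME_TIMESTAMP); errors="ignore" in case
    # they are not part of shown_columns.
    return fmt_df.drop(["revision", "timestamp"], axis=1, errors="ignore")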
src/models.py
CHANGED

@@ -141,10 +141,11 @@ class LeaderboardDataStore:
     version: str
     slug: str
     raw_data: Optional[list]
-
-
-
-
+    qa_raw_df: Optional[pd.DataFrame]
+    doc_raw_df: Optional[pd.DataFrame]
+    qa_fmt_df: Optional[pd.DataFrame]
+    doc_fmt_df: Optional[pd.DataFrame]
     reranking_models: Optional[list]
-
-
+    qa_types: Optional[list]
+    doc_types: Optional[list]
+    # qa_raw_df, docs_raw_df, qa_fmt_df, docs_fmt_df,
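Read together with loaders.py above, the renamed fields pair up as raw/formatted dataframes plus the column-type lists passed to get_leaderboard_table. A sketch of how LeaderboardDataStore reads after this commit (assuming it is a dataclass with None defaults; the real definition in src/models.py may differ in details not shown in the hunk):

from dataclasses import dataclass
from typing import Optional

import pandas as pd

@dataclass
class LeaderboardDataStore:
    version: str
    slug: str
    raw_data: Optional[list] = None
    qa_raw_df: Optional[pd.DataFrame] = None    # full QA leaderboard
    doc_raw_df: Optional[pd.DataFrame] = None   # full Long-Doc leaderboard
    qa_fmt_df: Optional[pd.DataFrame] = None    # QA leaderboard trimmed for display
    doc_fmt_df: Optional[pd.DataFrame] = None   # Long-Doc leaderboard trimmed for display
    reranking_models: Optional[list] = None
    qa_types: Optional[list] = None             # column types for the QA table
    doc_types: Optional[list] = None            # column types for the Long-Doc table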