Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
feat: implement the version selector for qa retrieval only
Browse files
app.py
CHANGED
|
@@ -14,19 +14,21 @@ from src.display.css_html_js import custom_css
|
|
| 14 |
from src.envs import (
|
| 15 |
API,
|
| 16 |
EVAL_RESULTS_PATH,
|
| 17 |
-
REPO_ID, DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC, METRIC_LIST, LATEST_BENCHMARK_VERSION
|
| 18 |
)
|
| 19 |
from src.loaders import (
|
| 20 |
load_eval_results
|
| 21 |
)
|
| 22 |
from src.utils import (
|
| 23 |
update_metric,
|
| 24 |
-
set_listeners
|
|
|
|
| 25 |
)
|
| 26 |
from src.display.gradio_formatting import (
|
| 27 |
get_version_dropdown,
|
| 28 |
get_search_bar,
|
| 29 |
get_reranking_dropdown,
|
|
|
|
| 30 |
get_metric_dropdown,
|
| 31 |
get_domain_dropdown,
|
| 32 |
get_language_dropdown,
|
|
@@ -110,11 +112,9 @@ with demo:
|
|
| 110 |
# select domain
|
| 111 |
with gr.Row():
|
| 112 |
selected_domains = get_domain_dropdown(QABenchmarks[datastore.slug])
|
| 113 |
-
# selected_domains = get_domain_dropdown(QABenchmarks["2404"])
|
| 114 |
# select language
|
| 115 |
with gr.Row():
|
| 116 |
selected_langs = get_language_dropdown(QABenchmarks[datastore.slug])
|
| 117 |
-
# selected_langs = get_language_dropdown(QABenchmarks["2404"])
|
| 118 |
with gr.Column():
|
| 119 |
# select the metric
|
| 120 |
selected_metric = get_metric_dropdown(METRIC_LIST, DEFAULT_METRIC_QA)
|
|
@@ -132,21 +132,22 @@ with demo:
|
|
| 132 |
with gr.Column():
|
| 133 |
selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
|
| 134 |
# shown_table
|
| 135 |
-
|
| 136 |
datastore.leaderboard_df_qa, datastore.types_qa)
|
| 137 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
| 138 |
-
|
| 139 |
datastore.raw_df_qa, datastore.types_qa, visible=False)
|
| 140 |
|
| 141 |
selected_version.change(
|
| 142 |
update_datastore,
|
| 143 |
[selected_version,],
|
| 144 |
-
[selected_domains, selected_langs, selected_rerankings,
|
| 145 |
)
|
|
|
|
| 146 |
set_listeners(
|
| 147 |
"qa",
|
| 148 |
-
|
| 149 |
-
|
| 150 |
search_bar,
|
| 151 |
selected_version,
|
| 152 |
selected_domains,
|
|
@@ -168,30 +169,37 @@ with demo:
|
|
| 168 |
show_anonymous,
|
| 169 |
show_revision_and_timestamp,
|
| 170 |
],
|
| 171 |
-
|
| 172 |
queue=True
|
| 173 |
)
|
| 174 |
|
| 175 |
-
"""
|
| 176 |
with gr.TabItem("Retrieval Only", id=11):
|
| 177 |
with gr.Row():
|
| 178 |
with gr.Column(scale=1):
|
| 179 |
search_bar_retriever = get_search_bar()
|
| 180 |
with gr.Column(scale=1):
|
| 181 |
selected_noreranker = get_noreranking_dropdown()
|
| 182 |
-
|
|
|
|
| 183 |
lb_df_retriever = reset_rank(lb_df_retriever)
|
| 184 |
-
lb_table_retriever = get_leaderboard_table(lb_df_retriever,
|
| 185 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
| 186 |
-
hidden_lb_df_retriever =
|
| 187 |
hidden_lb_df_retriever = reset_rank(hidden_lb_df_retriever)
|
| 188 |
-
hidden_lb_table_retriever = get_leaderboard_table(hidden_lb_df_retriever,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
|
| 190 |
set_listeners(
|
| 191 |
"qa",
|
| 192 |
lb_table_retriever,
|
| 193 |
hidden_lb_table_retriever,
|
| 194 |
search_bar_retriever,
|
|
|
|
| 195 |
selected_domains,
|
| 196 |
selected_langs,
|
| 197 |
selected_noreranker,
|
|
@@ -210,11 +218,11 @@ with demo:
|
|
| 210 |
search_bar_retriever,
|
| 211 |
show_anonymous,
|
| 212 |
show_revision_and_timestamp,
|
| 213 |
-
selected_version,
|
| 214 |
],
|
| 215 |
lb_table_retriever,
|
| 216 |
queue=True
|
| 217 |
)
|
|
|
|
| 218 |
with gr.TabItem("Reranking Only", id=12):
|
| 219 |
lb_df_reranker = data["AIR-Bench_24.04"].leaderboard_df_qa[data["AIR-Bench_24.04"].leaderboard_df_qa[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK]
|
| 220 |
lb_df_reranker = reset_rank(lb_df_reranker)
|
|
|
|
| 14 |
from src.envs import (
|
| 15 |
API,
|
| 16 |
EVAL_RESULTS_PATH,
|
| 17 |
+
REPO_ID, DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC, METRIC_LIST, LATEST_BENCHMARK_VERSION, COL_NAME_RERANKING_MODEL
|
| 18 |
)
|
| 19 |
from src.loaders import (
|
| 20 |
load_eval_results
|
| 21 |
)
|
| 22 |
from src.utils import (
|
| 23 |
update_metric,
|
| 24 |
+
set_listeners,
|
| 25 |
+
reset_rank
|
| 26 |
)
|
| 27 |
from src.display.gradio_formatting import (
|
| 28 |
get_version_dropdown,
|
| 29 |
get_search_bar,
|
| 30 |
get_reranking_dropdown,
|
| 31 |
+
get_noreranking_dropdown,
|
| 32 |
get_metric_dropdown,
|
| 33 |
get_domain_dropdown,
|
| 34 |
get_language_dropdown,
|
|
|
|
| 112 |
# select domain
|
| 113 |
with gr.Row():
|
| 114 |
selected_domains = get_domain_dropdown(QABenchmarks[datastore.slug])
|
|
|
|
| 115 |
# select language
|
| 116 |
with gr.Row():
|
| 117 |
selected_langs = get_language_dropdown(QABenchmarks[datastore.slug])
|
|
|
|
| 118 |
with gr.Column():
|
| 119 |
# select the metric
|
| 120 |
selected_metric = get_metric_dropdown(METRIC_LIST, DEFAULT_METRIC_QA)
|
|
|
|
| 132 |
with gr.Column():
|
| 133 |
selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
|
| 134 |
# shown_table
|
| 135 |
+
lb_table = get_leaderboard_table(
|
| 136 |
datastore.leaderboard_df_qa, datastore.types_qa)
|
| 137 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
| 138 |
+
hidden_lb_table = get_leaderboard_table(
|
| 139 |
datastore.raw_df_qa, datastore.types_qa, visible=False)
|
| 140 |
|
| 141 |
selected_version.change(
|
| 142 |
update_datastore,
|
| 143 |
[selected_version,],
|
| 144 |
+
[selected_domains, selected_langs, selected_rerankings, lb_table, hidden_lb_table]
|
| 145 |
)
|
| 146 |
+
|
| 147 |
set_listeners(
|
| 148 |
"qa",
|
| 149 |
+
lb_table,
|
| 150 |
+
hidden_lb_table,
|
| 151 |
search_bar,
|
| 152 |
selected_version,
|
| 153 |
selected_domains,
|
|
|
|
| 169 |
show_anonymous,
|
| 170 |
show_revision_and_timestamp,
|
| 171 |
],
|
| 172 |
+
lb_table,
|
| 173 |
queue=True
|
| 174 |
)
|
| 175 |
|
|
|
|
| 176 |
with gr.TabItem("Retrieval Only", id=11):
|
| 177 |
with gr.Row():
|
| 178 |
with gr.Column(scale=1):
|
| 179 |
search_bar_retriever = get_search_bar()
|
| 180 |
with gr.Column(scale=1):
|
| 181 |
selected_noreranker = get_noreranking_dropdown()
|
| 182 |
+
|
| 183 |
+
lb_df_retriever = datastore.leaderboard_df_qa[datastore.leaderboard_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
|
| 184 |
lb_df_retriever = reset_rank(lb_df_retriever)
|
| 185 |
+
lb_table_retriever = get_leaderboard_table(lb_df_retriever, datastore.types_qa)
|
| 186 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
| 187 |
+
hidden_lb_df_retriever = datastore.raw_df_qa[datastore.raw_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
|
| 188 |
hidden_lb_df_retriever = reset_rank(hidden_lb_df_retriever)
|
| 189 |
+
hidden_lb_table_retriever = get_leaderboard_table(hidden_lb_df_retriever, datastore.types_qa, visible=False)
|
| 190 |
+
|
| 191 |
+
selected_version.change(
|
| 192 |
+
update_datastore,
|
| 193 |
+
[selected_version,],
|
| 194 |
+
[selected_domains, selected_langs, selected_rerankings, lb_table_retriever, hidden_lb_table_retriever]
|
| 195 |
+
)
|
| 196 |
|
| 197 |
set_listeners(
|
| 198 |
"qa",
|
| 199 |
lb_table_retriever,
|
| 200 |
hidden_lb_table_retriever,
|
| 201 |
search_bar_retriever,
|
| 202 |
+
selected_version,
|
| 203 |
selected_domains,
|
| 204 |
selected_langs,
|
| 205 |
selected_noreranker,
|
|
|
|
| 218 |
search_bar_retriever,
|
| 219 |
show_anonymous,
|
| 220 |
show_revision_and_timestamp,
|
|
|
|
| 221 |
],
|
| 222 |
lb_table_retriever,
|
| 223 |
queue=True
|
| 224 |
)
|
| 225 |
+
"""
|
| 226 |
with gr.TabItem("Reranking Only", id=12):
|
| 227 |
lb_df_reranker = data["AIR-Bench_24.04"].leaderboard_df_qa[data["AIR-Bench_24.04"].leaderboard_df_qa[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK]
|
| 228 |
lb_df_reranker = reset_rank(lb_df_reranker)
|