asta-bench-leaderboard

Running

Amber Tanaka committed on Aug 21

Commit

941eea2

unverified ·

1 Parent(s): 20c57a4

Add Date to table (#75)

Files changed (2) hide show

leaderboard_transformer.py CHANGED Viewed

@@ -257,7 +257,7 @@ class DataTransformer:
         # --- 3. Add Columns for Agent Openness and Tooling ---
         base_cols = ["id","Agent","Submitter","LLM Base","Source"]
         new_cols = ["Openness", "Agent Tooling"]
-        ending_cols = ["Logs"]
         metrics_to_display = [primary_score_col, f"{primary_metric} Cost"]
         for item in group_metrics:
@@ -290,7 +290,7 @@ class DataTransformer:
             # Apply the function row-wise to create the new column
             attempted_column = df_view.apply(calculate_attempted, axis=1)
             # Insert the new column at a nice position (e.g., after "Date")
-            df_view.insert((cols - 1), "Categories Attempted", attempted_column)
         else:
             total_benchmarks = len(group_metrics)
             def calculate_benchmarks_attempted(row):
@@ -303,7 +303,7 @@ class DataTransformer:
                 else:
                     return f"{count}/{total_benchmarks}"
             # Insert the new column, for example, after "Date"
-            df_view.insert((cols - 1), "Benchmarks Attempted", df_view.apply(calculate_benchmarks_attempted, axis=1))
         # --- 4. Generate the Scatter Plot for the Primary Metric ---
         plots: dict[str, go.Figure] = {}

         # --- 3. Add Columns for Agent Openness and Tooling ---
         base_cols = ["id","Agent","Submitter","LLM Base","Source"]
         new_cols = ["Openness", "Agent Tooling"]
+        ending_cols = ["Date", "Logs"]
         metrics_to_display = [primary_score_col, f"{primary_metric} Cost"]
         for item in group_metrics:
             # Apply the function row-wise to create the new column
             attempted_column = df_view.apply(calculate_attempted, axis=1)
             # Insert the new column at a nice position (e.g., after "Date")
+            df_view.insert((cols - 2), "Categories Attempted", attempted_column)
         else:
             total_benchmarks = len(group_metrics)
             def calculate_benchmarks_attempted(row):
                 else:
                     return f"{count}/{total_benchmarks}"
             # Insert the new column, for example, after "Date"
+            df_view.insert((cols - 2), "Benchmarks Attempted", df_view.apply(calculate_benchmarks_attempted, axis=1))
         # --- 4. Generate the Scatter Plot for the Primary Metric ---
         plots: dict[str, go.Figure] = {}

ui_components.py CHANGED Viewed

@@ -588,7 +588,7 @@ def create_leaderboard_display(
         if "Score" in col or "Cost" in col:
             num_score_cost_cols += 1
     dynamic_widths = [90] * num_score_cost_cols
-    fixed_end_widths = [90, 50]
     # 5. Combine all the lists to create the final, fully dynamic list.
     final_column_widths = fixed_start_widths + dynamic_widths + fixed_end_widths
@@ -725,6 +725,7 @@ def create_benchmark_details_display(
             'Attempted Benchmark',
             benchmark_score_col,
             benchmark_cost_col,
             'Logs'
         ]
         for col in desired_cols_in_order:
@@ -775,7 +776,8 @@ def create_benchmark_details_display(
                 datatype=df_datatypes,
                 interactive=False,
                 wrap=True,
-                column_widths=[40, 40, 200, 150, 175, 85, 100, 100, 40],
                 elem_classes=["wrap-header-df"]
             )
             legend_markdown = create_legend_markdown(benchmark_name)

         if "Score" in col or "Cost" in col:
             num_score_cost_cols += 1
     dynamic_widths = [90] * num_score_cost_cols
+    fixed_end_widths = [90, 100, 50]
     # 5. Combine all the lists to create the final, fully dynamic list.
     final_column_widths = fixed_start_widths + dynamic_widths + fixed_end_widths
             'Attempted Benchmark',
             benchmark_score_col,
             benchmark_cost_col,
+            'Date',
             'Logs'
         ]
         for col in desired_cols_in_order:
                 datatype=df_datatypes,
                 interactive=False,
                 wrap=True,
+                column_widths=[40, 40, 200, 150, 175, 85, 100, 100, 80, 40],
+                show_search="search",
                 elem_classes=["wrap-header-df"]
             )
             legend_markdown = create_legend_markdown(benchmark_name)