Amber Tanaka committed on
Commit
5064c71
·
unverified ·
1 Parent(s): c22c48e

Add Repro links to Agent column (#63)

Browse files
Files changed (2) hide show
  1. leaderboard_transformer.py +2 -2
  2. ui_components.py +26 -9
leaderboard_transformer.py CHANGED
@@ -113,6 +113,7 @@ def _pretty_column_name(raw_col: str) -> str:
113
  'Openness': 'Openness',
114
  'Agent tooling': 'Agent Tooling',
115
  'LLM base': 'LLM Base',
 
116
  }
117
 
118
  if raw_col in fixed_mappings:
@@ -186,7 +187,6 @@ def transform_raw_dataframe(raw_df: pd.DataFrame) -> pd.DataFrame:
186
  raise TypeError("Input 'raw_df' must be a pandas DataFrame.")
187
 
188
  df = raw_df.copy()
189
-
190
  # Create the mapping for pretty column names
191
  pretty_cols_map = {col: _pretty_column_name(col) for col in df.columns}
192
 
@@ -255,7 +255,7 @@ class DataTransformer:
255
  df_view = df_sorted.copy()
256
 
257
  # --- 3. Add Columns for Agent Openness and Tooling ---
258
- base_cols = ["id","Agent","Submitter","LLM Base"]
259
  new_cols = ["Openness", "Agent Tooling"]
260
  ending_cols = ["Logs"]
261
 
 
113
  'Openness': 'Openness',
114
  'Agent tooling': 'Agent Tooling',
115
  'LLM base': 'LLM Base',
116
+ 'Source': 'Source',
117
  }
118
 
119
  if raw_col in fixed_mappings:
 
187
  raise TypeError("Input 'raw_df' must be a pandas DataFrame.")
188
 
189
  df = raw_df.copy()
 
190
  # Create the mapping for pretty column names
191
  pretty_cols_map = {col: _pretty_column_name(col) for col in df.columns}
192
 
 
255
  df_view = df_sorted.copy()
256
 
257
  # --- 3. Add Columns for Agent Openness and Tooling ---
258
+ base_cols = ["id","Agent","Submitter","LLM Base","Source"]
259
  new_cols = ["Openness", "Agent Tooling"]
260
  ending_cols = ["Logs"]
261
 
ui_components.py CHANGED
@@ -451,6 +451,12 @@ def create_leaderboard_display(
451
  #Make pretty and format the LLM Base column
452
  df_view['LLM Base'] = df_view['LLM Base'].apply(clean_llm_base_list)
453
  df_view['LLM Base'] = df_view['LLM Base'].apply(format_llm_base_with_html)
 
 
 
 
 
 
454
 
455
  all_cols = df_view.columns.tolist()
456
  # Remove pareto and Icon columns and insert it at the beginning
@@ -458,7 +464,7 @@ def create_leaderboard_display(
458
  all_cols.insert(0, all_cols.pop(all_cols.index('Pareto')))
459
  df_view = df_view[all_cols]
460
  # Drop internally used columns that are not needed in the display
461
- columns_to_drop = ['id', 'Openness', 'Agent Tooling']
462
  df_view = df_view.drop(columns=columns_to_drop, errors='ignore')
463
 
464
  df_headers = df_view.columns.tolist()
@@ -466,7 +472,7 @@ def create_leaderboard_display(
466
  for col in df_headers:
467
  if col == "Logs" or "Cost" in col or "Score" in col:
468
  df_datatypes.append("markdown")
469
- elif col in ["Icon","LLM Base"]:
470
  df_datatypes.append("html")
471
  else:
472
  df_datatypes.append("str")
@@ -484,8 +490,8 @@ def create_leaderboard_display(
484
  for col in remaining_headers:
485
  if "Score" in col or "Cost" in col:
486
  num_score_cost_cols += 1
487
- dynamic_widths = [80] * num_score_cost_cols
488
- fixed_end_widths = [80, 40]
489
  # 5. Combine all the lists to create the final, fully dynamic list.
490
  final_column_widths = fixed_start_widths + dynamic_widths + fixed_end_widths
491
 
@@ -553,7 +559,7 @@ def create_benchmark_details_display(
553
  benchmark_cost_col = f"{benchmark_name} Cost"
554
 
555
  # Define the columns needed for the detailed table
556
- table_cols = ['Agent','Openness','Agent Tooling', 'Submitter', 'Date', benchmark_score_col, benchmark_cost_col,'Logs','id', 'LLM Base']
557
 
558
  # Filter to only columns that actually exist in the full dataframe
559
  existing_table_cols = [col for col in table_cols if col in full_df.columns]
@@ -583,6 +589,12 @@ def create_benchmark_details_display(
583
  #Make pretty and format the LLM Base column
584
  benchmark_table_df['LLM Base'] = benchmark_table_df['LLM Base'].apply(clean_llm_base_list)
585
  benchmark_table_df['LLM Base'] = benchmark_table_df['LLM Base'].apply(format_llm_base_with_html)
 
 
 
 
 
 
586
 
587
  # Calculate and add "Benchmark Attempted" column
588
  def check_benchmark_status(row):
@@ -630,7 +642,7 @@ def create_benchmark_details_display(
630
  for col in df_headers:
631
  if "Logs" in col or "Cost" in col or "Score" in col:
632
  df_datatypes.append("markdown")
633
- elif col in ["Icon", "LLM Base"]:
634
  df_datatypes.append("html")
635
  else:
636
  df_datatypes.append("str")
@@ -641,8 +653,6 @@ def create_benchmark_details_display(
641
  }
642
  # 2. Create the final list of headers for display.
643
  benchmark_table_df = benchmark_table_df.rename(columns=header_rename_map)
644
- # Create the scatter plot using the full data for context, but plotting benchmark metrics
645
- # This shows all agents on the same axis for better comparison.
646
  benchmark_plot = _plot_scatter_plotly(
647
  data=full_df,
648
  x=benchmark_cost_col,
@@ -685,10 +695,17 @@ def get_full_leaderboard_data(split: str) -> tuple[pd.DataFrame, dict]:
685
  if pd.isna(raw_uri) or raw_uri == "": return ""
686
  web_url = hf_uri_to_web_url(str(raw_uri))
687
  return hyperlink(web_url, "🔗") if web_url else ""
688
-
689
  # Apply the function to the "Logs" column
690
  pretty_df["Logs"] = pretty_df["Logs"].apply(format_log_entry_to_html)
691
 
 
 
 
 
 
 
 
 
692
  return pretty_df, pretty_tag_map
693
 
694
  # Fallback for unexpected types
 
451
  #Make pretty and format the LLM Base column
452
  df_view['LLM Base'] = df_view['LLM Base'].apply(clean_llm_base_list)
453
  df_view['LLM Base'] = df_view['LLM Base'].apply(format_llm_base_with_html)
454
+ # append the repro url to the end of the agent name
455
+ if 'Source' in df_view.columns:
456
+ df_view['Agent'] = df_view.apply(
457
+ lambda row: f"{row['Agent']} {row['Source']}" if row['Source'] else row['Agent'],
458
+ axis=1
459
+ )
460
 
461
  all_cols = df_view.columns.tolist()
462
  # Remove pareto and Icon columns and insert it at the beginning
 
464
  all_cols.insert(0, all_cols.pop(all_cols.index('Pareto')))
465
  df_view = df_view[all_cols]
466
  # Drop internally used columns that are not needed in the display
467
+ columns_to_drop = ['id', 'Openness', 'Agent Tooling', 'Source']
468
  df_view = df_view.drop(columns=columns_to_drop, errors='ignore')
469
 
470
  df_headers = df_view.columns.tolist()
 
472
  for col in df_headers:
473
  if col == "Logs" or "Cost" in col or "Score" in col:
474
  df_datatypes.append("markdown")
475
+ elif col in ["Agent","Icon","LLM Base"]:
476
  df_datatypes.append("html")
477
  else:
478
  df_datatypes.append("str")
 
490
  for col in remaining_headers:
491
  if "Score" in col or "Cost" in col:
492
  num_score_cost_cols += 1
493
+ dynamic_widths = [90] * num_score_cost_cols
494
+ fixed_end_widths = [90, 50]
495
  # 5. Combine all the lists to create the final, fully dynamic list.
496
  final_column_widths = fixed_start_widths + dynamic_widths + fixed_end_widths
497
 
 
559
  benchmark_cost_col = f"{benchmark_name} Cost"
560
 
561
  # Define the columns needed for the detailed table
562
+ table_cols = ['Agent','Source','Openness','Agent Tooling', 'Submitter', 'Date', benchmark_score_col, benchmark_cost_col,'Logs','id', 'LLM Base']
563
 
564
  # Filter to only columns that actually exist in the full dataframe
565
  existing_table_cols = [col for col in table_cols if col in full_df.columns]
 
589
  #Make pretty and format the LLM Base column
590
  benchmark_table_df['LLM Base'] = benchmark_table_df['LLM Base'].apply(clean_llm_base_list)
591
  benchmark_table_df['LLM Base'] = benchmark_table_df['LLM Base'].apply(format_llm_base_with_html)
592
+ # append the repro url to the end of the agent name
593
+ if 'Source' in benchmark_table_df.columns:
594
+ benchmark_table_df['Agent'] = benchmark_table_df.apply(
595
+ lambda row: f"{row['Agent']} {row['Source']}" if row['Source'] else row['Agent'],
596
+ axis=1
597
+ )
598
 
599
  # Calculate and add "Benchmark Attempted" column
600
  def check_benchmark_status(row):
 
642
  for col in df_headers:
643
  if "Logs" in col or "Cost" in col or "Score" in col:
644
  df_datatypes.append("markdown")
645
+ elif col in ["Agent","Icon", "LLM Base"]:
646
  df_datatypes.append("html")
647
  else:
648
  df_datatypes.append("str")
 
653
  }
654
  # 2. Create the final list of headers for display.
655
  benchmark_table_df = benchmark_table_df.rename(columns=header_rename_map)
 
 
656
  benchmark_plot = _plot_scatter_plotly(
657
  data=full_df,
658
  x=benchmark_cost_col,
 
695
  if pd.isna(raw_uri) or raw_uri == "": return ""
696
  web_url = hf_uri_to_web_url(str(raw_uri))
697
  return hyperlink(web_url, "🔗") if web_url else ""
 
698
  # Apply the function to the "Logs" column
699
  pretty_df["Logs"] = pretty_df["Logs"].apply(format_log_entry_to_html)
700
 
701
+ if "Source" in pretty_df.columns:
702
+ def format_source_url_to_html(raw_url):
703
+ # Handle empty or NaN values, returning a blank string.
704
+ if pd.isna(raw_url) or raw_url == "": return ""
705
+ # Assume the raw URL is already a valid web URL and doesn't need conversion.
706
+ return hyperlink(str(raw_url), "🔗")
707
+ # Apply the function to the "Source" column.
708
+ pretty_df["Source"] = pretty_df["Source"].apply(format_source_url_to_html)
709
  return pretty_df, pretty_tag_map
710
 
711
  # Fallback for unexpected types