yangzhitao committed on
Commit
cd1b5e8
·
1 Parent(s): 22161b0

feat: add functions to truncate numbers to one decimal place and format DataFrame columns accordingly

Browse files
Files changed (3) hide show
  1. app.py +27 -0
  2. scripts/upload_dataset.py +16 -2
  3. src/populate.py +16 -2
app.py CHANGED
@@ -65,6 +65,29 @@ print("///// --- Settings --- /////", settings.model_dump())
65
  ) = get_evaluation_queue_df(settings.EVAL_REQUESTS_PATH, EVAL_COLS)
66
 
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  def filter_dataframe_by_columns(selected_cols: list[str], original_df: pd.DataFrame) -> pd.DataFrame:
69
  """
70
  根据选择的列过滤 DataFrame
@@ -179,6 +202,8 @@ def init_leaderboard_tabs(
179
  precision_filtered_df = filter_dataframe_by_precision(default_precision, original_df)
180
  # 根据默认选择再筛选一次 DataFrame
181
  initial_filtered_df = filter_dataframe_by_columns(default_selected, precision_filtered_df)
 
 
182
 
183
  with gr.Row():
184
  with gr.Column(scale=1):
@@ -231,6 +256,8 @@ def init_leaderboard_tabs(
231
  column_filtered_df = filter_dataframe_by_columns(selected_cols, precision_filtered_df)
232
  # 最后按搜索关键词筛选
233
  final_df = search_models_in_dataframe(search_text, column_filtered_df)
 
 
234
  return final_df
235
 
236
  # 绑定搜索、列选择和 precision 的变化事件,动态更新 DataFrame
 
65
  ) = get_evaluation_queue_df(settings.EVAL_REQUESTS_PATH, EVAL_COLS)
66
 
67
 
68
def truncate_to_one_decimal(value):
    """
    Truncate a number to one decimal place (no rounding).

    Truncation is toward zero, so 1.29 -> 1.2 and -1.29 -> -1.2. Integers are
    returned as floats (5 -> 5.0).

    Returned unchanged:
      * anything that is not an int/float (strings, lists, None, ...)
      * booleans (technically ints, but they are flags, not numbers)
      * missing values (NaN) and +/- infinity
    """
    # Check the type first: pd.isna() on a list/array returns an array whose
    # truth value is ambiguous and would raise ValueError.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return value
    # int() cannot convert NaN or infinity (ValueError / OverflowError),
    # so pass those through untouched.
    if pd.isna(value) or value in (float("inf"), float("-inf")):
        return value
    return float(int(value * 10)) / 10
75
+
76
+
77
def format_dataframe_numbers(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` with its numeric columns truncated to one decimal place.

    Values are truncated (not rounded) via ``truncate_to_one_decimal``.
    The 'Model' and 'T' columns are always skipped, and so are boolean
    columns: pandas reports bool dtype as numeric, but flags must not be
    turned into 1.0 / 0.0. The input DataFrame is left unmodified.
    """
    formatted = df.copy()
    for col in formatted.columns:
        if col in ('Model', 'T'):  # known non-numeric columns
            continue
        column = formatted[col]
        # Only real numeric columns are truncated; bool columns are excluded explicitly.
        if pd.api.types.is_bool_dtype(column) or not pd.api.types.is_numeric_dtype(column):
            continue
        formatted[col] = column.apply(truncate_to_one_decimal)
    return formatted
89
+
90
+
91
  def filter_dataframe_by_columns(selected_cols: list[str], original_df: pd.DataFrame) -> pd.DataFrame:
92
  """
93
  根据选择的列过滤 DataFrame
 
202
  precision_filtered_df = filter_dataframe_by_precision(default_precision, original_df)
203
  # 根据默认选择再筛选一次 DataFrame
204
  initial_filtered_df = filter_dataframe_by_columns(default_selected, precision_filtered_df)
205
+ # 格式化数字列,只保留1位小数并截断
206
+ initial_filtered_df = format_dataframe_numbers(initial_filtered_df)
207
 
208
  with gr.Row():
209
  with gr.Column(scale=1):
 
256
  column_filtered_df = filter_dataframe_by_columns(selected_cols, precision_filtered_df)
257
  # 最后按搜索关键词筛选
258
  final_df = search_models_in_dataframe(search_text, column_filtered_df)
259
+ # 格式化数字列,只保留1位小数并截断
260
+ final_df = format_dataframe_numbers(final_df)
261
  return final_df
262
 
263
  # 绑定搜索、列选择和 precision 的变化事件,动态更新 DataFrame
scripts/upload_dataset.py CHANGED
@@ -1,6 +1,18 @@
1
- #!/usr/bin/env python3
 
 
 
 
 
 
2
  """
3
  Upload the eval-results/leaderboard folder to y-playground/results on Hugging Face Hub.
 
 
 
 
 
 
4
  """
5
 
6
  import os
@@ -14,7 +26,9 @@ load_dotenv()
14
 
15
  # Configuration
16
  LOCAL_FOLDER = Path("eval-results/leaderboard")
17
- REPO_ID = "y-playground/results"
 
 
18
  REPO_TYPE = "dataset" # or "model" or "space"
19
 
20
 
 
1
+ #!/usr/bin/env python3
2
+ # /// script
3
+ # dependencies = [
4
+ # "python-dotenv",
5
+ # "huggingface-hub",
6
+ # ]
7
+ # ///
8
  """
9
  Upload the eval-results/leaderboard folder to y-playground/results on Hugging Face Hub.
10
+
11
+ Usage:
12
+
13
+ ```bash
14
+ uv run scripts/upload_dataset.py
15
+ ```
16
  """
17
 
18
  import os
 
26
 
27
  # Configuration
28
  LOCAL_FOLDER = Path("eval-results/leaderboard")
29
+ HF_OWNER = os.getenv("HF_OWNER", "lmms-lab-si")
30
+ HF_RESULTS_REPO_NAME = os.getenv("HF_RESULTS_REPO_NAME", "EASI-Leaderboard-Results")
31
+ REPO_ID = f"{HF_OWNER}/{HF_RESULTS_REPO_NAME}"
32
  REPO_TYPE = "dataset" # or "model" or "space"
33
 
34
 
src/populate.py CHANGED
@@ -23,6 +23,15 @@ from src.display.utils import AutoEvalColumn, EvalQueueColumn
23
  from src.leaderboard.read_evals import get_raw_eval_results
24
 
25
 
 
 
 
 
 
 
 
 
 
26
  def get_leaderboard_df(
27
  results_path: str,
28
  requests_path: str,
@@ -49,7 +58,7 @@ def get_leaderboard_df(
49
  exclude entries with missing benchmark results.
50
 
51
  Note:
52
- The function automatically rounds numeric values to 2 decimal places and
53
  filters out any entries that have NaN values in the specified benchmark columns.
54
  """
55
  raw_data = get_raw_eval_results(results_path, requests_path)
@@ -57,7 +66,12 @@ def get_leaderboard_df(
57
 
58
  df = pd.DataFrame.from_records(all_data_json)
59
  df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
60
- df = df.loc[:, cols].round(decimals=2)
 
 
 
 
 
61
 
62
  # filter out if any of the benchmarks have not been produced
63
  df = df.loc[has_no_nan_values(df, benchmark_cols), :]
 
23
  from src.leaderboard.read_evals import get_raw_eval_results
24
 
25
 
26
def truncate_to_one_decimal(value):
    """
    Truncate a number to one decimal place (no rounding).

    Truncation is toward zero, so 1.29 -> 1.2 and -1.29 -> -1.2. Integers are
    returned as floats (5 -> 5.0).

    Returned unchanged:
      * anything that is not an int/float (strings, lists, None, ...)
      * booleans (technically ints, but they are flags, not numbers)
      * missing values (NaN) and +/- infinity
    """
    # Check the type first: pd.isna() on a list/array returns an array whose
    # truth value is ambiguous and would raise ValueError.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return value
    # int() cannot convert NaN or infinity (ValueError / OverflowError),
    # so pass those through untouched.
    if pd.isna(value) or value in (float("inf"), float("-inf")):
        return value
    return float(int(value * 10)) / 10
33
+
34
+
35
  def get_leaderboard_df(
36
  results_path: str,
37
  requests_path: str,
 
58
  exclude entries with missing benchmark results.
59
 
60
  Note:
61
+ The function automatically truncates numeric values to 1 decimal place and
62
  filters out any entries that have NaN values in the specified benchmark columns.
63
  """
64
  raw_data = get_raw_eval_results(results_path, requests_path)
 
66
 
67
  df = pd.DataFrame.from_records(all_data_json)
68
  df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
69
+ df = df.loc[:, cols]
70
+
71
+ # 截断数字列到1位小数(不四舍五入)
72
+ for col in df.columns:
73
+ if col not in ['Model', 'T'] and pd.api.types.is_numeric_dtype(df[col]):
74
+ df[col] = df[col].apply(truncate_to_one_decimal)
75
 
76
  # filter out if any of the benchmarks have not been produced
77
  df = df.loc[has_no_nan_values(df, benchmark_cols), :]