ljvmiranda921 committed on
Commit
15fe18d
·
1 Parent(s): 7ffe204

Add option to download file

Browse files
Files changed (2) hide show
  1. .gitignore +1 -0
  2. app.py +72 -2
.gitignore CHANGED
@@ -11,3 +11,4 @@ eval-results/
11
  eval-queue-bk/
12
  eval-results-bk/
13
  logs/
 
 
11
  eval-queue-bk/
12
  eval-results-bk/
13
  logs/
14
+ filbench_results.csv
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
 
3
  import gradio as gr
4
  import pandas as pd
@@ -27,7 +28,7 @@ def restart_space():
27
 
28
 
29
  # 2. Load and populate leaderboard data
30
- def init_leaderboard(source: str, aggregate: bool = False) -> Leaderboard:
31
  results = load_dataset(source, split="train").to_pandas().to_dict(orient="records")
32
  raw_data = [EvalResult.init_from_dict(result) for result in results]
33
  all_data_json = [v.to_dict() for v in raw_data]
@@ -35,7 +36,6 @@ def init_leaderboard(source: str, aggregate: bool = False) -> Leaderboard:
35
  df = pd.DataFrame.from_records(all_data_json)
36
  df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
37
  df["Incomplete"] = ~df.isna().any(axis=1)
38
-
39
  master_columns = []
40
  for col in fields(AutoEvalColumn):
41
  if col.meta:
@@ -54,6 +54,11 @@ def init_leaderboard(source: str, aggregate: bool = False) -> Leaderboard:
54
  ]
55
  cols.append("Incomplete")
56
  df = df[cols].round(decimals=2)
 
 
 
 
 
57
 
58
  return Leaderboard(
59
  value=df,
@@ -90,6 +95,68 @@ def init_leaderboard(source: str, aggregate: bool = False) -> Leaderboard:
90
  )
91
 
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  # 3. Actual setup of the HF Space
94
  demo = gr.Blocks(css=custom_css)
95
  with demo:
@@ -111,6 +178,9 @@ with demo:
111
  gr.Markdown(about.LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
112
 
113
  with gr.Row():
 
 
 
114
  with gr.Accordion("📙 Citation", open=False):
115
  citation_button = gr.Textbox(
116
  value=about.CITATION_BUTTON_TEXT,
 
1
  import os
2
+ import re
3
 
4
  import gradio as gr
5
  import pandas as pd
 
28
 
29
 
30
  # 2. Load and populate leaderboard data
31
+ def get_results(source: str, aggregate: bool = False) -> pd.DataFrame:
32
  results = load_dataset(source, split="train").to_pandas().to_dict(orient="records")
33
  raw_data = [EvalResult.init_from_dict(result) for result in results]
34
  all_data_json = [v.to_dict() for v in raw_data]
 
36
  df = pd.DataFrame.from_records(all_data_json)
37
  df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
38
  df["Incomplete"] = ~df.isna().any(axis=1)
 
39
  master_columns = []
40
  for col in fields(AutoEvalColumn):
41
  if col.meta:
 
54
  ]
55
  cols.append("Incomplete")
56
  df = df[cols].round(decimals=2)
57
+ return df, master_columns
58
+
59
+
60
+ def init_leaderboard(source: str, aggregate: bool = False) -> Leaderboard:
61
+ df, master_columns = get_results(source=source, aggregate=aggregate)
62
 
63
  return Leaderboard(
64
  value=df,
 
95
  )
96
 
97
 
98
+ def download_results():
99
+ df, _ = get_results(source=REPO_RESULTS, aggregate=False)
100
+ df_agg, _ = get_results(source=REPO_RESULTS, aggregate=True)
101
+
102
+ # Cleanup
103
+ def extract_names(html_string):
104
+ match = re.search(r"<a[^>]*>(.*?)</a>", html_string)
105
+ if match:
106
+ extracted_text = match.group(1) # "some value"
107
+ return extracted_text
108
+
109
+ def remove_emojis(string):
110
+ emoji_pattern = re.compile(
111
+ "["
112
+ "\U0001f600-\U0001f64f" # emoticons
113
+ "\U0001f300-\U0001f5ff" # symbols & pictographs
114
+ "\U0001f680-\U0001f6ff" # transport & map symbols
115
+ "\U0001f700-\U0001f77f" # alchemical symbols
116
+ "\U0001f780-\U0001f7ff" # Geometric Shapes Extended
117
+ "\U0001f800-\U0001f8ff" # Supplemental Arrows-C
118
+ "\U0001f900-\U0001f9ff" # Supplemental Symbols and Pictographs
119
+ "\U0001fa00-\U0001fa6f" # Chess Symbols
120
+ "\U0001fa70-\U0001faff" # Symbols and Pictographs Extended-A
121
+ "\U00002702-\U000027b0" # Dingbats
122
+ "\U000024c2-\U0001f251"
123
+ "]+",
124
+ flags=re.UNICODE,
125
+ )
126
+ return emoji_pattern.sub(r"", string)
127
+
128
+ df["Model"] = df["Model"].apply(extract_names)
129
+ df = df.rename(columns={col: remove_emojis(col).strip() for col in df.columns})
130
+ df["Multilingual"] = df["Multilingual"].apply(remove_emojis)
131
+ df["Model Type"] = df["Model Type"].apply(remove_emojis)
132
+ df = df.reset_index(drop=True)
133
+
134
+ # Cleanup the aggregated dataset
135
+ df_agg["Model"] = df_agg["Model"].apply(extract_names)
136
+ df_agg = df_agg.rename(
137
+ columns={col: remove_emojis(col).strip() for col in df_agg.columns}
138
+ )
139
+ df_agg = df_agg.reset_index(drop=True)
140
+ df_agg = df_agg[
141
+ [
142
+ "Model",
143
+ "Cultural Knowledge",
144
+ "Classical NLP",
145
+ "Reading Comprehension",
146
+ "Generation",
147
+ ]
148
+ ]
149
+ df_agg = df_agg.rename(
150
+ columns={col: f"agg_{col}" for col in df_agg.columns if col != "Model"}
151
+ )
152
+
153
+ # Combine the full and aggregated results
154
+ df_merge = df.merge(df_agg, on="Model")
155
+ filepath = "filbench_results.csv"
156
+ df_merge.to_csv(filepath, index=False)
157
+ return filepath
158
+
159
+
160
  # 3. Actual setup of the HF Space
161
  demo = gr.Blocks(css=custom_css)
162
  with demo:
 
178
  gr.Markdown(about.LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
179
 
180
  with gr.Row():
181
+ download_button = gr.DownloadButton("Download results (CSV)")
182
+ download_button.click(download_results, outputs=download_button)
183
+
184
  with gr.Accordion("📙 Citation", open=False):
185
  citation_button = gr.Textbox(
186
  value=about.CITATION_BUTTON_TEXT,