Titova Ksenia committed
Commit · 758c9c5 · 1 Parent(s): 1077ec2

remove average_pb

Files changed:
- src/display/utils.py +1 -1
- src/leaderboard/read_evals.py +1 -8
- src/populate.py +3 -5
src/display/utils.py CHANGED

@@ -27,7 +27,7 @@ auto_eval_column_dict = []
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
 #Scores
 auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg. Correlation ⬆️", "number", True)])
-auto_eval_column_dict.append(["average_pb", ColumnContent, ColumnContent("Positional Bias Impact", "number", True)])
+# auto_eval_column_dict.append(["average_pb", ColumnContent, ColumnContent("Positional Bias Impact", "number", True)])
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 # Model information
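Why this one-line change ripples into the other two files: in the stock leaderboard template, auto_eval_column_dict is fed to dataclasses.make_dataclass to build the AutoEvalColumn namespace, so commenting out the average_pb entry removes the AutoEvalColumn.average_pb attribute that read_evals.py and populate.py referenced. A minimal runnable sketch of that pattern, assuming the template's usual ColumnContent shape (the dataclass itself is not shown in this diff):

from dataclasses import dataclass, make_dataclass

# Assumed shape of ColumnContent; frozen so instances are hashable and can
# serve as dataclass field defaults below.
@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

auto_eval_column_dict = []
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg. Correlation ⬆️", "number", True)])

# Each triple is (field name, field type, default value); the result is a
# class whose attributes hold the column metadata.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

print(AutoEvalColumn.average.name)  # "Avg. Correlation ⬆️"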
src/leaderboard/read_evals.py CHANGED

@@ -114,11 +114,7 @@ class EvalResult:
     def to_dict(self, mina=0, maxa=1):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
         average = sum([self.results["apcc"], self.results["mpcc"]]) / 2
-        print("self.results mpcc_delta", self.results["mpcc_delta"])

-        norm_mpcc_delta = (float(self.results["mpcc_delta"]) - mina) / (maxa - mina)
-        print("norm_mpcc_delta", norm_mpcc_delta)
-        average_pb = sum([norm_mpcc_delta, self.results["mpcc_cons"], self.results["pcon_ab"]]) / 3
         data_dict = {
             "eval_name": self.eval_name,  # not a column, just a save name,
             AutoEvalColumn.precision.name: self.precision.value.name,
@@ -129,7 +125,6 @@ class EvalResult:
             AutoEvalColumn.model.name: make_clickable_model(self.full_model),
             AutoEvalColumn.revision.name: self.revision,
             AutoEvalColumn.average.name: average,
-            AutoEvalColumn.average_pb.name: average_pb,
             AutoEvalColumn.license.name: self.license,
             AutoEvalColumn.likes.name: self.likes,
             AutoEvalColumn.params.name: self.num_params,
@@ -201,11 +196,9 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
             eval_results[eval_name] = eval_result

     results = []
-    mina = min([a.results["mpcc_delta"] for a in eval_results.values()])
-    maxa = max([a.results["mpcc_delta"] for a in eval_results.values()])
     for v in eval_results.values():
         try:
-            v.to_dict(mina, maxa)
+            v.to_dict()  # we test if the dict version is complete
             results.append(v)
         except KeyError as e:  # not all eval values present
             print("e", e)
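For reference, the deleted to_dict branch min-max scaled mpcc_delta across all evaluated models and averaged it with mpcc_cons and pcon_ab to produce the Positional Bias Impact score. A standalone sketch of that computation with illustrative numbers (metric names come from the diff; the helper function is hypothetical):

# Hypothetical helper mirroring the removed average_pb computation.
def positional_bias_average(results: dict, mina: float, maxa: float) -> float:
    # Min-max scale mpcc_delta to [0, 1] using the extremes over all models.
    # Like the removed code, this divides by zero if all deltas are equal.
    norm_mpcc_delta = (float(results["mpcc_delta"]) - mina) / (maxa - mina)
    return (norm_mpcc_delta + results["mpcc_cons"] + results["pcon_ab"]) / 3

deltas = [0.10, 0.25, 0.40]  # illustrative mpcc_delta values, one per model
mina, maxa = min(deltas), max(deltas)
score = positional_bias_average({"mpcc_delta": 0.25, "mpcc_cons": 0.8, "pcon_ab": 0.6}, mina, maxa)
print(score)  # (0.5 + 0.8 + 0.6) / 3 ≈ 0.633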
src/populate.py CHANGED

@@ -11,9 +11,7 @@ from src.leaderboard.read_evals import get_raw_eval_results
 def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
     """Creates a dataframe from all the individual experiment results"""
     raw_data = get_raw_eval_results(results_path, requests_path)
-    mina = min([a.results["mpcc_delta"] for a in raw_data.values()])
-    maxa = max([a.results["mpcc_delta"] for a in raw_data.values()])
-    all_data_json = [v.to_dict(mina, maxa) for v in raw_data]
+    all_data_json = [v.to_dict() for v in raw_data]

     df = pd.DataFrame.from_records(all_data_json)
     df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
@@ -30,10 +28,10 @@ def set_style_for_leaderboard_df(df: pd.DataFrame) -> pd.DataFrame:
     # Adding CSS to style the specific column header
     styled_df.set_table_styles({
         AutoEvalColumn.average.name: [{'selector': 'th.col_heading.level0', 'props': 'color: green;'}],
-        AutoEvalColumn.average_pb.name: [{'selector': 'th.col_heading.level0', 'props': 'color: green;'}]
+        # AutoEvalColumn.average_pb.name: [{'selector': 'th.col_heading.level0', 'props': 'color: green;'}]
     }, overwrite=False)

-    styled_df.format(na_rep="").bar(align=0, subset=[AutoEvalColumn.average.name, AutoEvalColumn.average_pb.name], cmap="PiYG")
+    styled_df.format(na_rep="").bar(align=0, subset=[AutoEvalColumn.average.name], cmap="PiYG")
     return styled_df

 def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
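The styling chain kept here relies on pandas Styler features: per-column header CSS via set_table_styles(..., overwrite=False) and in-cell bars via Styler.bar, whose align=0 and cmap arguments need pandas 1.4+ (plus matplotlib for the colormap). A self-contained sketch with placeholder data, since the real AutoEvalColumn names live in utils.py:

import pandas as pd

# Placeholder frame standing in for the leaderboard; column names are
# illustrative, not the real AutoEvalColumn values.
df = pd.DataFrame({
    "Model": ["model-a", "model-b", "model-c"],
    "Avg. Correlation ⬆️": [0.42, -0.13, None],
})
styled_df = df.style

# Color one column's header green; overwrite=False merges with prior styles.
styled_df.set_table_styles({
    "Avg. Correlation ⬆️": [{"selector": "th.col_heading.level0", "props": "color: green;"}],
}, overwrite=False)

# Blank out NaNs, then draw diverging in-cell bars centered on zero
# (the "PiYG" colormap requires matplotlib).
styled_df.format(na_rep="").bar(align=0, subset=["Avg. Correlation ⬆️"], cmap="PiYG")

html = styled_df.to_html()  # render for, e.g., an HTML component in the Space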