Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| import json | |
class ResultsProcessor:
    """Convert a nested evaluation-results dictionary into pandas DataFrames.

    The instance only stores its three constructor arguments; every
    ``get_*`` method reads ``self.data_dict`` and builds a fresh object, so
    no method mutates the stored data.

    The top-level ``data_dict`` keys read by this class are:
    ``"Overall performance"``, ``"Fairness results"``,
    ``"Perturber Families"``, ``"Performance Robustness"``,
    ``"Performance results"`` and ``"n points"``. A missing key raises
    ``KeyError`` from the method that needs it.
    """

    def __init__(self, prompt_option, result_file, data_dict):
        """Store the arguments unchanged.

        Args:
            prompt_option: Kept on the instance; not read by any method here.
            result_file: Kept on the instance; not read by any method here.
            data_dict: Nested mapping holding the results to be reshaped.
        """
        self.prompt_option = prompt_option
        self.result_file = result_file
        self.data_dict = data_dict

    def get_overall_performance(self):
        """Return ``data_dict["Overall performance"]`` times 100, rounded to 2 places."""
        return round(self.data_dict["Overall performance"] * 100, 2)

    def get_bias_ratios_df(self):
        """Build a table with one row per entry of ``data_dict['Fairness results']``.

        Returns:
            DataFrame with columns ``'Characteristic'`` (the entry's key) and
            ``'Bias ratio'`` (the entry's ``'OverallFairness'`` value), sorted
            by ``'Characteristic'``. The index is not reset after sorting, so
            it still reflects the original insertion order.
        """
        fairness_results = self.data_dict['Fairness results']
        bias_df = pd.DataFrame({
            'Characteristic': list(fairness_results),
            'Bias ratio': [
                entry['OverallFairness'] for entry in fairness_results.values()
            ],
        })
        return bias_df.sort_values(by=['Characteristic'])

    def get_global_perturbers_df(self):
        """Build a long-format table of performance per perturbation level.

        For each item of ``data_dict['Perturber Families']`` (a mapping with
        ``'family name'`` and ``'levels'``), the per-level values are looked up
        in ``data_dict['Performance Robustness']['Perturber family wise
        results'][family name]['PerformancePerturbers']``.

        Returns:
            DataFrame with columns ``'Perturbation level'``, ``'Performance'``,
            ``'normalized performance'``, ``'Perturbation family'``,
            ``'Levels'`` (position of the level inside its family) and
            ``'category'`` (always ``'Overall'``).

            ``'normalized performance'`` is the level's value divided by the
            value of the family's first level. When that first value is zero
            the ratio is undefined and NaN is stored instead of raising
            ``ZeroDivisionError``.

        Raises:
            IndexError: If a family has an empty ``'levels'`` list.
            KeyError: If a family or level is missing from the results.
        """
        level_names = []
        performances = []
        normalized_performances = []
        family_names = []
        level_positions = []

        for item in self.data_dict['Perturber Families']:
            family_name = item['family name']
            levels = item['levels']
            # TODO: change the structure of post processing here
            family_results = self.data_dict['Performance Robustness'][
                'Perturber family wise results'
            ][family_name]["PerformancePerturbers"]

            level_names.extend(levels)
            # The first listed level is used as the reference for normalization.
            baseline = family_results[levels[0]]

            for position, level in enumerate(levels):
                performance = family_results[level]
                performances.append(performance)
                # Guard the division: a zero baseline has no meaningful ratio.
                normalized_performances.append(
                    performance / baseline if baseline != 0 else float('nan')
                )
                family_names.append(family_name)
                level_positions.append(position)

        t_pert_df_global = pd.DataFrame({
            'Perturbation level': level_names,
            'Performance': performances,
            'normalized performance': normalized_performances,
            'Perturbation family': family_names,
            'Levels': level_positions,
        })
        t_pert_df_global['category'] = 'Overall'
        return t_pert_df_global

    def get_data_distribution(self, embedder_option):
        """Build a per-category table of data share and point counts.

        Args:
            embedder_option: Key into ``data_dict['Performance results']``
                whose ``'CI_Table'`` is read.

        Returns:
            DataFrame with columns ``'Category'``, ``'Share of data'`` and
            ``'Number of points'``. The count is
            ``int(share * data_dict['n points'] / 100)``, i.e. the share is
            treated as a percentage and the result is truncated, not rounded.
        """
        ci_rows = list(
            self.data_dict['Performance results'][embedder_option]['CI_Table'].values()
        )
        n_points = self.data_dict['n points']
        return pd.DataFrame({
            'Category': [row['category'] for row in ci_rows],
            'Share of data': [row['Share of Data'] for row in ci_rows],
            'Number of points': [
                int(row['Share of Data'] * n_points / 100) for row in ci_rows
            ],
        })

    def get_fairness_confidence_interval_df(self, embedder_option):
        """Build a per-category table of estimates with their interval bounds.

        Args:
            embedder_option: Key into ``data_dict['Fairness results']`` whose
                ``'CI_Table'`` is read.

        Returns:
            DataFrame with columns ``'Category'``, ``'Estimate'``, ``'Upper'``,
            ``'Lower'``, ``'Index'`` (row position as float), ``'Diff upper'``
            (``Upper - Estimate``) and ``'Diff lower'`` (``Estimate - Lower``).
        """
        ci_rows = list(
            self.data_dict['Fairness results'][embedder_option]['CI_Table'].values()
        )
        t_fair_df = pd.DataFrame({
            'Category': [row['category'] for row in ci_rows],
            'Estimate': [row['Estimate'] for row in ci_rows],
            'Upper': [row['Upper'] for row in ci_rows],
            'Lower': [row['Lower'] for row in ci_rows],
            'Index': list(range(len(ci_rows))),
        })
        t_fair_df['Index'] = t_fair_df['Index'].astype(float)
        t_fair_df['Diff upper'] = t_fair_df['Upper'] - t_fair_df['Estimate']
        t_fair_df['Diff lower'] = t_fair_df['Estimate'] - t_fair_df['Lower']
        return t_fair_df

    def get_performance_robustness(self, embedder_option):
        """Collect per-family robustness tables for one embedder.

        For every family in ``data_dict['Perturber Families']`` the entries of
        ``data_dict['Performance Robustness']['Embedder wise results']
        [embedder_option]`` for that family's levels are each turned into a
        DataFrame, tagged with ``'Perturber'``, ``'Perturber family'`` and
        ``'Levels'`` columns, and concatenated row-wise.

        Args:
            embedder_option: Key into the ``'Embedder wise results'`` mapping.

        Returns:
            Dict of three lists, aligned by position (one entry per family):
            ``'merged_dfs_list'`` (the concatenated per-level tables),
            ``'t_pert_df_global_temps_list'`` (copies of that family's rows
            from ``get_global_perturbers_df()``) and ``'family_names_list'``.

        Raises:
            ValueError: From ``pd.concat`` if a family has no levels (in
                practice ``get_global_perturbers_df`` raises ``IndexError``
                for that case first).
        """
        t_pert_df_global = self.get_global_perturbers_df()
        embedder_results = self.data_dict['Performance Robustness'][
            'Embedder wise results'
        ][embedder_option]

        merged_dfs_list = []
        t_pert_df_global_temps_list = []
        family_names_list = []

        # Embedder categories
        for item in self.data_dict['Perturber Families']:
            family_name = item['family name']
            level_frames = []
            for position, level in enumerate(item['levels']):
                level_df = pd.DataFrame(embedder_results[level])
                level_df['Perturber'] = level
                level_df['Perturber family'] = family_name
                level_df['Levels'] = position
                level_frames.append(level_df)

            merged_dfs_list.append(pd.concat(level_frames, axis=0))
            family_names_list.append(family_name)
            # Deep copy so callers can modify the slice without touching the
            # overall table.
            family_mask = t_pert_df_global['Perturbation family'] == family_name
            t_pert_df_global_temps_list.append(
                t_pert_df_global[family_mask].copy(deep=True)
            )

        return {
            'merged_dfs_list': merged_dfs_list,
            't_pert_df_global_temps_list': t_pert_df_global_temps_list,
            'family_names_list': family_names_list,
        }