import abc
import datetime
import os.path
import shutil
from io import BytesIO
from typing import List

import attrs
import pandas as pd
from huggingface_hub import login, upload_file

from config import APP_CONFIG
from domain_constants import SCORE_NAMES_MAP, METADATA_NAMES_MAP


@attrs.define
class ModelScoringResult:
    """One leaderboard submission: model metadata plus its benchmark scores."""

    uuid: str
    model_name: str
    github_link: str
    submission_time: datetime.datetime
    paper_link: str
    eval_count: int
    conditional: bool
    masked: bool
    gradient_free: bool
    design_quality: float
    mean_violations: float
    sim_to_data_mmd: float
    mean_novelty: float
    binary_validity: float
    diversity_dpp: float


@attrs.define
class ModelAdminRating:
    """Admin verification state for a submitted model, keyed by uuid."""

    uuid: str
    admin_verified: bool
    update_time: datetime.datetime


# Canonical column orders for each persisted table / display view.
ORDERED_SCORES_COLUMNS = [
    "uuid",
    "model_name",
    "submission_time",
    "design_quality",
    "mean_violations",
    "sim_to_data_mmd",
    "mean_novelty",
    "binary_validity",
    "diversity_dpp",
    "eval_count",
    "conditional",
    "masked",
    "gradient_free",
    "github_link",
    "paper_link",
]

ORDERED_RATING_COLUMNS = [
    "uuid",
    "admin_verified",
    "update_time",
]

ORDERED_APPROVAL_COLUMNS = [
    "model_uuid",
    "model_verification_time",
]


def save_uploaded_tensor(uuid: str, src_path: str):
    """Save an uploaded tensor file under a standardized UUID-based name.

    - Local mode (APP_CONFIG.production == False): copies the uploaded file
      into: local-run-data/tensors/{uuid}{ext}
    - Production / Hugging Face dataset mode: uploads the file into the same
      dataset repo used for scores, at: tensors/{uuid}{ext}
    """
    # Preserve the original extension if possible (.pt / .pth), default .pt.
    base_ext = os.path.splitext(src_path)[1] or ".pt"
    filename = f"{uuid}{base_ext}"
    if APP_CONFIG.production:
        # Upload directly to the Hugging Face dataset repo.
        # BUG FIX: path_in_repo previously contained a garbled constant
        # ("tensors/(unknown)") instead of the per-model file name, so every
        # production upload would land at the same bogus path.
        upload_file(
            path_or_fileobj=src_path,
            repo_id=model_scores_dataset.scores_repo_id,
            repo_type="dataset",
            path_in_repo=f"tensors/{filename}",
        )
    else:
        # Save alongside local CSVs.
        tensors_dir = os.path.join(
            os.path.dirname(__file__), "local-run-data", "tensors"
        )
        os.makedirs(tensors_dir, exist_ok=True)
        shutil.copyfile(src_path, os.path.join(tensors_dir, filename))


class PandasModelScoresRepository(metaclass=abc.ABCMeta):
    """Abstract repository for model scores and admin ratings.

    Subclasses supply the storage backend (local CSV or Hugging Face dataset);
    this base class implements the shared DataFrame manipulation.
    """

    def __init__(self, scores_columns: List[str], ratings_columns: List[str]):
        self.scores_columns = scores_columns
        self.ratings_columns = ratings_columns

    def get_data_to_display(self):
        """Return a display-ready DataFrame of scores joined with ratings.

        Missing columns are filled with NA, timestamps are formatted as
        date-only strings, ``admin_verified`` is coerced to plain bool
        (missing -> False), and columns are renamed to display names.
        """
        scores = self.read_curr_scores()
        # Ensure all expected score columns exist, fill missing with NA.
        for col in self.scores_columns:
            if col not in scores.columns:
                scores[col] = pd.NA
        scores = scores[self.scores_columns]

        ratings = self.read_curr_rating()
        for col in self.ratings_columns:
            if col not in ratings.columns:
                ratings[col] = pd.NA
        ratings = ratings[self.ratings_columns]

        # Merge scores + ratings on uuid (left join keeps unrated models).
        full_result = scores.merge(ratings, on="uuid", how="left")

        # Format both datetime columns as date-only strings for display.
        for time_col in ("submission_time", "update_time"):
            if time_col in full_result.columns:
                full_result[time_col] = pd.to_datetime(
                    full_result[time_col], errors="coerce"
                ).dt.strftime("%Y-%m-%d")

        # Ensure a boolean admin_verified column exists; NA -> False.
        if "admin_verified" in full_result.columns:
            full_result["admin_verified"] = (
                full_result["admin_verified"]
                .astype("boolean")  # nullable bool intermediate
                .fillna(False)
                .astype(bool)
            )
        else:
            full_result["admin_verified"] = False

        # Rename score and metadata columns to their display names.
        full_result = full_result.rename(columns=SCORE_NAMES_MAP)
        full_result = full_result.rename(columns=METADATA_NAMES_MAP)
        return full_result

    def add_model_score(self, row: ModelScoringResult):
        """Append one score row to the stored scores and persist."""
        previous_state = self.read_curr_scores()
        new_row = pd.DataFrame(attrs.asdict(row), index=[0])
        self.save_scores_to_disk(pd.concat([previous_state, new_row]))

    def add_state_model(self, row: ModelAdminRating):
        """Upsert the admin rating for ``row.uuid`` and persist."""
        previous_state = self.read_curr_rating()
        # Drop any existing rating for this uuid, then append the new one.
        kept = previous_state[previous_state["uuid"] != row.uuid]
        new_row = pd.DataFrame(attrs.asdict(row), index=[0])
        self.save_ratings_to_disk(pd.concat([kept, new_row]))

    def save_current_scores_state(self, rows: List[ModelScoringResult]):
        """Replace the stored scores wholesale with exactly ``rows``."""
        self.save_scores_to_disk(pd.DataFrame([attrs.asdict(r) for r in rows]))

    @abc.abstractmethod
    def read_curr_rating(self):
        pass

    @abc.abstractmethod
    def save_scores_to_disk(self, result: pd.DataFrame):
        pass

    @abc.abstractmethod
    def read_curr_scores(self) -> pd.DataFrame:
        pass

    @abc.abstractmethod
    def save_ratings_to_disk(self, result):
        pass


class LocalPandasModelScoresRepository(PandasModelScoresRepository):
    """Repository backed by CSV files in a local directory (dev/test mode)."""

    def __init__(self, file_dir: str, score_columns: List[str],
                 rating_columns: List[str]):
        super().__init__(score_columns, rating_columns)
        os.makedirs(file_dir, exist_ok=True)
        self.scores_path = os.path.join(file_dir, "model_scores.csv")
        self.ratings_path = os.path.join(file_dir, "model_ratings.csv")
        # Seed missing CSVs with just a header row so reads always succeed.
        if not os.path.exists(self.scores_path):
            with open(self.scores_path, "w") as file:
                file.write(",".join(self.scores_columns))
        if not os.path.exists(self.ratings_path):
            with open(self.ratings_path, "w") as file:
                file.write(",".join(self.ratings_columns))

    def read_curr_rating(self):
        return pd.read_csv(self.ratings_path, index_col=None)

    def save_ratings_to_disk(self, result: pd.DataFrame):
        result.to_csv(self.ratings_path, index=False)

    def read_curr_scores(self) -> pd.DataFrame:
        return pd.read_csv(self.scores_path, index_col=None)

    def save_scores_to_disk(self, result: pd.DataFrame):
        result.to_csv(self.scores_path, index=False)


@attrs.define(frozen=True)
class DatasetParams:
    """Locations of the scores/ratings CSVs within a Hugging Face dataset."""

    scores_dataset_url: str
    scores_repo_id: str
    scores_file_path_in_repo: str
    rating_dataset_url: str
    rating_repo_id: str
    rating_file_path_in_repo: str


model_scores_dataset = DatasetParams(
    scores_dataset_url="https://huggingface.co/datasets/Lyleregenwetter/BikeBench_Leaderboard_Data/resolve/main/model_scores.csv",
    scores_repo_id="Lyleregenwetter/BikeBench_Leaderboard_Data",
    scores_file_path_in_repo="model_scores.csv",
    rating_dataset_url="https://huggingface.co/datasets/Lyleregenwetter/BikeBench_Leaderboard_Data/resolve/main/model_ratings.csv",
    rating_repo_id="Lyleregenwetter/BikeBench_Leaderboard_Data",
    rating_file_path_in_repo="model_ratings.csv",
)


class HuggingFaceDatasetModelScoresRepository(PandasModelScoresRepository):
    """Repository backed by a Hugging Face dataset repo (production mode)."""

    def __init__(self, dataset_params: DatasetParams,
                 scores_columns: List[str], rating_columns: List[str]):
        super().__init__(scores_columns, rating_columns)
        login(APP_CONFIG.hugging_face_token)
        self.dataset_params = dataset_params

    def read_curr_scores(self) -> pd.DataFrame:
        return pd.read_csv(self.dataset_params.scores_dataset_url, index_col=None)

    def read_curr_rating(self):
        return pd.read_csv(self.dataset_params.rating_dataset_url, index_col=None)

    def save_ratings_to_disk(self, result):
        # Serialize to an in-memory buffer; upload_file accepts a file object.
        csv_buffer = BytesIO(result.to_csv(index=False).encode('utf-8'))
        upload_file(
            path_or_fileobj=csv_buffer,
            repo_id=self.dataset_params.rating_repo_id,
            repo_type="dataset",
            path_in_repo=self.dataset_params.rating_file_path_in_repo,
        )

    def save_scores_to_disk(self, result: pd.DataFrame):
        csv_buffer = BytesIO(result.to_csv(index=False).encode('utf-8'))
        upload_file(
            path_or_fileobj=csv_buffer,
            repo_id=self.dataset_params.scores_repo_id,
            repo_type="dataset",
            path_in_repo=self.dataset_params.scores_file_path_in_repo,
        )


# BUG FIX: the annotation previously named MODELS_REPOSITORY_INSTANCE, a
# variable that was never assigned; both branches bind REPOSITORY_INSTANCE.
REPOSITORY_INSTANCE: PandasModelScoresRepository
if APP_CONFIG.production:
    REPOSITORY_INSTANCE = HuggingFaceDatasetModelScoresRepository(
        model_scores_dataset, ORDERED_SCORES_COLUMNS, ORDERED_RATING_COLUMNS
    )
else:
    REPOSITORY_INSTANCE = LocalPandasModelScoresRepository(
        os.path.join(os.path.dirname(__file__), "local-run-data"),
        ORDERED_SCORES_COLUMNS,
        ORDERED_RATING_COLUMNS,
    )