"""Persistence layer for model scoring results and model approvals.

Tables are held as pandas DataFrames and stored as CSV, either in a local
file or in a file of a Hugging Face dataset repository, depending on
``APP_CONFIG.production``.
"""
import abc
import datetime
import os.path
from io import BytesIO
from typing import List

import attrs
import pandas as pd
from huggingface_hub import login, upload_file

from config import APP_CONFIG


@attrs.define
class ModelScoringResult:
    """One row of the scores table: a submission id, its time and its metrics."""

    uuid: str
    submission_time: datetime.datetime
    design_quality: float
    mean_violations: float
    sim_to_data_mmd: float
    mean_novelty: float
    binary_validity: float
    diversity_dpp: float


# Column order used when displaying the scores table; mirrors the field
# order of ModelScoringResult.
ORDERED_SCORES_COLUMNS = [
    "uuid",
    "submission_time",
    "design_quality",
    "mean_violations",
    "sim_to_data_mmd",
    "mean_novelty",
    "binary_validity",
    "diversity_dpp",
]

# Column order used when displaying the approvals table.
ORDERED_APPROVAL_COLUMNS = [
    "model_uuid",
    "model_verification_time",
]


class PandasModelScoresRepository(metaclass=abc.ABCMeta):
    """Abstract repository that keeps a table of rows as a pandas DataFrame.

    Subclasses supply the storage backend by implementing
    ``read_curr_state`` and ``save_to_disk``.
    """

    def __init__(self, columns):
        """Remember the ordered column names used by ``get_data_to_display``."""
        self.columns = columns

    def get_data_to_display(self):
        """Return the stored table projected onto ``self.columns``, in that order."""
        return pd.DataFrame(self.read_curr_state(), columns=self.columns)

    def add_row(self, row: ModelScoringResult):
        """Append ``row`` to the stored table and persist the result.

        This is a read-modify-write over the whole table: the current state
        is loaded, the new row is concatenated, and everything is saved back.
        """
        previous_state = self.read_curr_state()
        # index=range(1) turns the dict of scalar field values into one row.
        new_row = pd.DataFrame(attrs.asdict(row), index=range(1))
        result = pd.concat([previous_state, new_row])
        self.save_to_disk(result)

    def save_current_state(self, rows: List[ModelScoringResult]):
        """Replace the stored table with exactly ``rows``."""
        self.save_to_disk(pd.DataFrame([attrs.asdict(r) for r in rows]))

    @abc.abstractmethod
    def save_to_disk(self, result: pd.DataFrame):
        """Persist ``result`` as the new full state of the table."""

    @abc.abstractmethod
    def read_curr_state(self) -> pd.DataFrame:
        """Load and return the full current state of the table."""


class LocalPandasModelScoresRepository(PandasModelScoresRepository):
    """Repository backed by a CSV file on the local filesystem."""

    def __init__(self, dummy_file_path: str, columns: List[str]):
        """Bind the repository to ``dummy_file_path``, creating the file if absent.

        A newly created file contains only the comma-joined header row built
        from ``columns``.
        """
        super().__init__(columns)
        self.dummy_file_path = dummy_file_path
        if not os.path.exists(self.dummy_file_path):
            # open(..., "w") does not create missing directories, so make
            # sure the parent exists first (e.g. on a fresh checkout).
            parent_dir = os.path.dirname(self.dummy_file_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
            with open(self.dummy_file_path, "w") as file:
                file.write(",".join(self.columns))

    def read_curr_state(self) -> pd.DataFrame:
        """Read the CSV file into a DataFrame."""
        return pd.read_csv(self.dummy_file_path, index_col=None)

    def save_to_disk(self, result: pd.DataFrame):
        """Overwrite the CSV file with ``result``, omitting the index column."""
        result.to_csv(self.dummy_file_path, index=False)


@attrs.define(frozen=True)
class DatasetParams:
    """Location of one CSV file inside a Hugging Face dataset repository."""

    dataset_url: str  # URL the file is read from
    repo_id: str  # repository the file is uploaded to
    file_path_in_repo: str  # destination path of the upload inside the repo


model_scores_dataset = DatasetParams(
    dataset_url="https://huggingface.co/datasets/yaz23/bike-bench-models/resolve/main/scoring_data.txt",
    repo_id="yaz23/bike-bench-models",
    file_path_in_repo="scoring_data.txt",
)

approval_dataset = DatasetParams(
    dataset_url="https://huggingface.co/datasets/yaz23/bike-bench-models/resolve/main/approval_data.txt",
    repo_id="yaz23/bike-bench-models",
    file_path_in_repo="approval_data.txt",
)


class HuggingFaceDatasetModelScoresRepository(PandasModelScoresRepository):
    """Repository backed by a CSV file in a Hugging Face dataset repository."""

    def __init__(self, dataset_params: DatasetParams, columns: List[str]):
        """Log in with the configured token and remember the target file."""
        super().__init__(columns)
        login(APP_CONFIG.hugging_face_token)
        self.dataset_params = dataset_params

    def read_curr_state(self) -> pd.DataFrame:
        """Read the remote CSV from ``dataset_params.dataset_url``."""
        return pd.read_csv(self.dataset_params.dataset_url, index_col=None)

    def save_to_disk(self, result: pd.DataFrame):
        """Serialise ``result`` to CSV in memory and upload it to the dataset repo."""
        csv_string = result.to_csv(index=False)
        csv_buffer = BytesIO(csv_string.encode('utf-8'))
        upload_file(
            path_or_fileobj=csv_buffer,
            repo_id=self.dataset_params.repo_id,
            repo_type="dataset",
            path_in_repo=self.dataset_params.file_path_in_repo,
        )


REPOSITORY_INSTANCE: PandasModelScoresRepository
MODELS_REPOSITORY_INSTANCE: PandasModelScoresRepository
APPROVAL_REPOSITORY_INSTANCE: PandasModelScoresRepository

if APP_CONFIG.production:
    REPOSITORY_INSTANCE = HuggingFaceDatasetModelScoresRepository(
        model_scores_dataset, ORDERED_SCORES_COLUMNS
    )
    # Approvals get their own file (approval_data.txt). Pointing this
    # repository at model_scores_dataset would make every approval write
    # overwrite scoring_data.txt.
    APPROVAL_REPOSITORY_INSTANCE = HuggingFaceDatasetModelScoresRepository(
        approval_dataset, ORDERED_APPROVAL_COLUMNS
    )
else:
    REPOSITORY_INSTANCE = LocalPandasModelScoresRepository(
        os.path.join(os.path.dirname(__file__), "local-run-data/model_scores.csv"),
        ORDERED_SCORES_COLUMNS,
    )
    APPROVAL_REPOSITORY_INSTANCE = LocalPandasModelScoresRepository(
        os.path.join(os.path.dirname(__file__), "local-run-data/model_approval.csv"),
        ORDERED_APPROVAL_COLUMNS,
    )

# Same object under the second name, so both names are actually bound.
MODELS_REPOSITORY_INSTANCE = REPOSITORY_INSTANCE