leaderboard-poc / data_repository.py
yazan-amer
real scores pending
88ad7fb
import abc
import datetime
import os.path
from io import BytesIO
from typing import List
import attrs
import pandas as pd
from huggingface_hub import login, upload_file
from config import APP_CONFIG
@attrs.define
class ModelScoringResult:
    """One scored model submission, stored as one row of the scores table.

    The field names match ``ORDERED_SCORES_COLUMNS`` one-to-one and in the
    same order. Repositories turn an instance into a row via
    ``attrs.asdict``, so each field name becomes a column name.
    """

    # Identifier of the submission (presumably unique per model -- confirm
    # against the code that creates these objects).
    uuid: str
    # When the submission was made.
    # NOTE(review): naive vs. timezone-aware is not established here.
    submission_time: datetime.datetime
    # Metric values. Their units, ranges and "higher/lower is better"
    # direction are not defined in this module.
    design_quality: float
    mean_violations: float
    sim_to_data_mmd: float
    mean_novelty: float
    binary_validity: float
    diversity_dpp: float
# Metric columns of the scores table, in display order.
_SCORE_METRIC_COLUMNS = (
    "design_quality",
    "mean_violations",
    "sim_to_data_mmd",
    "mean_novelty",
    "binary_validity",
    "diversity_dpp",
)

# Full column order of the scores table: identifying columns first, then
# the metrics.
ORDERED_SCORES_COLUMNS = ["uuid", "submission_time", *_SCORE_METRIC_COLUMNS]

# Column order of the approval table.
ORDERED_APPROVAL_COLUMNS = ["model_uuid", "model_verification_time"]
class PandasModelScoresRepository(metaclass=abc.ABCMeta):
    """Abstract repository that keeps its rows in a pandas DataFrame.

    Subclasses choose the storage backend by implementing
    ``read_curr_state`` and ``save_to_disk``; this base class provides the
    read/append/replace operations on top of those two hooks.
    """

    def __init__(self, columns):
        """Store the ordered column names used when presenting the data."""
        self.columns = columns

    def get_data_to_display(self):
        """Return the stored table restricted to ``self.columns``, in that order."""
        return pd.DataFrame(self.read_curr_state(), columns=self.columns)

    def add_row(self, row: "ModelScoringResult"):
        """Append ``row`` to the stored table and persist the whole table.

        This is a read-modify-write of the full table; nothing here guards
        against concurrent writers.
        """
        previous_state = self.read_curr_state()
        new_row = pd.DataFrame(attrs.asdict(row), index=range(1))
        # ignore_index avoids a duplicated index label 0 in the combined
        # frame; the persisted data is unaffected because backends write
        # without the index.
        result = pd.concat([previous_state, new_row], ignore_index=True)
        self.save_to_disk(result)

    def save_current_state(self, rows: List["ModelScoringResult"]):
        """Replace the stored table with ``rows``.

        An empty ``rows`` persists a header-only table built from
        ``self.columns``. Without that, the saved CSV would contain no
        header at all and could not be parsed by the next read.
        """
        records = [attrs.asdict(r) for r in rows]
        if records:
            result = pd.DataFrame(records)
        else:
            result = pd.DataFrame(columns=self.columns)
        self.save_to_disk(result)

    @abc.abstractmethod
    def save_to_disk(self, result: pd.DataFrame):
        """Persist ``result`` as the new full state of the table."""

    @abc.abstractmethod
    def read_curr_state(self) -> pd.DataFrame:
        """Load and return the currently persisted table."""
class LocalPandasModelScoresRepository(PandasModelScoresRepository):
    """Repository backed by a CSV file on the local filesystem."""

    def __init__(self, dummy_file_path: str, columns: List[str]):
        """Bind the repository to ``dummy_file_path``, creating it if missing.

        A missing file is initialised with a header line made of
        ``columns`` so that the first read yields an empty table with the
        expected columns. An existing file is left untouched.
        """
        super().__init__(columns)
        self.dummy_file_path = dummy_file_path
        if not os.path.exists(self.dummy_file_path):
            # The containing directory may not exist yet (e.g. on a fresh
            # checkout); open(..., "w") would fail without it.
            parent_dir = os.path.dirname(self.dummy_file_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
            with open(self.dummy_file_path, "w", encoding="utf-8") as file:
                file.write(",".join(self.columns))

    def read_curr_state(self) -> pd.DataFrame:
        """Read the whole CSV file into a DataFrame."""
        return pd.read_csv(self.dummy_file_path, index_col=None)

    def save_to_disk(self, result: pd.DataFrame):
        """Overwrite the CSV file with ``result`` (index not written)."""
        result.to_csv(self.dummy_file_path, index=False)
@attrs.define(frozen=True)
class DatasetParams:
    """Immutable description of one CSV file hosted in a Hugging Face dataset.

    ``HuggingFaceDatasetModelScoresRepository`` reads from ``dataset_url``
    and uploads to ``repo_id`` / ``file_path_in_repo``.
    """

    # URL the file is read from (passed to ``pd.read_csv``).
    dataset_url: str
    # Repository id passed to ``upload_file`` as ``repo_id``.
    repo_id: str
    # Destination path inside the repository (``path_in_repo`` on upload).
    file_path_in_repo: str
# Both tables live as separate files in the same dataset repository.
_BIKE_BENCH_REPO_ID = "yaz23/bike-bench-models"

# Location of the model scores table.
model_scores_dataset = DatasetParams(
    repo_id=_BIKE_BENCH_REPO_ID,
    file_path_in_repo="scoring_data.txt",
    dataset_url="https://huggingface.co/datasets/yaz23/bike-bench-models/resolve/main/scoring_data.txt",
)

# Location of the model approval table.
approval_dataset = DatasetParams(
    repo_id=_BIKE_BENCH_REPO_ID,
    file_path_in_repo="approval_data.txt",
    dataset_url="https://huggingface.co/datasets/yaz23/bike-bench-models/resolve/main/approval_data.txt",
)
class HuggingFaceDatasetModelScoresRepository(PandasModelScoresRepository):
    """Repository whose table is a CSV file in a Hugging Face dataset repo."""

    def __init__(self, dataset_params: DatasetParams, columns: List[str]):
        """Log in with the configured token and remember the file location."""
        super().__init__(columns)
        login(APP_CONFIG.hugging_face_token)
        self.dataset_params = dataset_params

    def read_curr_state(self) -> pd.DataFrame:
        """Fetch the CSV from the dataset URL and parse it into a DataFrame."""
        source_url = self.dataset_params.dataset_url
        return pd.read_csv(source_url, index_col=None)

    def save_to_disk(self, result: pd.DataFrame):
        """Serialise ``result`` to CSV in memory and upload it to the repo."""
        params = self.dataset_params
        # upload_file is given a file object, so the CSV text is encoded
        # and wrapped in an in-memory buffer.
        payload = BytesIO(result.to_csv(index=False).encode("utf-8"))
        upload_file(
            path_or_fileobj=payload,
            repo_id=params.repo_id,
            repo_type="dataset",
            path_in_repo=params.file_path_in_repo,
        )
# Module-level repository singletons, selected by APP_CONFIG.production:
# Hugging Face dataset files in production, local CSV files otherwise.
MODELS_REPOSITORY_INSTANCE: PandasModelScoresRepository
APPROVAL_REPOSITORY_INSTANCE: PandasModelScoresRepository
if APP_CONFIG.production:
    MODELS_REPOSITORY_INSTANCE = HuggingFaceDatasetModelScoresRepository(
        model_scores_dataset, ORDERED_SCORES_COLUMNS
    )
    # Approvals must use their own file; pointing this at
    # model_scores_dataset would read and overwrite the scores file.
    APPROVAL_REPOSITORY_INSTANCE = HuggingFaceDatasetModelScoresRepository(
        approval_dataset, ORDERED_APPROVAL_COLUMNS
    )
else:
    MODELS_REPOSITORY_INSTANCE = LocalPandasModelScoresRepository(
        os.path.join(os.path.dirname(__file__), "local-run-data/model_scores.csv"),
        ORDERED_SCORES_COLUMNS,
    )
    APPROVAL_REPOSITORY_INSTANCE = LocalPandasModelScoresRepository(
        os.path.join(os.path.dirname(__file__), "local-run-data/model_approval.csv"),
        ORDERED_APPROVAL_COLUMNS,
    )

# The scores repository was previously only bound to REPOSITORY_INSTANCE
# (the annotated MODELS_REPOSITORY_INSTANCE was never assigned). Both names
# now refer to the same object so existing imports keep working.
REPOSITORY_INSTANCE: PandasModelScoresRepository = MODELS_REPOSITORY_INSTANCE