Spaces:
Sleeping
Sleeping
File size: 4,463 Bytes
5496871 a8fedb5 5496871 a8fedb5 5496871 a05288f 5496871 88ad7fb 5496871 88ad7fb a05288f 5496871 88ad7fb 5496871 a8fedb5 a05288f 5496871 88ad7fb a05288f 88ad7fb 5496871 a05288f 5496871 a05288f 5496871 a05288f 5496871 a05288f 5496871 a05288f 88ad7fb 5496871 88ad7fb 5496871 a05288f 5496871 a05288f 5496871 88ad7fb a05288f 88ad7fb a8fedb5 88ad7fb a8fedb5 a05288f 88ad7fb a8fedb5 5496871 a05288f a8fedb5 88ad7fb a8fedb5 88ad7fb a8fedb5 5496871 88ad7fb a05288f 88ad7fb a05288f 88ad7fb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
import abc
import datetime
import os.path
from io import BytesIO
from typing import List
import attrs
import pandas as pd
from huggingface_hub import login, upload_file
from config import APP_CONFIG
@attrs.define
class ModelScoringResult:
    """Scores recorded for one submission.

    The field names and their order match ``ORDERED_SCORES_COLUMNS``; the
    meaning of each metric is defined by the scoring code, not by this record.
    """

    # Identification of the submission.
    uuid: str
    submission_time: datetime.datetime

    # Metric values, all stored as floats.
    design_quality: float
    mean_violations: float
    sim_to_data_mmd: float
    mean_novelty: float
    binary_validity: float
    diversity_dpp: float
# Column order of the scores table: identification columns first, metrics after.
ORDERED_SCORES_COLUMNS = [
    "uuid",
    "submission_time",
    "design_quality",
    "mean_violations",
    "sim_to_data_mmd",
    "mean_novelty",
    "binary_validity",
    "diversity_dpp",
]
# Column order of the approvals table.
ORDERED_APPROVAL_COLUMNS = [
    "model_uuid",
    "model_verification_time",
]
class PandasModelScoresRepository(metaclass=abc.ABCMeta):
    """Abstract store that keeps its whole state as a single pandas DataFrame.

    Subclasses provide the persistence by implementing ``read_curr_state`` and
    ``save_to_disk``; this base class builds the frames handed to them.
    """

    def __init__(self, columns):
        """Remember the ordered column names of the table.

        Args:
            columns: Column names, in display order. Also used as the header
                when an empty state has to be persisted.
        """
        self.columns = columns

    def get_data_to_display(self):
        """Return the stored state restricted to, and ordered by, ``self.columns``."""
        return pd.DataFrame(self.read_curr_state(), columns=self.columns)

    def add_row(self, row: ModelScoringResult):
        """Append one record to the stored state and persist the result.

        Args:
            row: The attrs instance to append; it is flattened with
                ``attrs.asdict``.
        """
        previous_state = self.read_curr_state()
        new_row = pd.DataFrame(attrs.asdict(row), index=range(1))
        # ignore_index renumbers the rows so the appended record does not
        # reuse index label 0 of the existing state.
        result = pd.concat([previous_state, new_row], ignore_index=True)
        self.save_to_disk(result)

    def save_current_state(self, rows: List[ModelScoringResult]):
        """Replace the stored state with ``rows``.

        Args:
            rows: The records that make up the new state; may be empty.
        """
        records = [attrs.asdict(r) for r in rows]
        if records:
            frame = pd.DataFrame(records)
        else:
            # A frame built from an empty list has no columns, so it would be
            # persisted without a header and could not be read back as CSV.
            frame = pd.DataFrame(columns=self.columns)
        self.save_to_disk(frame)

    @abc.abstractmethod
    def save_to_disk(self, result: pd.DataFrame):
        """Persist ``result`` as the complete new state."""

    @abc.abstractmethod
    def read_curr_state(self) -> pd.DataFrame:
        """Load and return the complete persisted state."""
class LocalPandasModelScoresRepository(PandasModelScoresRepository):
    """Repository that persists its state to a CSV file on the local disk."""

    def __init__(self, dummy_file_path: str, columns: List[str]):
        """Bind the repository to a CSV file, creating it if it is missing.

        Args:
            dummy_file_path: Path of the CSV file backing this repository.
            columns: Column names; written as the header of a new file.
        """
        super().__init__(columns)
        self.dummy_file_path = dummy_file_path
        if not os.path.exists(self.dummy_file_path):
            # Create the containing directory first; open() does not create
            # missing parents and would raise FileNotFoundError.
            parent_dir = os.path.dirname(self.dummy_file_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
            # Seed the file with a header-only CSV so it can be read back.
            with open(self.dummy_file_path, "w") as file:
                file.write(",".join(self.columns))

    def read_curr_state(self) -> pd.DataFrame:
        """Read the whole CSV file into a DataFrame."""
        return pd.read_csv(self.dummy_file_path, index_col=None)

    def save_to_disk(self, result: pd.DataFrame):
        """Overwrite the CSV file with ``result``, omitting the index."""
        result.to_csv(self.dummy_file_path, index=False)
@attrs.define(frozen=True)
class DatasetParams:
    """Immutable description of one file stored in a dataset repository."""

    # URL the file is read from.
    dataset_url: str
    # Repository identifier used when uploading.
    repo_id: str
    # Path of the file inside that repository.
    file_path_in_repo: str
# Coordinates of scoring_data.txt in the yaz23/bike-bench-models dataset repo.
model_scores_dataset = DatasetParams(
    dataset_url="https://huggingface.co/datasets/yaz23/bike-bench-models/resolve/main/scoring_data.txt",
    repo_id="yaz23/bike-bench-models",
    file_path_in_repo="scoring_data.txt",
)
# Coordinates of approval_data.txt in the yaz23/bike-bench-models dataset repo.
approval_dataset = DatasetParams(
    dataset_url="https://huggingface.co/datasets/yaz23/bike-bench-models/resolve/main/approval_data.txt",
    repo_id="yaz23/bike-bench-models",
    file_path_in_repo="approval_data.txt",
)
class HuggingFaceDatasetModelScoresRepository(PandasModelScoresRepository):
    """Repository whose state is a CSV file kept in a Hugging Face dataset repo."""

    def __init__(self, dataset_params: DatasetParams, columns: List[str]):
        """Log in to the Hugging Face Hub and remember where the file lives.

        Args:
            dataset_params: Read URL, repo id and in-repo path of the file.
            columns: Column names of the table.
        """
        super().__init__(columns)
        # Authenticate with the token taken from the application config.
        login(APP_CONFIG.hugging_face_token)
        self.dataset_params = dataset_params

    def read_curr_state(self) -> pd.DataFrame:
        """Read the CSV at the configured dataset URL into a DataFrame."""
        source_url = self.dataset_params.dataset_url
        return pd.read_csv(source_url, index_col=None)

    def save_to_disk(self, result: pd.DataFrame):
        """Serialize ``result`` to CSV (no index) and upload it to the repo."""
        target = self.dataset_params
        # Encode the CSV text into an in-memory buffer for the upload.
        payload = BytesIO(result.to_csv(index=False).encode('utf-8'))
        upload_file(
            path_or_fileobj=payload,
            repo_id=target.repo_id,
            repo_type="dataset",
            path_in_repo=target.file_path_in_repo,
        )
# Module-level repositories, selected once at import time: Hugging Face
# datasets when APP_CONFIG.production is set, local CSV files otherwise.
REPOSITORY_INSTANCE: PandasModelScoresRepository
APPROVAL_REPOSITORY_INSTANCE: PandasModelScoresRepository

if APP_CONFIG.production:
    REPOSITORY_INSTANCE = HuggingFaceDatasetModelScoresRepository(
        model_scores_dataset,
        ORDERED_SCORES_COLUMNS,
    )
    # Approvals are stored in their own file (approval_dataset), so they never
    # read from or overwrite the scores file.
    APPROVAL_REPOSITORY_INSTANCE = HuggingFaceDatasetModelScoresRepository(
        approval_dataset,
        ORDERED_APPROVAL_COLUMNS,
    )
else:
    REPOSITORY_INSTANCE = LocalPandasModelScoresRepository(
        os.path.join(os.path.dirname(__file__), "local-run-data/model_scores.csv"),
        ORDERED_SCORES_COLUMNS,
    )
    APPROVAL_REPOSITORY_INSTANCE = LocalPandasModelScoresRepository(
        os.path.join(os.path.dirname(__file__), "local-run-data/model_approval.csv"),
        ORDERED_APPROVAL_COLUMNS,
    )

# Alias so the scores repository is also reachable under this name.
MODELS_REPOSITORY_INSTANCE: PandasModelScoresRepository = REPOSITORY_INSTANCE
|