import abc
import datetime
import os.path
from io import BytesIO
from typing import List

import attrs
import pandas as pd
from huggingface_hub import login, upload_file

from config import APP_CONFIG


@attrs.define
class ModelScoringResult:
    """A single model submission together with its computed benchmark scores."""

    uuid: str
    submission_time: datetime.datetime
    design_quality: float
    mean_violations: float
    sim_to_data_mmd: float
    mean_novelty: float
    binary_validity: float
    diversity_dpp: float


ORDERED_SCORES_COLUMNS = [
    "uuid",
    "submission_time",
    "design_quality",
    "mean_violations",
    "sim_to_data_mmd",
    "mean_novelty",
    "binary_validity",
    "diversity_dpp",
]

ORDERED_APPROVAL_COLUMNS = [
    "model_uuid",
    "model_verification_time"
]


class PandasModelScoresRepository(metaclass=abc.ABCMeta):
    """Base repository that keeps model scores in a pandas DataFrame.

    Subclasses decide where the DataFrame is persisted (local CSV file or
    Hugging Face dataset).
    """

    def __init__(self, columns):
        self.columns = columns

    def get_data_to_display(self):
        """Return the current state as a DataFrame with the configured column order."""
        return pd.DataFrame(self.read_curr_state(), columns=self.columns)

    def add_row(self, row: ModelScoringResult):
        """Append a single result to the persisted state."""
        previous_state = self.read_curr_state()
        result = pd.concat([previous_state, pd.DataFrame([attrs.asdict(row)])], ignore_index=True)
        self.save_to_disk(result)

    def save_current_state(self, rows: List[ModelScoringResult]):
        self.save_to_disk(pd.DataFrame([attrs.asdict(r) for r in rows]))

    @abc.abstractmethod
    def save_to_disk(self, result: pd.DataFrame):
        pass

    @abc.abstractmethod
    def read_curr_state(self) -> pd.DataFrame:
        pass


class LocalPandasModelScoresRepository(PandasModelScoresRepository):
    """Persists scores in a local CSV file; used for non-production runs."""

    def __init__(self, dummy_file_path: str, columns: List[str]):
        super().__init__(columns)
        self.dummy_file_path = dummy_file_path
        # Create the CSV with just a header row if it does not exist yet.
        if not os.path.exists(self.dummy_file_path):
            with open(self.dummy_file_path, "w") as file:
                file.write(",".join(self.columns) + "\n")

    def read_curr_state(self) -> pd.DataFrame:
        return pd.read_csv(self.dummy_file_path, index_col=None)

    def save_to_disk(self, result: pd.DataFrame):
        result.to_csv(self.dummy_file_path, index=False)


@attrs.define(frozen=True)
class DatasetParams:
    """Location of a CSV file inside a Hugging Face dataset repository."""

    dataset_url: str
    repo_id: str
    file_path_in_repo: str


model_scores_dataset = DatasetParams(
    dataset_url="https://huggingface.co/datasets/yaz23/bike-bench-models/resolve/main/scoring_data.txt",
    repo_id="yaz23/bike-bench-models",
    file_path_in_repo="scoring_data.txt"
)
approval_dataset = DatasetParams(
    dataset_url="https://huggingface.co/datasets/yaz23/bike-bench-models/resolve/main/approval_data.txt",
    repo_id="yaz23/bike-bench-models",
    file_path_in_repo="approval_data.txt"
)


class HuggingFaceDatasetModelScoresRepository(PandasModelScoresRepository):
    """Persists scores as a CSV file inside a Hugging Face dataset repository."""

    def __init__(self, dataset_params: DatasetParams, columns: List[str]):
        super().__init__(columns)
        login(APP_CONFIG.hugging_face_token)
        self.dataset_params = dataset_params

    def read_curr_state(self) -> pd.DataFrame:
        return pd.read_csv(self.dataset_params.dataset_url,
                           index_col=None)

    def save_to_disk(self, result: pd.DataFrame):
        # Serialize to CSV in memory and upload it, replacing the file in the dataset repo.
        csv_string = result.to_csv(index=False)
        csv_buffer = BytesIO(csv_string.encode('utf-8'))
        upload_file(
            path_or_fileobj=csv_buffer,
            repo_id=self.dataset_params.repo_id,
            repo_type="dataset",
            path_in_repo=self.dataset_params.file_path_in_repo
        )


REPOSITORY_INSTANCE: PandasModelScoresRepository
APPROVAL_REPOSITORY_INSTANCE: PandasModelScoresRepository
if APP_CONFIG.production:
    REPOSITORY_INSTANCE = HuggingFaceDatasetModelScoresRepository(model_scores_dataset, ORDERED_SCORES_COLUMNS)
    APPROVAL_REPOSITORY_INSTANCE = HuggingFaceDatasetModelScoresRepository(approval_dataset, ORDERED_APPROVAL_COLUMNS)
else:
    REPOSITORY_INSTANCE = LocalPandasModelScoresRepository(os.path.join(os.path.dirname(__file__),
                                                                        "local-run-data/model_scores.csv"),
                                                           ORDERED_SCORES_COLUMNS)
    APPROVAL_REPOSITORY_INSTANCE = LocalPandasModelScoresRepository(os.path.join(os.path.dirname(__file__),
                                                                                 "local-run-data/model_approval.csv"),
                                                                    ORDERED_APPROVAL_COLUMNS)
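

# Usage sketch (illustrative, not part of the module's public API): a minimal
# example of how the repository instances above might be exercised in a local
# run. It assumes APP_CONFIG.production is False, so the CSV-backed local
# repository is used, and that the local-run-data directory already exists.
# All field values below are placeholders.
if __name__ == "__main__":
    example = ModelScoringResult(
        uuid="example-uuid",
        submission_time=datetime.datetime.now(),
        design_quality=0.0,
        mean_violations=0.0,
        sim_to_data_mmd=0.0,
        mean_novelty=0.0,
        binary_validity=0.0,
        diversity_dpp=0.0,
    )
    REPOSITORY_INSTANCE.add_row(example)
    print(REPOSITORY_INSTANCE.get_data_to_display())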