update preprocessor config
Browse files
- preprocessor_config.json +3 -0
- processing_time_rcd.py +37 -7
preprocessor_config.json
CHANGED
|
@@ -1,5 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"processor_type": "TimeRCDProcessor",
|
|
|
|
|
|
|
|
|
|
| 3 |
"win_size": 5000,
|
| 4 |
"stride": 5000,
|
| 5 |
"normalize": true,
|
|
|
|
| 1 |
{
|
| 2 |
"processor_type": "TimeRCDProcessor",
|
| 3 |
+
"auto_map": {
|
| 4 |
+
"AutoProcessor": "processing_time_rcd.TimeRCDProcessor"
|
| 5 |
+
},
|
| 6 |
"win_size": 5000,
|
| 7 |
"stride": 5000,
|
| 8 |
"normalize": true,
|
processing_time_rcd.py
CHANGED
|
@@ -17,9 +17,10 @@ Usage:
|
|
| 17 |
import numpy as np
|
| 18 |
import torch
|
| 19 |
from typing import Optional, Dict, Any
|
|
|
|
| 20 |
|
| 21 |
|
| 22 |
-
class TimeRCDProcessor:
|
| 23 |
"""
|
| 24 |
Processor for preparing time series data for Time_RCD model.
|
| 25 |
|
|
@@ -47,11 +48,21 @@ class TimeRCDProcessor:
|
|
| 47 |
stride: Optional[int] = None,
|
| 48 |
normalize: bool = True,
|
| 49 |
pad_to_multiple: bool = True,
|
|
|
|
| 50 |
):
|
|
|
|
| 51 |
self.win_size = win_size
|
| 52 |
self.stride = stride if stride is not None else win_size
|
| 53 |
self.normalize = normalize
|
| 54 |
self.pad_to_multiple = pad_to_multiple
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
def __call__(
|
| 57 |
self,
|
|
@@ -166,6 +177,9 @@ class TimeRCDProcessor:
|
|
| 166 |
|
| 167 |
config = {
|
| 168 |
"processor_type": "TimeRCDProcessor",
|
|
|
|
|
|
|
|
|
|
| 169 |
"win_size": self.win_size,
|
| 170 |
"stride": self.stride,
|
| 171 |
"normalize": self.normalize,
|
|
@@ -176,18 +190,34 @@ class TimeRCDProcessor:
|
|
| 176 |
json.dump(config, f, indent=2)
|
| 177 |
|
| 178 |
@classmethod
|
| 179 |
-
def from_pretrained(cls, pretrained_model_name_or_path: str):
|
| 180 |
"""Load processor from pretrained configuration."""
|
| 181 |
import json
|
| 182 |
import os
|
|
|
|
| 183 |
|
|
|
|
| 184 |
config_file = os.path.join(pretrained_model_name_or_path, "preprocessor_config.json")
|
| 185 |
|
| 186 |
-
if
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
|
| 192 |
# Remove processor_type from config
|
| 193 |
config.pop("processor_type", None)
|
|
|
|
| 17 |
import numpy as np
|
| 18 |
import torch
|
| 19 |
from typing import Optional, Dict, Any
|
| 20 |
+
from transformers import ProcessorMixin
|
| 21 |
|
| 22 |
|
| 23 |
+
class TimeRCDProcessor(ProcessorMixin):
|
| 24 |
"""
|
| 25 |
Processor for preparing time series data for Time_RCD model.
|
| 26 |
|
|
|
|
| 48 |
stride: Optional[int] = None,
|
| 49 |
normalize: bool = True,
|
| 50 |
pad_to_multiple: bool = True,
|
| 51 |
+
**kwargs
|
| 52 |
):
|
| 53 |
+
super().__init__(**kwargs)
|
| 54 |
self.win_size = win_size
|
| 55 |
self.stride = stride if stride is not None else win_size
|
| 56 |
self.normalize = normalize
|
| 57 |
self.pad_to_multiple = pad_to_multiple
|
| 58 |
+
|
| 59 |
+
# Required attributes for ProcessorMixin
|
| 60 |
+
self.model_input_names = ["time_series", "attention_mask"]
|
| 61 |
+
|
| 62 |
+
@property
|
| 63 |
+
def attributes(self):
|
| 64 |
+
"""Return list of attribute names for serialization."""
|
| 65 |
+
return ["win_size", "stride", "normalize", "pad_to_multiple"]
|
| 66 |
|
| 67 |
def __call__(
|
| 68 |
self,
|
|
|
|
| 177 |
|
| 178 |
config = {
|
| 179 |
"processor_type": "TimeRCDProcessor",
|
| 180 |
+
"auto_map": {
|
| 181 |
+
"AutoProcessor": "processing_time_rcd.TimeRCDProcessor"
|
| 182 |
+
},
|
| 183 |
"win_size": self.win_size,
|
| 184 |
"stride": self.stride,
|
| 185 |
"normalize": self.normalize,
|
|
|
|
| 190 |
json.dump(config, f, indent=2)
|
| 191 |
|
| 192 |
@classmethod
|
| 193 |
+
def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
|
| 194 |
"""Load processor from pretrained configuration."""
|
| 195 |
import json
|
| 196 |
import os
|
| 197 |
+
from huggingface_hub import hf_hub_download
|
| 198 |
|
| 199 |
+
# Try to load from local path first
|
| 200 |
config_file = os.path.join(pretrained_model_name_or_path, "preprocessor_config.json")
|
| 201 |
|
| 202 |
+
if os.path.exists(config_file):
|
| 203 |
+
# Load from local path
|
| 204 |
+
with open(config_file, "r") as f:
|
| 205 |
+
config = json.load(f)
|
| 206 |
+
else:
|
| 207 |
+
# Try to download from HuggingFace Hub
|
| 208 |
+
try:
|
| 209 |
+
config_file = hf_hub_download(
|
| 210 |
+
repo_id=pretrained_model_name_or_path,
|
| 211 |
+
filename="preprocessor_config.json",
|
| 212 |
+
**kwargs
|
| 213 |
+
)
|
| 214 |
+
with open(config_file, "r") as f:
|
| 215 |
+
config = json.load(f)
|
| 216 |
+
except Exception as e:
|
| 217 |
+
raise FileNotFoundError(
|
| 218 |
+
f"Could not load preprocessor config from {pretrained_model_name_or_path}. "
|
| 219 |
+
f"Error: {e}"
|
| 220 |
+
)
|
| 221 |
|
| 222 |
# Remove processor_type from config
|
| 223 |
config.pop("processor_type", None)
|