oliverlevn committed on
Commit
2c2b38d
·
verified ·
1 Parent(s): d9788f8

update preprocessor config

Browse files
Files changed (2) hide show
  1. preprocessor_config.json +3 -0
  2. processing_time_rcd.py +37 -7
preprocessor_config.json CHANGED
@@ -1,5 +1,8 @@
1
  {
2
  "processor_type": "TimeRCDProcessor",
 
 
 
3
  "win_size": 5000,
4
  "stride": 5000,
5
  "normalize": true,
 
1
  {
2
  "processor_type": "TimeRCDProcessor",
3
+ "auto_map": {
4
+ "AutoProcessor": "processing_time_rcd.TimeRCDProcessor"
5
+ },
6
  "win_size": 5000,
7
  "stride": 5000,
8
  "normalize": true,
processing_time_rcd.py CHANGED
@@ -17,9 +17,10 @@ Usage:
17
  import numpy as np
18
  import torch
19
  from typing import Optional, Dict, Any
 
20
 
21
 
22
- class TimeRCDProcessor:
23
  """
24
  Processor for preparing time series data for Time_RCD model.
25
 
@@ -47,11 +48,21 @@ class TimeRCDProcessor:
47
  stride: Optional[int] = None,
48
  normalize: bool = True,
49
  pad_to_multiple: bool = True,
 
50
  ):
 
51
  self.win_size = win_size
52
  self.stride = stride if stride is not None else win_size
53
  self.normalize = normalize
54
  self.pad_to_multiple = pad_to_multiple
 
 
 
 
 
 
 
 
55
 
56
  def __call__(
57
  self,
@@ -166,6 +177,9 @@ class TimeRCDProcessor:
166
 
167
  config = {
168
  "processor_type": "TimeRCDProcessor",
 
 
 
169
  "win_size": self.win_size,
170
  "stride": self.stride,
171
  "normalize": self.normalize,
@@ -176,18 +190,34 @@ class TimeRCDProcessor:
176
  json.dump(config, f, indent=2)
177
 
178
  @classmethod
179
- def from_pretrained(cls, pretrained_model_name_or_path: str):
180
  """Load processor from pretrained configuration."""
181
  import json
182
  import os
 
183
 
 
184
  config_file = os.path.join(pretrained_model_name_or_path, "preprocessor_config.json")
185
 
186
- if not os.path.exists(config_file):
187
- raise FileNotFoundError(f"Preprocessor config not found at {config_file}")
188
-
189
- with open(config_file, "r") as f:
190
- config = json.load(f)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
  # Remove processor_type from config
193
  config.pop("processor_type", None)
 
17
  import numpy as np
18
  import torch
19
  from typing import Optional, Dict, Any
20
+ from transformers import ProcessorMixin
21
 
22
 
23
+ class TimeRCDProcessor(ProcessorMixin):
24
  """
25
  Processor for preparing time series data for Time_RCD model.
26
 
 
48
  stride: Optional[int] = None,
49
  normalize: bool = True,
50
  pad_to_multiple: bool = True,
51
+ **kwargs
52
  ):
53
+ super().__init__(**kwargs)
54
  self.win_size = win_size
55
  self.stride = stride if stride is not None else win_size
56
  self.normalize = normalize
57
  self.pad_to_multiple = pad_to_multiple
58
+
59
+ # Required attributes for ProcessorMixin
60
+ self.model_input_names = ["time_series", "attention_mask"]
61
+
62
+ @property
63
+ def attributes(self):
64
+ """Return list of attribute names for serialization."""
65
+ return ["win_size", "stride", "normalize", "pad_to_multiple"]
66
 
67
  def __call__(
68
  self,
 
177
 
178
  config = {
179
  "processor_type": "TimeRCDProcessor",
180
+ "auto_map": {
181
+ "AutoProcessor": "processing_time_rcd.TimeRCDProcessor"
182
+ },
183
  "win_size": self.win_size,
184
  "stride": self.stride,
185
  "normalize": self.normalize,
 
190
  json.dump(config, f, indent=2)
191
 
192
  @classmethod
193
+ def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
194
  """Load processor from pretrained configuration."""
195
  import json
196
  import os
197
+ from huggingface_hub import hf_hub_download
198
 
199
+ # Try to load from local path first
200
  config_file = os.path.join(pretrained_model_name_or_path, "preprocessor_config.json")
201
 
202
+ if os.path.exists(config_file):
203
+ # Load from local path
204
+ with open(config_file, "r") as f:
205
+ config = json.load(f)
206
+ else:
207
+ # Try to download from HuggingFace Hub
208
+ try:
209
+ config_file = hf_hub_download(
210
+ repo_id=pretrained_model_name_or_path,
211
+ filename="preprocessor_config.json",
212
+ **kwargs
213
+ )
214
+ with open(config_file, "r") as f:
215
+ config = json.load(f)
216
+ except Exception as e:
217
+ raise FileNotFoundError(
218
+ f"Could not load preprocessor config from {pretrained_model_name_or_path}. "
219
+ f"Error: {e}"
220
+ )
221
 
222
  # Remove processor_type from config
223
  config.pop("processor_type", None)