Update prosody_embedding_pipeline.py
Browse files
prosody_embedding_pipeline.py
CHANGED
|
@@ -5,7 +5,6 @@ import torch
|
|
| 5 |
from typing import Dict, Union, List, Optional
|
| 6 |
from pathlib import Path
|
| 7 |
import logging
|
| 8 |
-
from .prosody_preprocessor import ProsodyPreprocessor, ProsodyConfig
|
| 9 |
from datasets import Dataset
|
| 10 |
|
| 11 |
logger = logging.getLogger(__name__)
|
|
@@ -16,46 +15,15 @@ class ProsodyEmbeddingPipeline(Pipeline):
|
|
| 16 |
speaker_stats,
|
| 17 |
f0_interp,
|
| 18 |
f0_normalize,
|
| 19 |
-
preprocessor: Optional[ProsodyPreprocessor] = None,
|
| 20 |
stats_dir: Optional[str] = None,
|
| 21 |
**kwargs
|
| 22 |
):
|
| 23 |
super().__init__(**kwargs)
|
| 24 |
-
self.preprocessor = preprocessor or ProsodyPreprocessor()
|
| 25 |
self.stats_dir = Path(stats_dir) if stats_dir else None
|
| 26 |
self.speaker_stats = speaker_stats
|
| 27 |
self.f0_interp = f0_interp
|
| 28 |
self.f0_normalize = f0_normalize
|
| 29 |
|
| 30 |
-
|
| 31 |
-
@classmethod
|
| 32 |
-
def from_dataset(
|
| 33 |
-
cls,
|
| 34 |
-
dataset: Dataset,
|
| 35 |
-
stats_dir: str = "preprocessor_stats",
|
| 36 |
-
**kwargs
|
| 37 |
-
) -> "ProsodyPipeline":
|
| 38 |
-
"""Initialize pipeline by computing speaker statistics from a dataset"""
|
| 39 |
-
logger.info("Initializing pipeline from dataset...")
|
| 40 |
-
|
| 41 |
-
preprocessor = ProsodyPreprocessor()
|
| 42 |
-
|
| 43 |
-
stats_dir = Path(stats_dir)
|
| 44 |
-
stats_dir.mkdir(parents=True, exist_ok=True)
|
| 45 |
-
|
| 46 |
-
logger.info("Computing speaker statistics...")
|
| 47 |
-
features_dataset, speaker_stats = preprocessor.collect_stats(dataset)
|
| 48 |
-
|
| 49 |
-
stats_path = stats_dir / "speaker_stats.pt"
|
| 50 |
-
logger.info(f"Saving speaker statistics to {stats_path}")
|
| 51 |
-
preprocessor.save_stats(stats_path)
|
| 52 |
-
|
| 53 |
-
return cls(
|
| 54 |
-
preprocessor=preprocessor,
|
| 55 |
-
stats_dir=stats_dir,
|
| 56 |
-
model=None,
|
| 57 |
-
**kwargs
|
| 58 |
-
)
|
| 59 |
|
| 60 |
|
| 61 |
def _sanitize_parameters(self, **kwargs):
|
|
@@ -229,23 +197,3 @@ class ProsodyEmbeddingPipeline(Pipeline):
|
|
| 229 |
|
| 230 |
return outputs
|
| 231 |
|
| 232 |
-
|
| 233 |
-
@classmethod
|
| 234 |
-
def from_pretrained(cls, save_directory: Union[str, Path], **kwargs):
|
| 235 |
-
"""Load a pretrained pipeline"""
|
| 236 |
-
save_directory = Path(save_directory)
|
| 237 |
-
|
| 238 |
-
config = ProsodyConfig.from_pretrained(save_directory)
|
| 239 |
-
preprocessor = ProsodyPreprocessor(config)
|
| 240 |
-
|
| 241 |
-
pipeline = cls(
|
| 242 |
-
preprocessor=preprocessor,
|
| 243 |
-
stats_dir=save_directory,
|
| 244 |
-
**kwargs
|
| 245 |
-
)
|
| 246 |
-
|
| 247 |
-
stats_path = save_directory / "speaker_stats.pt"
|
| 248 |
-
if stats_path.exists():
|
| 249 |
-
pipeline.speaker_stats = ProsodyPreprocessor.load_stats(stats_path)
|
| 250 |
-
|
| 251 |
-
return pipeline
|
|
|
|
| 5 |
from typing import Dict, Union, List, Optional
|
| 6 |
from pathlib import Path
|
| 7 |
import logging
|
|
|
|
| 8 |
from datasets import Dataset
|
| 9 |
|
| 10 |
logger = logging.getLogger(__name__)
|
|
|
|
| 15 |
speaker_stats,
|
| 16 |
f0_interp,
|
| 17 |
f0_normalize,
|
|
|
|
| 18 |
stats_dir: Optional[str] = None,
|
| 19 |
**kwargs
|
| 20 |
):
|
| 21 |
super().__init__(**kwargs)
|
|
|
|
| 22 |
self.stats_dir = Path(stats_dir) if stats_dir else None
|
| 23 |
self.speaker_stats = speaker_stats
|
| 24 |
self.f0_interp = f0_interp
|
| 25 |
self.f0_normalize = f0_normalize
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
def _sanitize_parameters(self, **kwargs):
|
|
|
|
| 197 |
|
| 198 |
return outputs
|
| 199 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|