Spaces:
Running
Running
| """ | |
| Style-Bert-VITS2 モデルのハイパーパラメータを表す Pydantic モデル。 | |
| デフォルト値は configs/config_jp_extra.json 内の定義と概ね同一で、 | |
| 万が一ロードした config.json に存在しないキーがあった際のフェイルセーフとして適用される。 | |
| """ | |
| from pathlib import Path | |
| from typing import Optional, Union | |
| from pydantic import BaseModel, ConfigDict | |
class HyperParametersTrain(BaseModel):
    """Hyperparameters held under the ``train`` key of a Style-Bert-VITS2 config.

    Every field carries a default, so a key that is missing from the loaded
    config.json falls back to the value declared here.
    """

    # Intervals, seed and epoch count.
    log_interval: int = 200
    eval_interval: int = 1000
    seed: int = 42
    epochs: int = 1000

    # Optimizer-style settings.
    # NOTE(review): the names look like Adam-family arguments — confirm
    # against the trainer that consumes them.
    learning_rate: float = 0.0001
    betas: tuple[float, float] = (0.8, 0.99)
    eps: float = 1e-9
    batch_size: int = 2

    # Reduced-precision switches (presumably bfloat16 / float16 — TODO confirm).
    bf16_run: bool = False
    fp16_run: bool = False

    lr_decay: float = 0.99996
    segment_size: int = 16_384
    # NOTE(review): declared as int, so a fractional ratio in config.json
    # would be rejected by validation — confirm this is intended.
    init_lr_ratio: int = 1
    warmup_epochs: int = 0

    # Presumably loss-term coefficients — TODO confirm against the training loop.
    c_mel: int = 45
    c_kl: float = 1.0
    c_commit: int = 100

    skip_optimizer: bool = False

    # Per-component freeze flags; all of them are off by default.
    freeze_ZH_bert: bool = False
    freeze_JP_bert: bool = False
    freeze_EN_bert: bool = False
    freeze_emo: bool = False
    freeze_style: bool = False
    freeze_decoder: bool = False
class HyperParametersData(BaseModel):
    """Hyperparameters held under the ``data`` key of a Style-Bert-VITS2 config.

    Every field carries a default, so a key that is missing from the loaded
    config.json falls back to the value declared here.
    """

    use_jp_extra: bool = True

    # File-list paths; the defaults reference a "Data/Dummy" directory.
    training_files: str = "Data/Dummy/train.list"
    validation_files: str = "Data/Dummy/val.list"

    # Audio / spectrogram settings.
    # NOTE(review): units (Hz, samples, ...) are not visible in this file —
    # confirm at the call sites.
    max_wav_value: float = 32_768.0
    sampling_rate: int = 44_100
    filter_length: int = 2048
    hop_length: int = 512
    win_length: int = 2048
    n_mel_channels: int = 128
    mel_fmin: float = 0.0
    mel_fmax: Optional[float] = None  # may be left unset (None)

    add_blank: bool = True
    n_speakers: int = 1
    cleaned_text: bool = True

    # Name -> integer id tables. Pydantic copies mutable defaults for each
    # instance, so the dict literals below are not shared between instances.
    spk2id: dict[str, int] = {"Dummy": 0}
    num_styles: int = 1
    style2id: dict[str, int] = {"Neutral": 0}
class HyperParametersModelSLM(BaseModel):
    """Hyperparameters held under the ``slm`` key of the model section.

    Every field carries a default, so a key that is missing from the loaded
    config.json falls back to the value declared here.
    """

    # Relative path; the default points at "./slm/wavlm-base-plus".
    model: str = "./slm/wavlm-base-plus"
    # NOTE(review): presumably a sampling rate — confirm against the consumer.
    sr: int = 16_000
    hidden: int = 768
    nlayers: int = 13
    initial_channel: int = 64
class HyperParametersModel(BaseModel):
    """Hyperparameters held under the ``model`` key of a Style-Bert-VITS2 config.

    Every field carries a default, so a key that is missing from the loaded
    config.json falls back to the value declared here. List defaults and the
    nested ``slm`` model are copied by Pydantic for each instance, so they are
    not shared between instances.
    """

    # Feature switches.
    use_spk_conditioned_encoder: bool = True
    use_noise_scaled_mas: bool = True
    use_mel_posterior_encoder: bool = False
    use_duration_discriminator: bool = False
    use_wavlm_discriminator: bool = True

    # Channel counts and layer sizes.
    inter_channels: int = 192
    hidden_channels: int = 192
    filter_channels: int = 768
    n_heads: int = 2
    n_layers: int = 6
    kernel_size: int = 3
    p_dropout: float = 0.1

    # Residual-block settings; note that ``resblock`` is a string, not an int.
    resblock: str = "1"
    resblock_kernel_sizes: list[int] = [3, 7, 11]
    resblock_dilation_sizes: list[list[int]] = [[1, 3, 5], [1, 3, 5], [1, 3, 5]]

    # Upsampling settings.
    # NOTE(review): the two lists have the same length (5); presumably they
    # must stay aligned element-by-element — confirm against the model code.
    upsample_rates: list[int] = [8, 8, 2, 2, 2]
    upsample_initial_channel: int = 512
    upsample_kernel_sizes: list[int] = [16, 16, 8, 2, 2]

    n_layers_q: int = 3
    use_spectral_norm: bool = False
    gin_channels: int = 512

    # Nested section; defaults to an all-default HyperParametersModelSLM.
    slm: HyperParametersModelSLM = HyperParametersModelSLM()
class HyperParameters(BaseModel):
    """Top-level Style-Bert-VITS2 hyperparameters, mirroring a config.json file.

    Every field carries a default, so a key that is missing from the loaded
    config.json falls back to the value declared here. The nested ``train``,
    ``data`` and ``model`` sections default to all-default instances of their
    respective models.
    """

    model_name: str = "Dummy"
    version: str = "2.0-JP-Extra"
    train: HyperParametersTrain = HyperParametersTrain()
    data: HyperParametersData = HyperParametersData()
    model: HyperParametersModel = HyperParametersModel()

    # The parameters below are set dynamically only during training
    # (they normally do not exist in config.json).
    model_dir: Optional[str] = None
    speedup: bool = False
    repo_id: Optional[str] = None

    # Remove "model_" from Pydantic's protected namespaces so that fields such
    # as model_name / model_dir can be declared on this model.
    model_config = ConfigDict(protected_namespaces=())

    @staticmethod
    def load_from_json(json_path: Union[str, Path]) -> "HyperParameters":
        """
        Load hyperparameters from the given JSON file.

        Declared as a static method because it takes no ``self``: both
        ``HyperParameters.load_from_json(path)`` and a call made through an
        instance resolve to the same function.

        Args:
            json_path (Union[str, Path]): Path to the JSON file (read as UTF-8).

        Returns:
            HyperParameters: The validated hyperparameters.

        Raises:
            OSError: If the file cannot be opened.
            pydantic.ValidationError: If the content is not valid JSON or does
                not match the schema.
        """
        with open(json_path, encoding="utf-8") as f:
            return HyperParameters.model_validate_json(f.read())