{
  "adam_b1": 0.8,
  "adam_b2": 0.99,
  "architectures": [
    "Quantizer"
  ],
  "auto_map": {
    "AutoConfig": "quantizer_config.QuantizerConfig",
    "AutoModel": "models.Quantizer"
  },
  "custom_pipelines": {
    "prosody-embedding": {
      "impl": "prosody_embedding_pipeline.ProsodyEmbeddingPipeline",
      "pt": [
        "AutoModel"
      ],
      "tf": []
    }
  },
  "decoder_depth": 4,
  "decoder_dilation_growth_rate": 3,
  "decoder_downs_t": [
    4
  ],
  "decoder_input_emb_width": 3,
  "decoder_levels": 1,
  "decoder_m_conv": 1.0,
  "decoder_output_emb_width": 64,
  "decoder_strides_t": [
    2
  ],
  "decoder_width": 32,
  "emb_width": 64,
  "encoder_depth": 4,
  "encoder_dilation_growth_rate": 3,
  "encoder_downs_t": [
    4
  ],
  "encoder_input_emb_width": 3,
  "encoder_levels": 1,
  "encoder_m_conv": 1.0,
  "encoder_output_emb_width": 64,
  "encoder_strides_t": [
    2
  ],
  "encoder_width": 32,
  "f0_feats": false,
  "f0_median": false,
  "f0_normalize": true,
  "intensity_normalize": true,
  "l_bins": 320,
  "lambda_commit": 0.02,
  "learning_rate": 0.0002,
  "levels": 1,
  "lr_decay": 0.999,
  "model_type": "prosody_quantizer",
  "mu": 0.99,
  "multispkr": "single",
  "torch_dtype": "float32",
  "transformers_version": "4.53.3"
}