| { | |
| "model_params": { | |
| "decoder": { | |
| "resblock_dilation_sizes": [ | |
| [ | |
| 1, | |
| 3, | |
| 5 | |
| ], | |
| [ | |
| 1, | |
| 3, | |
| 5 | |
| ], | |
| [ | |
| 1, | |
| 3, | |
| 5 | |
| ] | |
| ], | |
| "resblock_kernel_sizes": [ | |
| 3, | |
| 7, | |
| 11 | |
| ], | |
| "type": "hifigan", | |
| "upsample_initial_channel": 512, | |
| "upsample_kernel_sizes": [ | |
| 20, | |
| 10, | |
| 6, | |
| 4 | |
| ], | |
| "upsample_rates": [ | |
| 10, | |
| 5, | |
| 3, | |
| 2 | |
| ] | |
| }, | |
| "diffusion": { | |
| "dist": { | |
| "estimate_sigma_data": true, | |
| "mean": -3.0, | |
| "sigma_data": 0.2, | |
| "std": 1.0 | |
| }, | |
| "embedding_mask_proba": 0.1, | |
| "transformer": { | |
| "head_features": 64, | |
| "multiplier": 2, | |
| "num_heads": 8, | |
| "num_layers": 3 | |
| } | |
| }, | |
| "dim_in": 64, | |
| "dropout": 0.2, | |
| "hidden_dim": 512, | |
| "max_conv_dim": 512, | |
| "max_dur": 50, | |
| "multispeaker": false, | |
| "n_layer": 3, | |
| "n_mels": 80, | |
| "n_token": 178, | |
| "slm": { | |
| "hidden": 768, | |
| "initial_channel": 64, | |
| "model": "microsoft/wavlm-base-plus", | |
| "nlayers": 13, | |
| "sr": 16000 | |
| }, | |
| "style_dim": 128 | |
| }, | |
| "training_config": { | |
| "epochs": 5, | |
| "batch_size": 2, | |
| "max_len": 620, | |
| "optimizer": { | |
| "bert_lr": 1e-05, | |
| "ft_lr": 0.0001, | |
| "lr": 0.0001 | |
| }, | |
| "loss_params": { | |
| "diff_epoch": 1, | |
| "joint_epoch": 110, | |
| "lambda_F0": 1.0, | |
| "lambda_ce": 20.0, | |
| "lambda_diff": 1.0, | |
| "lambda_dur": 1.0, | |
| "lambda_gen": 1.0, | |
| "lambda_mel": 5.0, | |
| "lambda_mono": 1.0, | |
| "lambda_norm": 1.0, | |
| "lambda_s2s": 1.0, | |
| "lambda_slm": 1.0, | |
| "lambda_sty": 1.0 | |
| } | |
| }, | |
| "preprocess_params": { | |
| "spect_params": { | |
| "hop_length": 300, | |
| "n_fft": 2048, | |
| "win_length": 1200 | |
| }, | |
| "sr": 24000 | |
| }, | |
| "data_params": { | |
| "OOD_data": "Data/OOD_texts.txt", | |
| "min_length": 50, | |
| "root_path": "Data/wavs", | |
| "train_data": "Data/train_list.txt", | |
| "val_data": "Data/val_list.txt" | |
| }, | |
| "model_state": { | |
| "epoch": 4, | |
| "iterations": 1225, | |
| "val_loss": 0.4000273048877716 | |
| }, | |
| "training_metrics": { | |
| "train_loss": [], | |
| "val_loss": [ | |
| 56.0, | |
| 24.0, | |
| 35.0, | |
| 43.0, | |
| 22.0, | |
| 13.0, | |
| 21.0, | |
| 5.0, | |
| 53.0, | |
| 45.0, | |
| 41.0, | |
| 25.0, | |
| 3.0, | |
| 44.0, | |
| 40.0, | |
| 18.0, | |
| 17.0, | |
| 0.0, | |
| 9.0, | |
| 52.0, | |
| 52.0, | |
| 7.0, | |
| 21.0, | |
| 24.0, | |
| 21.0, | |
| 41.0, | |
| 40.0, | |
| 0.0, | |
| 22.0, | |
| 55.0, | |
| 35.0, | |
| 30.0, | |
| 35.0, | |
| 5.0, | |
| 52.0, | |
| 52.0, | |
| 0.0, | |
| 3.0, | |
| 45.0, | |
| 31.0, | |
| 42.0, | |
| 16.0, | |
| 5.0, | |
| 35.0, | |
| 13.0, | |
| 51.0, | |
| 4.0, | |
| 27.0, | |
| 44.0, | |
| 16.0, | |
| 48.0, | |
| 11.0, | |
| 57.0, | |
| 15.0, | |
| 27.0, | |
| 53.0, | |
| 28.0, | |
| 57.0, | |
| 20.0, | |
| 16.0, | |
| 1.0, | |
| 21.0, | |
| 5.0, | |
| 35.0, | |
| 26.0, | |
| 37.0, | |
| 57.0, | |
| 16.0 | |
| ], | |
| "dur_loss": [ | |
| 0.502, | |
| 0.482, | |
| 0.476, | |
| 0.466, | |
| 0.473, | |
| 0.464, | |
| 0.464, | |
| 0.459, | |
| 0.467, | |
| 0.47, | |
| 0.463, | |
| 0.458, | |
| 0.498, | |
| 0.483, | |
| 0.472, | |
| 0.473, | |
| 0.465, | |
| 0.469, | |
| 0.459, | |
| 0.454, | |
| 0.461, | |
| 0.458, | |
| 0.461, | |
| 0.453, | |
| 0.457, | |
| 0.456, | |
| 0.456, | |
| 0.455, | |
| 0.456, | |
| 0.453, | |
| 0.452, | |
| 0.453, | |
| 0.464, | |
| 0.468, | |
| 0.446, | |
| 0.45, | |
| 0.449, | |
| 0.451, | |
| 0.442, | |
| 0.438, | |
| 0.445, | |
| 0.439, | |
| 0.524, | |
| 0.488, | |
| 0.495, | |
| 0.486, | |
| 0.488, | |
| 0.494, | |
| 0.484, | |
| 0.485, | |
| 0.419, | |
| 0.417, | |
| 0.425, | |
| 0.426, | |
| 0.429, | |
| 0.424, | |
| 0.414, | |
| 0.426, | |
| 0.523, | |
| 0.5, | |
| 0.5, | |
| 0.492, | |
| 0.485, | |
| 0.411, | |
| 0.395, | |
| 0.391, | |
| 0.4, | |
| 0.4 | |
| ], | |
| "F0_loss": [ | |
| 1.759, | |
| 1.681, | |
| 1.706, | |
| 1.622, | |
| 1.67, | |
| 1.749, | |
| 1.749, | |
| 1.72, | |
| 1.733, | |
| 1.71, | |
| 1.702, | |
| 1.661, | |
| 1.743, | |
| 1.683, | |
| 1.642, | |
| 1.678, | |
| 1.703, | |
| 1.679, | |
| 1.635, | |
| 1.733, | |
| 1.648, | |
| 1.689, | |
| 1.705, | |
| 1.693, | |
| 1.688, | |
| 1.7, | |
| 1.716, | |
| 1.678, | |
| 1.672, | |
| 1.696, | |
| 1.642, | |
| 1.693, | |
| 1.536, | |
| 1.46, | |
| 1.438, | |
| 1.463, | |
| 1.466, | |
| 1.446, | |
| 1.453, | |
| 1.445, | |
| 1.443, | |
| 1.441, | |
| 1.573, | |
| 1.585, | |
| 1.659, | |
| 1.654, | |
| 1.68, | |
| 1.614, | |
| 1.596, | |
| 1.552, | |
| 1.095, | |
| 1.086, | |
| 1.032, | |
| 1.022, | |
| 1.033, | |
| 1.059, | |
| 1.036, | |
| 1.02, | |
| 2.685, | |
| 2.684, | |
| 2.636, | |
| 2.653, | |
| 2.774, | |
| 1.49, | |
| 1.41, | |
| 1.436, | |
| 1.392, | |
| 1.411 | |
| ], | |
| "epochs": [ | |
| 1, | |
| 2, | |
| 3, | |
| 4, | |
| 5, | |
| 6, | |
| 7, | |
| 8, | |
| 9, | |
| 10, | |
| 11, | |
| 12, | |
| 13, | |
| 14, | |
| 15, | |
| 16, | |
| 17, | |
| 18, | |
| 19, | |
| 20, | |
| 21, | |
| 22, | |
| 23, | |
| 24, | |
| 25, | |
| 26, | |
| 27, | |
| 28, | |
| 29, | |
| 30, | |
| 31, | |
| 32, | |
| 33, | |
| 34, | |
| 35, | |
| 36, | |
| 37, | |
| 38, | |
| 39, | |
| 40, | |
| 41, | |
| 42, | |
| 43, | |
| 44, | |
| 45, | |
| 46, | |
| 47, | |
| 48, | |
| 49, | |
| 50, | |
| 51, | |
| 52, | |
| 53, | |
| 54, | |
| 55, | |
| 56, | |
| 57, | |
| 58, | |
| 59, | |
| 60, | |
| 61, | |
| 62, | |
| 63, | |
| 64, | |
| 65, | |
| 66, | |
| 67, | |
| 68 | |
| ] | |
| } | |
| } |