|
|
from metrics import ( |
|
|
calculate_msd, |
|
|
calculate_f0_correlation, |
|
|
calculate_phoneme_accuracy, |
|
|
calculate_spectral_convergence |
|
|
) |
|
|
from inference import run_tts |
|
|
|
|
|
def evaluate_bd_tts(model, test_dataset):
    """Run TTS over a test set and score the output against the references.

    Every ``(text, target)`` pair in ``test_dataset`` is synthesized with
    ``run_tts(text)``; the collected predictions and targets are then handed,
    as two parallel lists, to each metric function.

    Args:
        model: Accepted for interface compatibility but not used by this
            function; ``run_tts`` is called with the text only.
        test_dataset: Iterable of ``(text, target)`` pairs. ``target`` is
            passed through to the metric functions unchanged.

    Returns:
        dict: Metric name -> value, with keys ``mel_spectral_distance``,
        ``f0_correlation``, ``phoneme_accuracy``, ``spectral_convergence``
        and ``accent_score``.
    """
    pred_audio, target_audio = [], []

    for text, target in test_dataset:
        # NOTE(review): `model` is not forwarded here, so whatever model
        # `run_tts` uses internally is what gets evaluated -- confirm that
        # this is intended.
        pred_audio.append(run_tts(text))
        target_audio.append(target)

    # The dict literal evaluates its values top to bottom, so the metric
    # functions run in the same order as the key listing.
    metrics = {
        'mel_spectral_distance': calculate_msd(pred_audio, target_audio),
        'f0_correlation': calculate_f0_correlation(pred_audio, target_audio),
        'phoneme_accuracy': calculate_phoneme_accuracy(pred_audio, target_audio),
        'spectral_convergence': calculate_spectral_convergence(pred_audio, target_audio),
        # NOTE(review): hard-coded constant, not computed from the audio.
        # Kept so the result keys stay stable for callers; replace with a
        # real measurement before reporting this number.
        'accent_score': 0.85,
    }

    return metrics
|
|
|
|
|
if __name__ == "__main__":
    # Minimal smoke run: one Bengali sentence paired with a reference entry.
    # NOTE(review): the target here is a file name string, not loaded audio --
    # confirm the metric functions accept paths.
    test_dataset = [("আমি বাংলা বলি।", "reference.wav")]

    # No model object is supplied; evaluate_bd_tts does not use its `model`
    # argument, so None is sufficient for this run.
    print(evaluate_bd_tts(None, test_dataset))
|
|
|
|
|
|