File size: 1,105 Bytes
f488928 86e31ba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
from metrics import (
calculate_msd,
calculate_f0_correlation,
calculate_phoneme_accuracy,
calculate_spectral_convergence
)
from inference import run_tts
def evaluate_bd_tts(model, test_dataset):
    """Synthesize each test sentence and score the outputs against references.

    Args:
        model: Not read by this function; ``run_tts`` is called with the
            text only. NOTE(review): presumably kept for interface
            compatibility -- confirm whether it should reach ``run_tts``.
        test_dataset: Iterable of ``(text, target)`` pairs. ``text`` is
            passed to ``run_tts``; ``target`` is forwarded unchanged to the
            metric helpers.

    Returns:
        A dict keyed by metric name. Four entries hold whatever the
        corresponding ``calculate_*`` helper returns for the full lists of
        predictions and targets; ``'accent_score'`` is a fixed placeholder.
    """
    # Run synthesis once per sample, in dataset order, keeping each
    # prediction paired with its reference.
    pairs = [(run_tts(text), target) for text, target in test_dataset]
    synthesized = [pred for pred, _ in pairs]
    references = [target for _, target in pairs]

    return {
        'mel_spectral_distance': calculate_msd(synthesized, references),
        'f0_correlation': calculate_f0_correlation(synthesized, references),
        'phoneme_accuracy': calculate_phoneme_accuracy(synthesized, references),
        'spectral_convergence': calculate_spectral_convergence(
            synthesized, references
        ),
        # Placeholder constant, not a measurement: no accent classifier is
        # wired in yet. A pretrained Bangla accent classifier would be
        # plugged in here.
        'accent_score': 0.85,
    }
if __name__ == "__main__":
    # Smoke run on a single dummy (text, reference) pair; no model object
    # is supplied, so None is passed in its place.
    sample = ("আমি বাংলা বলি।", "reference.wav")
    test_dataset = [sample]  # dummy dataset
    results = evaluate_bd_tts(None, test_dataset)
    print(results)
|