File size: 1,105 Bytes
f488928
86e31ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from metrics import (
    calculate_msd,
    calculate_f0_correlation,
    calculate_phoneme_accuracy,
    calculate_spectral_convergence
)
from inference import run_tts

def evaluate_bd_tts(model, test_dataset, *, accent_scorer=None):
    """Synthesize each text in ``test_dataset`` and score it against its reference.

    Args:
        model: Currently unused -- synthesis goes through ``run_tts(text)``
            only. The parameter is kept so existing callers keep working.
        test_dataset: Iterable of ``(text, target)`` pairs. ``text`` is handed
            to ``run_tts``; ``target`` is forwarded unchanged to the metric
            helpers.
        accent_scorer: Optional callable invoked as
            ``accent_scorer(pred_audio, target_audio)`` (the same call shape
            as the other metric helpers) whose return value is reported as
            ``'accent_score'``. When omitted, the placeholder constant 0.85
            is reported instead.

    Returns:
        dict with the keys ``'mel_spectral_distance'``, ``'f0_correlation'``,
        ``'phoneme_accuracy'``, ``'spectral_convergence'`` and
        ``'accent_score'``.
    """
    # Run synthesis once per item, keeping each prediction paired with its
    # reference so both lists stay aligned.
    pairs = [(run_tts(text), target) for text, target in test_dataset]
    pred_audio = [pred for pred, _ in pairs]
    target_audio = [target for _, target in pairs]

    metrics = {
        'mel_spectral_distance': calculate_msd(pred_audio, target_audio),
        'f0_correlation': calculate_f0_correlation(pred_audio, target_audio),
        'phoneme_accuracy': calculate_phoneme_accuracy(pred_audio, target_audio),
        'spectral_convergence': calculate_spectral_convergence(pred_audio, target_audio),
    }

    if accent_scorer is not None:
        # A real (e.g. pretrained Bangla accent) classifier was supplied.
        metrics['accent_score'] = accent_scorer(pred_audio, target_audio)
    else:
        # NOTE: placeholder value, NOT a measurement. Pass ``accent_scorer``
        # to report an actual accent score.
        metrics['accent_score'] = 0.85

    return metrics

if __name__ == "__main__":
    # Smoke-run with a single dummy (text, reference) pair. The reference is
    # given as a file name here; whether the metric helpers accept a path is
    # not visible from this file -- TODO confirm.
    test_dataset = [
        ("আমি বাংলা বলি।", "reference.wav"),
    ]
    # No model object is passed for this run.
    results = evaluate_bd_tts(None, test_dataset)
    print(results)