# Spaces: Runtime error
"""Run the mispronunciation detector once on a bundled sample and print the result.

The script reads ``data/arctic_a0003.wav`` and ``data/arctic_a0003.txt`` and
the model files under ``model/``, all resolved against the current working
directory. It then calls ``MispronounciationDetector.detect`` and prints the
``ref``, ``hyp``, ``phoneme_errors`` and ``per`` entries of the returned
mapping.
"""
import os
import re

import datasets
import jiwer
import torchaudio
from speechbrain.pretrained import GraphemeToPhoneme
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

from wav2vecasr.MispronounciationDetector import MispronounciationDetector
from wav2vecasr.PhonemeASRModel import (
    MultitaskPhonemeASRModel,
    Wav2Vec2OptimisedPhonemeASRModel,
    Wav2Vec2PhonemeASRModel,
)

# Single source of truth for the compute device: the waveform, the ASR model
# and the detector must all be given the same value.
device = "cpu"

# Load sample data
audio_path = os.path.join(os.getcwd(), "data", "arctic_a0003.wav")
transcript_path = os.path.join(os.getcwd(), "data", "arctic_a0003.txt")
audio, org_sr = torchaudio.load(audio_path)
audio = torchaudio.functional.resample(audio, orig_freq=org_sr, new_freq=16000)
# torchaudio.load returns a (channels, frames) tensor by default. Averaging
# over the channel axis yields the 1-D waveform: for a mono file the values
# are unchanged, and a multi-channel file is down-mixed instead of raising the
# shape error that ``audio.view(audio.shape[1])`` would produce.
audio = audio.mean(dim=0)
audio = audio.to(device)
# Explicit encoding so the transcript decodes the same way on every platform.
# The ``with`` block closes the file; no manual close() is needed.
with open(transcript_path, encoding="utf-8") as f:
    text = f.read()
print("Done loading sample data")

# Load processors and models
path = os.path.join(os.getcwd(), "model", "multitask_best_ctc.pt")
vocab_path = os.path.join(os.getcwd(), "model", "vocab")
asr_model = MultitaskPhonemeASRModel(path, vocab_path, device)
g2p = GraphemeToPhoneme.from_hparams("speechbrain/soundchoice-g2p")
mispronounciation_detector = MispronounciationDetector(asr_model, g2p, device)
print("Done loading models and processors")

# Predict
raw_info = mispronounciation_detector.detect(audio, text)
print(raw_info['ref'])
print(raw_info['hyp'])
print(raw_info['phoneme_errors'])
print(f"PER: {raw_info['per']}\n")