Spaces:

peproject
/

pronounciationevaluation

Runtime error

App Files Files Community

bel32123 committed on Oct 30, 2023

Commit

2676061

1 Parent(s): 490c46f

Introduce uncertainty to word error with PER threshold

Browse files

Files changed (1) hide show

wav2vecasr/MispronounciationDetector.py +19 -6

wav2vecasr/MispronounciationDetector.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from pandas.core.construction import T
 import torch
 import jiwer
 class MispronounciationDetector:
   def __init__(self, l2_phoneme_recogniser, g2p, device):
@@ -8,18 +9,19 @@ class MispronounciationDetector:
     self.g2p = g2p
     self.device = device
-  def detect(self, audio, text):
     l2_phones = self.phoneme_asr_model.get_l2_phoneme_sequence(audio)
     native_speaker_phones = self.get_native_speaker_phoneme_sequence(text)
     standardised_native_speaker_phones = self.phoneme_asr_model.standardise_g2p_phoneme_sequence(native_speaker_phones)
-    raw_info = self.get_mispronounciation_output(text, l2_phones, standardised_native_speaker_phones)
     return raw_info
   def get_native_speaker_phoneme_sequence(self, text):
     phonemes = self.g2p(text)
     return phonemes
-  def get_mispronounciation_output(self, text, pred_phones, org_label_phones):
     """
     Aligns the predicted phones from the L2 speaker and the expected native speaker phone to get the errors
     :param text: original words read by the user
@@ -101,7 +103,7 @@ class MispronounciationDetector:
     # get mispronounced words based on if there are phoneme errors present in the phonemes of that word
     aligned_word_error_output = ""
     words = text.split(" ")
-    word_error_bool = self.get_mispronounced_words(error_bool)
     wer = sum(word_error_bool) / len(words)
     raw_info = {"ref":ref, "hyp": hyp, "per":per, "phoneme_errors": error_bool, "wer": wer, "words": words, "word_errors":word_error_bool}
@@ -109,16 +111,27 @@ class MispronounciationDetector:
     return raw_info
-  def get_mispronounced_words(self, phoneme_error_bool):
     # map mispronounced phones back to the words that were mispronounced to get WER
     word_error_bool = []
     phoneme_error_bool.append("|")
     word_phones = self.split_lst_by_delim(phoneme_error_bool, "|")
     for phones in word_phones:
-      if "s" in phones or "d" in phones or "a" in phones:
         word_error_bool.append(True)
       else:
         word_error_bool.append(False)
     return word_error_bool

 from pandas.core.construction import T
 import torch
 import jiwer
+import re
 class MispronounciationDetector:
   def __init__(self, l2_phoneme_recogniser, g2p, device):
     self.g2p = g2p
     self.device = device
+  def detect(self, audio, text, phoneme_error_threshold=0.25):
     l2_phones = self.phoneme_asr_model.get_l2_phoneme_sequence(audio)
+    l2_phones = [re.sub(r'\d', "", phone_str) for phone_str in l2_phones] #g2p has no lexical stress
     native_speaker_phones = self.get_native_speaker_phoneme_sequence(text)
     standardised_native_speaker_phones = self.phoneme_asr_model.standardise_g2p_phoneme_sequence(native_speaker_phones)
+    raw_info = self.get_mispronounciation_output(text, l2_phones, standardised_native_speaker_phones, phoneme_error_threshold)
     return raw_info
   def get_native_speaker_phoneme_sequence(self, text):
     phonemes = self.g2p(text)
     return phonemes
+  def get_mispronounciation_output(self, text, pred_phones, org_label_phones, phoneme_error_threshold):
     """
     Aligns the predicted phones from the L2 speaker and the expected native speaker phone to get the errors
     :param text: original words read by the user
     # get mispronounced words based on if there are phoneme errors present in the phonemes of that word
     aligned_word_error_output = ""
     words = text.split(" ")
+    word_error_bool = self.get_mispronounced_words(error_bool, phoneme_error_threshold)
     wer = sum(word_error_bool) / len(words)
     raw_info = {"ref":ref, "hyp": hyp, "per":per, "phoneme_errors": error_bool, "wer": wer, "words": words, "word_errors":word_error_bool}
     return raw_info
+  def get_mispronounced_words(self, phoneme_error_bool, phoneme_error_threshold):
     # map mispronounced phones back to the words that were mispronounced to get WER
     word_error_bool = []
     phoneme_error_bool.append("|")
     word_phones = self.split_lst_by_delim(phoneme_error_bool, "|")
+    # a word is flagged as wrong only if the fraction of its phones in error is > phoneme error threshold
     for phones in word_phones:
+      # get count of "s", "d", "a" in phones
+      error_count = 0
+      for phone in phones:
+        if phone == "s" or phone == "d" or phone == "a":
+          error_count += 1
+      # check if pass threshold
+      if error_count / len(phones) > phoneme_error_threshold:
         word_error_bool.append(True)
       else:
         word_error_bool.append(False)
     return word_error_bool