Update README.md
README.md CHANGED
@@ -71,6 +71,7 @@ You can use this model by writing your own inference script:
 
 ```python
 import os
+import warnings
 
 import librosa
 import nltk
@@ -94,7 +95,7 @@ model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 def speech_file_to_array_fn(batch):
-    speech_array, sampling_rate = librosa.load(batch["path"], sr=
+    speech_array, sampling_rate = librosa.load(batch["path"], sr=16_000)
     prepared_sentence = ' '.join(list(filter(
         lambda it: it.isalpha(),
         nltk.wordpunct_tokenize(batch["sentence"].lower().replace('ё', 'е'))
@@ -103,7 +104,9 @@ def speech_file_to_array_fn(batch):
     batch["sentence"] = prepared_sentence
     return batch
 
-
+with warnings.catch_warnings():
+    warnings.simplefilter("ignore")
+    test_dataset = test_dataset.map(speech_file_to_array_fn, num_proc=num_processes)
 
 inputs = processor(test_dataset["speech"], sampling_rate=16_000,
                    return_tensors="pt", padding=True)
@@ -115,10 +118,12 @@ predicted_sentences = processor.batch_decode(
     num_processes=num_processes
 ).text
 
-
-
-
-
+with warnings.catch_warnings():
+    warnings.simplefilter("ignore")
+    for i, predicted_sentence in enumerate(predicted_sentences):
+        print("-" * 100)
+        print("Reference:", test_dataset[i]["sentence"])
+        print("Prediction:", predicted_sentence)
 ```
 
 ```text
@@ -195,7 +200,7 @@ If you want to cite this model you can use this:
 
 ```bibtex
 @misc{bondarenko2022wav2vec2-large-ru-golos,
-  title={XLSR Wav2Vec2 Russian with Language Model by Ivan Bondarenko},
+  title={XLSR Wav2Vec2 Russian with 3-gram Language Model by Ivan Bondarenko},
   author={Bondarenko, Ivan},
   publisher={Hugging Face},
   journal={Hugging Face Hub},
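For context, the preprocessing hunk above normalizes each reference sentence before decoding: the text is lower-cased, 'ё' is folded to 'е', and only purely alphabetic tokens are kept. A minimal standalone sketch of that normalization step (the sample sentence is made up, not taken from the model card):

```python
import nltk  # wordpunct_tokenize is purely regex-based, so no NLTK data download is needed

sentence = "Ёжик сидел, глядя на звёзды!"  # hypothetical example sentence

# Same normalization as in speech_file_to_array_fn: lower-case, fold 'ё' to 'е',
# tokenize, and keep only alphabetic tokens (digits and punctuation are dropped).
prepared_sentence = ' '.join(list(filter(
    lambda it: it.isalpha(),
    nltk.wordpunct_tokenize(sentence.lower().replace('ё', 'е'))
)))

print(prepared_sentence)  # -> ежик сидел глядя на звезды
```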