Update README.md
README.md CHANGED
@@ -71,6 +71,7 @@ You can use this model by writing your own inference script:
 
 ```python
 import os
+import warnings
 
 import librosa
 import nltk
@@ -94,7 +95,7 @@ model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 def speech_file_to_array_fn(batch):
-    speech_array, sampling_rate = librosa.load(batch["path"], sr=
+    speech_array, sampling_rate = librosa.load(batch["path"], sr=16_000)
     prepared_sentence = ' '.join(list(filter(
         lambda it: it.isalpha(),
         nltk.wordpunct_tokenize(batch["sentence"].lower().replace('ё', 'е'))
@@ -103,7 +104,9 @@ def speech_file_to_array_fn(batch):
     batch["sentence"] = prepared_sentence
     return batch
 
-
+with warnings.catch_warnings():
+    warnings.simplefilter("ignore")
+    test_dataset = test_dataset.map(speech_file_to_array_fn, num_proc=num_processes)
 
 inputs = processor(test_dataset["speech"], sampling_rate=16_000,
                    return_tensors="pt", padding=True)
@@ -115,10 +118,12 @@ predicted_sentences = processor.batch_decode(
     num_processes=num_processes
 ).text
 
-
-
-
-
+with warnings.catch_warnings():
+    warnings.simplefilter("ignore")
+    for i, predicted_sentence in enumerate(predicted_sentences):
+        print("-" * 100)
+        print("Reference:", test_dataset[i]["sentence"])
+        print("Prediction:", predicted_sentence)
 ```
 
 ```text
@@ -195,7 +200,7 @@ If you want to cite this model you can use this:
 
 ```bibtex
 @misc{bondarenko2022wav2vec2-large-ru-golos,
-  title={XLSR Wav2Vec2 Russian with Language Model by Ivan Bondarenko},
+  title={XLSR Wav2Vec2 Russian with 3-gram Language Model by Ivan Bondarenko},
   author={Bondarenko, Ivan},
   publisher={Hugging Face},
   journal={Hugging Face Hub},
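For context, the preprocessing hunk above normalizes each reference sentence before decoding: the text is lower-cased, 'ё' is folded to 'е', and only purely alphabetic tokens are kept. A minimal standalone sketch of that normalization step (the sample sentence is made up, not taken from the model card):

```python
import nltk  # wordpunct_tokenize is purely regex-based, so no NLTK data download is needed

sentence = "Ёжик сидел, глядя на звёзды!"  # hypothetical example sentence

# Same normalization as in speech_file_to_array_fn: lower-case, fold 'ё' to 'е',
# tokenize, and keep only alphabetic tokens (digits and punctuation are dropped).
prepared_sentence = ' '.join(list(filter(
    lambda it: it.isalpha(),
    nltk.wordpunct_tokenize(sentence.lower().replace('ё', 'е'))
)))

print(prepared_sentence)  # -> ежик сидел глядя на звезды
```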