Spaces:
Running
Running
Irpan
committed on
Commit
·
499b2c1
1
Parent(s):
c492cbb
asr
Browse files
asr.py
CHANGED
|
@@ -13,13 +13,13 @@ import util
|
|
| 13 |
|
| 14 |
# Load processor and model
|
| 15 |
models_info = {
|
| 16 |
-
"OpenAI-Whisper
|
| 17 |
"processor": WhisperProcessor.from_pretrained("openai/whisper-small", language="uzbek", task="transcribe"),
|
| 18 |
"model": AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small"),
|
| 19 |
"ctc_model": False,
|
| 20 |
"arabic_script": False
|
| 21 |
},
|
| 22 |
-
"Meta-MMS
|
| 23 |
"processor": AutoProcessor.from_pretrained("facebook/mms-1b-all", target_lang='uig-script_arabic'),
|
| 24 |
"model": AutoModelForCTC.from_pretrained("facebook/mms-1b-all", target_lang='uig-script_arabic', ignore_mismatched_sizes=True),
|
| 25 |
"ctc_model": True,
|
|
@@ -63,10 +63,10 @@ def transcribe(audio_data, model_id) -> str:
|
|
| 63 |
else:
|
| 64 |
return "<<ERROR: Invalid Audio Input Instance: {}>>".format(type(audio_data)), None
|
| 65 |
|
| 66 |
-
# Check audio duration
|
| 67 |
-
duration = audio_input.shape[1] / sampling_rate
|
| 68 |
-
if duration > 10:
|
| 69 |
-
|
| 70 |
|
| 71 |
model = models_info[model_id]["model"]
|
| 72 |
processor = models_info[model_id]["processor"]
|
|
|
|
| 13 |
|
| 14 |
# Load processor and model
|
| 15 |
models_info = {
|
| 16 |
+
"OpenAI-Whisper": {
|
| 17 |
"processor": WhisperProcessor.from_pretrained("openai/whisper-small", language="uzbek", task="transcribe"),
|
| 18 |
"model": AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small"),
|
| 19 |
"ctc_model": False,
|
| 20 |
"arabic_script": False
|
| 21 |
},
|
| 22 |
+
"Meta-MMS": {
|
| 23 |
"processor": AutoProcessor.from_pretrained("facebook/mms-1b-all", target_lang='uig-script_arabic'),
|
| 24 |
"model": AutoModelForCTC.from_pretrained("facebook/mms-1b-all", target_lang='uig-script_arabic', ignore_mismatched_sizes=True),
|
| 25 |
"ctc_model": True,
|
|
|
|
| 63 |
else:
|
| 64 |
return "<<ERROR: Invalid Audio Input Instance: {}>>".format(type(audio_data)), None
|
| 65 |
|
| 66 |
+
# # Check audio duration
|
| 67 |
+
# duration = audio_input.shape[1] / sampling_rate
|
| 68 |
+
# if duration > 10:
|
| 69 |
+
# return f"<<ERROR: Audio duration ({duration:.2f}s) exceeds 10 seconds. Please upload a shorter audio clip for faster processing.>>", None
|
| 70 |
|
| 71 |
model = models_info[model_id]["model"]
|
| 72 |
processor = models_info[model_id]["processor"]
|
tts.py
CHANGED
|
@@ -49,8 +49,8 @@ text2speech.spc2wav = None ### disable griffin-lim
|
|
| 49 |
|
| 50 |
def synthesize(text, model_id):
|
| 51 |
print(text)
|
| 52 |
-
if len(text) > 200:
|
| 53 |
-
|
| 54 |
|
| 55 |
if model_id == 'IS2AI-TurkicTTS':
|
| 56 |
return synthesize_turkic_tts(text)
|
|
|
|
| 49 |
|
| 50 |
def synthesize(text, model_id):
|
| 51 |
print(text)
|
| 52 |
+
# if len(text) > 200:
|
| 53 |
+
# raise ValueError(f"Input text exceeds 200 characters. Please provide a shorter input text for faster processing.")
|
| 54 |
|
| 55 |
if model_id == 'IS2AI-TurkicTTS':
|
| 56 |
return synthesize_turkic_tts(text)
|
util.py
CHANGED
|
@@ -4,14 +4,25 @@ from umsc import UgMultiScriptConverter
|
|
| 4 |
ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
|
| 5 |
ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
|
| 6 |
|
| 7 |
-
asr_examples = [
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
tts_examples = [
|
| 11 |
-
["قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.", "Meta-MMS"],
|
| 12 |
["قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.", "IS2AI-TurkicTTS"],
|
| 13 |
-
["
|
| 14 |
-
["
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
["Yaxshimusiz?", "Meta-MMS"],
|
| 16 |
-
["Yaxshimusiz?", "
|
| 17 |
]
|
|
|
|
| 4 |
ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
|
| 5 |
ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
|
| 6 |
|
| 7 |
+
asr_examples = [
|
| 8 |
+
['examples/1.wav', 'OpenAI-Whisper'],
|
| 9 |
+
['examples/1.wav', 'Meta-MMS'],
|
| 10 |
+
['examples/1.wav', 'Ixxan-FineTuned-Whisper'],
|
| 11 |
+
['examples/1.wav', 'Ixxan-FineTuned-MMS'],
|
| 12 |
+
['examples/2.wav', 'OpenAI-Whisper'],
|
| 13 |
+
['examples/2.wav', 'Meta-MMS'],
|
| 14 |
+
['examples/2.wav', 'Ixxan-FineTuned-Whisper'],
|
| 15 |
+
['examples/2.wav', 'Ixxan-FineTuned-MMS']
|
| 16 |
+
]
|
| 17 |
|
| 18 |
tts_examples = [
|
|
|
|
| 19 |
["قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.", "IS2AI-TurkicTTS"],
|
| 20 |
+
["قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.", "Meta-MMS"],
|
| 21 |
+
["قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.", "Ixxan-FineTuned-MMS"],
|
| 22 |
+
["Bu putbol musabiqisining axirlishishi bilen, bu musabiqe pesli axirlashti.", "IS2AI-TurkicTTS"],
|
| 23 |
+
["Bu putbol musabiqisining axirlishishi bilen, bu musabiqe pesli axirlashti.", "Meta-MMS"],
|
| 24 |
+
["Bu putbol musabiqisining axirlishishi bilen, bu musabiqe pesli axirlashti.", "Ixxan-FineTuned-MMS"],
|
| 25 |
+
["Yaxshimusiz?", "IS2AI-TurkicTTS"],
|
| 26 |
["Yaxshimusiz?", "Meta-MMS"],
|
| 27 |
+
["Yaxshimusiz?", "Ixxan-FineTuned-MMS"]
|
| 28 |
]
|