# Spaces status: Runtime error
import os
import tempfile

import gradio as gr
import numpy as np
import soundfile as sf
import whisper
from transformers import AutoModel
# Identifiers of the two pretrained models this script depends on.
TTS_MODEL_ID = "ai4bharat/IndicF5"
ASR_MODEL_NAME = "medium"

# Both models are loaded once, at import time, and kept as module globals.
# NOTE: trust_remote_code=True lets the model repository's own Python code run.
tts_model = AutoModel.from_pretrained(TTS_MODEL_ID, trust_remote_code=True)
asr_model = whisper.load_model(ASR_MODEL_NAME)
# Sample rate (Hz) used when writing the synthesized waveform to disk.
TTS_SAMPLE_RATE = 24000
# Language code handed to Whisper when transcribing the synthesized audio.
ASR_LANGUAGE = "ta"


def _resolve_ref_audio_path(ref_audio):
    """Return ``(path, is_temporary)`` for the reference-audio input.

    Accepts either a filesystem path (what ``gr.Audio(type="filepath")``
    delivers) or a readable file-like object, whose contents are copied to a
    new ``.wav`` temp file. ``is_temporary`` tells the caller whether it owns
    the returned file and should delete it after use.
    """
    if isinstance(ref_audio, (str, os.PathLike)):
        return os.fspath(ref_audio), False
    if hasattr(ref_audio, "read"):
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp.write(ref_audio.read())
        return tmp.name, True
    raise gr.Error("Unsupported reference audio input.")


def generate_tts_and_transcribe(text, ref_audio, ref_text):
    """Synthesize ``text`` with the TTS model and transcribe the result.

    Args:
        text: Text to synthesize.
        ref_audio: Reference audio, as a file path or a readable file-like
            object.
        ref_text: Transcript of the reference audio, forwarded to the TTS
            model as ``ref_text``.

    Returns:
        A ``(tts_path, transcript)`` tuple: the path of the generated ``.wav``
        file and the ``"text"`` field of Whisper's transcription of that file.

    Raises:
        gr.Error: If no reference audio was supplied or its type is not
            supported.
    """
    if ref_audio is None:
        raise gr.Error("Please provide a reference audio clip.")

    ref_audio_path, ref_is_temporary = _resolve_ref_audio_path(ref_audio)
    try:
        # Generate speech using IndicF5.
        audio = tts_model(text, ref_audio_path=ref_audio_path, ref_text=ref_text)
    finally:
        # Only remove files this function created; never the caller's upload.
        if ref_is_temporary:
            try:
                os.remove(ref_audio_path)
            except OSError:
                pass

    # Coerce first so the dtype check below also works for non-ndarray output.
    audio = np.asarray(audio)
    if audio.dtype == np.int16:
        # Scale 16-bit PCM into the [-1.0, 1.0) float range.
        audio = audio.astype(np.float32) / 32768.0
    samples = np.asarray(audio, dtype=np.float32)

    # Reserve a unique output path, then close the handle before soundfile
    # reopens the file; the file must outlive this call so Gradio can serve it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_file:
        tts_path = out_file.name
    sf.write(tts_path, samples, samplerate=TTS_SAMPLE_RATE)

    # Transcribe the synthesized audio using Whisper.
    asr_result = asr_model.transcribe(tts_path, language=ASR_LANGUAGE)
    return tts_path, asr_result["text"]


# Gradio interface: the callback receives the reference audio as a file path.
demo = gr.Interface(
    fn=generate_tts_and_transcribe,
    inputs=[
        gr.Textbox(label="Text to Synthesize (Tamil)"),
        gr.Audio(label="Reference Audio (.wav)", type="filepath"),
        gr.Textbox(label="Reference Text (Tamil)"),
    ],
    outputs=[
        gr.Audio(label="Generated Audio", type="filepath"),
        gr.Textbox(label="ASR Transcription (Whisper)"),
    ],
    title="IndicF5 Tamil TTS + Whisper ASR",
    description="Give a reference audio and text, synthesize Tamil speech using IndicF5, and transcribe it with Whisper.",
)

if __name__ == "__main__":
    demo.launch()