Spaces:

mails10
/

Expressive-TTS

Runtime error

App Files Files Community

Expressive-TTS / app.py

mails10

Update app.py

9aa9353 verified 10 months ago

raw

history blame contribute delete

1.73 kB

	import os
	import tempfile

	import gradio as gr
	import numpy as np
	import soundfile as sf
	import whisper
	from transformers import AutoModel


	# Text-to-speech: IndicF5, fetched from the Hugging Face Hub.
	# NOTE: trust_remote_code=True lets transformers execute the Python code
	# shipped inside the model repository, so only use it with a trusted repo.
	tts_model = AutoModel.from_pretrained(
	    "ai4bharat/IndicF5",
	    trust_remote_code=True,
	)

	# Speech recognition: Whisper "medium" checkpoint.
	asr_model = whisper.load_model("medium")

	# Sample rate used when writing the synthesized waveform to disk.
	TTS_SAMPLE_RATE = 24000

	# Language code passed to Whisper when transcribing the synthesized audio.
	ASR_LANGUAGE = "ta"


	def _resolve_reference_audio(ref_audio):
	    """Return ``(path, is_temporary)`` for the reference audio input.

	    A string / path-like value is used as-is. Any other object is treated as
	    a binary file-like object: its bytes are copied into a new ``.wav``
	    temporary file, and ``is_temporary`` is True so the caller can delete it.
	    """
	    if isinstance(ref_audio, (str, os.PathLike)):
	        return os.fspath(ref_audio), False

	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	        tmp.write(ref_audio.read())
	    return tmp.name, True


	def generate_tts_and_transcribe(text, ref_audio, ref_text):
	    """Synthesize ``text`` with IndicF5, then transcribe the result with Whisper.

	    Args:
	        text: Text to synthesize.
	        ref_audio: Reference audio, either a filesystem path (what
	            ``gr.Audio(type="filepath")`` provides) or a binary file-like
	            object exposing ``read()``.
	        ref_text: Transcript of the reference audio, forwarded to the TTS
	            model unchanged.

	    Returns:
	        A ``(tts_path, transcript)`` tuple: the path of the generated WAV
	        file and the Whisper transcription of that file.

	    Raises:
	        gr.Error: If ``text`` is blank or no reference audio was supplied.
	    """
	    if not text or not text.strip():
	        raise gr.Error("Please enter the text to synthesize.")
	    if ref_audio is None:
	        raise gr.Error("Please provide a reference audio clip.")

	    ref_audio_path, is_temporary = _resolve_reference_audio(ref_audio)
	    try:
	        # Generate speech using IndicF5
	        audio = tts_model(text, ref_audio_path=ref_audio_path, ref_text=ref_text)
	    finally:
	        if is_temporary:
	            # Best-effort cleanup of our own copy; a failed delete must not
	            # mask the synthesis result or its exception.
	            try:
	                os.remove(ref_audio_path)
	            except OSError:
	                pass

	    # Coerce to an array before inspecting dtype, then scale 16-bit PCM
	    # samples into the [-1.0, 1.0) float range.
	    audio = np.asarray(audio)
	    if audio.dtype == np.int16:
	        audio = audio.astype(np.float32) / 32768.0
	    audio = np.array(audio, dtype=np.float32)

	    # Reserve an output path and close the handle immediately, so nothing is
	    # leaked and soundfile can reopen the path by name. The file is kept
	    # (delete=False) because its path is returned to the caller.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_file:
	        tts_path = out_file.name
	    sf.write(tts_path, audio, samplerate=TTS_SAMPLE_RATE)

	    # Transcribe using Whisper
	    asr_result = asr_model.transcribe(tts_path, language=ASR_LANGUAGE)
	    transcript = asr_result["text"]

	    return tts_path, transcript


	# Gradio Interface
	demo = gr.Interface(
	    fn=generate_tts_and_transcribe,
	    inputs=[
	        gr.Textbox(label="Text to Synthesize (Tamil)"),
	        # "filepath" hands the handler a path string; "file" is not an
	        # accepted `type` value in current Gradio releases.
	        gr.Audio(label="Reference Audio (.wav)", type="filepath"),
	        gr.Textbox(label="Reference Text (Tamil)"),
	    ],
	    outputs=[
	        gr.Audio(label="Generated Audio", type="filepath"),
	        gr.Textbox(label="ASR Transcription (Whisper)"),
	    ],
	    title="IndicF5 Tamil TTS + Whisper ASR",
	    description="Give a reference audio and text, synthesize Tamil speech using IndicF5, and transcribe it with Whisper.",
	)

	# Start the Gradio server only when executed as a script, not on import.
	if __name__ == "__main__":
	    demo.launch()