"""Gradio app that generates Hindi speech from text with a Coqui XTTS model.

The model files are downloaded from the Hugging Face Hub at import time, the
XTTS checkpoint is loaded once, and a Gradio interface exposes
``speech_gen`` (reference audio + Hindi text -> generated WAV file).
"""

import logging
import os
import tempfile
import time

import gradio as gr
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts
from huggingface_hub import hf_hub_download
import scipy.io.wavfile
import soundfile as sf

logger = logging.getLogger(__name__)

# Hugging Face repository that hosts the fine-tuned checkpoint files.
MODEL_REPO_ID = "16pramodh/coqui-hindi"

# Sample rate (Hz) passed to soundfile when writing the generated waveform.
# NOTE(review): presumably this matches the XTTS output rate -- confirm
# against the model's audio config before changing it.
OUTPUT_SAMPLE_RATE = 24000

# Set the cache directory for Hugging Face models to ensure they are saved
# within the Space.
# NOTE(review): this assignment runs after ``huggingface_hub`` has been
# imported; the library may read its cache location at import time, in which
# case this has no effect. Order kept as-is to avoid changing where the
# deployed app caches files -- confirm and move above the imports if intended.
os.environ['HUGGINGFACE_HUB_CACHE'] = '/app/.cache/huggingface/hub'

# Required for agreeing to the Coqui license terms (non-commercial use).
os.environ['COQUI_TOS_AGREED'] = '1'

config_path = hf_hub_download(MODEL_REPO_ID, "config.json")
model_path = hf_hub_download(MODEL_REPO_ID, "model.pth")
# Not referenced again below; downloaded so the file is available locally.
# Presumably the checkpoint loader looks for it inside ``checkpoint_dir``.
vocab_path = hf_hub_download(MODEL_REPO_ID, "vocab.json")

checkpoint_dir = os.path.dirname(model_path)

config = XttsConfig()
config.load_json(config_path)
model = Xtts.init_from_config(config)
model.load_checkpoint(config, checkpoint_dir=checkpoint_dir, eval=True)


def speech_gen(audio_file_path, hindi_text):
    """Generate Hindi speech for ``hindi_text`` and return a WAV file path.

    This is the function Gradio exposes through the interface/API.

    Args:
        audio_file_path: Path to the reference audio clip, passed to the model
            as ``speaker_wav``. ``None`` when the user supplied no audio.
        hindi_text: Text to synthesize (language is fixed to ``"hi"``).

    Returns:
        Path of a newly created temporary ``.wav`` file holding the generated
        audio. The file is not deleted by this function.

    Raises:
        gr.Error: If no audio or no text was provided, or if synthesis or
            writing the output file fails. Gradio shows the message to the
            user instead of silently returning an empty result.
    """
    # Validate outside the try block so these user-facing errors are not
    # swallowed and re-wrapped by the generic failure handler below.
    if audio_file_path is None:
        raise gr.Error("No audio file provided.")
    if not hindi_text or not hindi_text.strip():
        raise gr.Error("No text provided.")

    output_path = None
    try:
        outputs = model.synthesize(
            hindi_text,
            config,
            speaker_wav=audio_file_path,
            gpt_cond_len=3,
            language="hi",
        )

        # Reserve a unique file name, then close the handle immediately:
        # soundfile reopens the path itself, and leaving the handle open
        # would leak one file descriptor per request.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav_file:
            output_path = temp_wav_file.name
        sf.write(output_path, outputs["wav"], OUTPUT_SAMPLE_RATE)
    except Exception as e:
        logger.exception("Speech generation failed")
        # Do not leave a partially written temp file behind.
        if output_path is not None:
            try:
                os.remove(output_path)
            except OSError:
                pass
        raise gr.Error(
            "Speech generation failed; see the server logs for details."
        ) from e

    return output_path


# Create the Gradio interface.
iface = gr.Interface(
    fn=speech_gen,
    inputs=[gr.Audio(type="filepath"), "text"],
    outputs=gr.Audio(type="filepath"),
    title="Coqui TTS model space",
    description="converts Hindi text to Hindi speech",
)

# Launch the Gradio app.
if __name__ == "__main__":
    iface.launch()