# Spaces: Running on Zero
import os
import tempfile
import time
from datetime import datetime

import gradio as gr
import spaces
import torch
import torchaudio
from tortoise.api import TextToSpeech
from tortoise.utils.audio import load_audio, load_voice, load_voices
from tortoise.utils.text import split_and_recombine_text
# Shared TextToSpeech engine: constructed once when the module is imported and
# reused by every inference() call.
# NOTE(review): kv_cache=True presumably enables key/value caching during
# autoregressive decoding -- confirm against tortoise.api.TextToSpeech.
tts = TextToSpeech(kv_cache=True)
def inference(
    text,
    reference_audio,
):
    """Synthesize speech for ``text`` using ``reference_audio`` as the voice.

    The text is split into chunks with ``split_and_recombine_text``; each chunk
    is passed to ``tts.tts_with_preset`` (preset ``"fast"``) and every audio
    frame it yields is concatenated into a single waveform, which is written
    to a temporary WAV file with a sample rate of 24000.

    Args:
        text: The text to be spoken.
        reference_audio: Filesystem path of the reference voice recording.

    Returns:
        Path of the generated ``.wav`` file. The file is created with
        ``delete=False`` and is not removed by this function.

    Raises:
        ValueError: If no reference audio was supplied, or if synthesis
            produced no audio frames (e.g. empty text).
    """
    if not reference_audio:
        raise ValueError("A reference audio file is required.")

    # The reference clip does not change between chunks, so load it once.
    # NOTE(review): assumes tortoise's load_audio(path, sampling_rate)
    # signature and that voice_samples is a list of clips loaded at 22050 Hz,
    # as tortoise.utils.audio.load_voice does -- confirm against the
    # installed tortoise version.
    voice_samples = [load_audio(reference_audio, 22050)]

    all_parts = []
    for chunk in split_and_recombine_text(text):
        all_parts.extend(
            tts.tts_with_preset(
                chunk,
                voice_samples=voice_samples,
                preset="fast",
            )
        )

    if not all_parts:
        # torch.cat([]) would fail with an opaque error; be explicit instead.
        raise ValueError("No audio was generated for the given text.")

    wav = torch.cat(all_parts, dim=0).unsqueeze(0)
    print(wav.shape)

    # NamedTemporaryFile atomically creates the file, unlike the deprecated,
    # race-prone tempfile.mktemp. Close it before saving so torchaudio can
    # reopen the path on every platform.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_wav_path = tmp.name
    torchaudio.save(output_wav_path, wav.cpu(), 24000)
    return output_wav_path
def main():
    """Build the Gradio interface around ``inference`` and launch it.

    The UI takes a text box and a reference-audio input (delivered to the
    callback as a file path) and plays back the generated speech.
    """
    prompt_box = gr.Textbox(label="Text")
    voice_clip = gr.Audio(label="Reference Audio", type="filepath")
    speech_player = gr.Audio(label="Generated Speech")

    demo = gr.Interface(
        fn=inference,
        inputs=[prompt_box, voice_clip],
        outputs=speech_player,
        title="Tortoise TTS 🐢",
    )
    demo.launch()
if __name__ == "__main__":
    # Append a timestamped banner to the run log before starting the app.
    with open("Tortoise_TTS_Runs_Scripts.log", "a") as log_file:
        log_file.write(
            f"\n\n-------------------------Tortoise TTS Scripts Logs, {datetime.now()}-------------------------\n"
        )

    main()