Podcastify

Sleeping

eswardivi committed on May 2, 2024

Commit

4fbe883

1 Parent(s): 268d0e5

conversation

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,29 +6,35 @@ os.system('python -m unidic download')
 # print("Make sure you've downloaded unidic (python -m unidic download) for this WebUI to work.")
 from melo.api import TTS
 import tempfile
 # Pre-commit version of synthesize (lines marked "-" are removed by this commit).
 # Parses a JSON conversation, runs text-to-speech on each turn with one of two
 # alternating English speakers, and returns the raw bytes of all turns written
 # one after another into a single in-memory buffer.
 #
 # conversation_text: JSON string with a top-level "conversation" list whose
 #                    items each carry a "text" field.
 # speed:             accepted as a parameter but overwritten with 1.0 below.
 # progress:          Gradio progress tracker; its tqdm wrapper is handed to the
 #                    TTS call as the progress-bar hook.
 @spaces.GPU
 def synthesize(conversation_text, speed, progress=gr.Progress()):
     # The caller-supplied speed is discarded here; synthesis always runs at 1.0.
-    speed = 1.0
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
     # The TTS model is constructed inside the function, i.e. on every call.
     models = {
         'EN': TTS(language='EN', device=device),
     }
     # Two speaker names; turns alternate between them by index parity.
     speakers = ['EN-US', 'EN-Default']
-    final_audio = io.BytesIO()
     conversation = json.loads(conversation_text)
     for i, turn in enumerate(conversation["conversation"]):
-        bio = io.BytesIO()
-        text = turn["text"]
-        print(text)
-        speaker = speakers[i % 2]
         # Look up the model's id for the chosen speaker name.
         speaker_id = models['EN'].hps.data.spk2id[speaker]
-        models['EN'].tts_to_file(text, speaker_id, bio, speed=speed, pbar=progress.tqdm, format='wav')
         # The bytes of this turn's buffer are appended unchanged.
         # NOTE(review): presumably each buffer is a complete WAV with its own
         # header, so the concatenation is likely not one valid WAV - confirm.
-        final_audio.write(bio.getvalue())
-    return final_audio.getvalue()
 with gr.Blocks() as demo:
     gr.Markdown('# Turn Any Article into Podcast')
     gr.Markdown('## Easily convert articles from URLs into listenable audio Podcast.')

 # print("Make sure you've downloaded unidic (python -m unidic download) for this WebUI to work.")
 from melo.api import TTS
 import tempfile
+import wave
+from pydub import AudioSegment
 # Post-commit version of synthesize (lines marked "+" are added by this commit).
 # Parses a JSON conversation, runs text-to-speech on each turn with one of two
 # alternating English speakers, joins the per-turn audio with pydub, exports
 # the result as an MP3 file, and returns that file's path.
 #
 # conversation_text: JSON string with a top-level "conversation" list whose
 #                    items each carry a "text" field.
 # speed:             forwarded unchanged to the TTS call.
 # progress:          Gradio progress tracker; its tqdm wrapper is handed to the
 #                    TTS call as the progress-bar hook.
 # Returns:           the string 'final.mp3' (a relative path).
 @spaces.GPU
 def synthesize(conversation_text, speed, progress=gr.Progress()):
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
     # The TTS model is constructed inside the function, i.e. on every call.
     # NOTE(review): presumably expensive; consider building it once - confirm.
     models = {
         'EN': TTS(language='EN', device=device),
     }
     # Two speaker names; turns alternate between them by index parity.
     speakers = ['EN-US', 'EN-Default']
     # Start from an empty segment so each turn can be appended with +=.
+    combined_audio = AudioSegment.empty()
     conversation = json.loads(conversation_text)
     for i, turn in enumerate(conversation["conversation"]):
+        bio = io.BytesIO()
+        text = turn["text"]
+        speaker = speakers[i % 2]
         # Look up the model's id for the chosen speaker name.
         speaker_id = models['EN'].hps.data.spk2id[speaker]
         # Synthesize this turn as WAV into the in-memory buffer.
+        models['EN'].tts_to_file(text, speaker_id, bio, speed=speed, pbar=progress.tqdm, format='wav')
         # Rewind so the buffer is read from its start rather than its end.
+        bio.seek(0)
+        audio_segment = AudioSegment.from_file(bio, format="wav")
+        combined_audio += audio_segment
     # The output name is a fixed relative path, so every call writes the same file.
     # NOTE(review): overlapping requests would overwrite each other's output;
     # a per-call temporary file may be safer - confirm against deployment.
+    final_audio_path = 'final.mp3'
+    combined_audio.export(final_audio_path, format='mp3')
+    return final_audio_path
 with gr.Blocks() as demo:
     gr.Markdown('# Turn Any Article into Podcast')
     gr.Markdown('## Easily convert articles from URLs into listenable audio Podcast.')