Spaces:
Build error
Build error
| import torch | |
| import os | |
| import gradio as gr | |
| from transformers import pipeline | |
| from pyChatGPT import ChatGPT | |
| from speechbrain.pretrained import Tacotron2 | |
| from speechbrain.pretrained import HIFIGAN | |
| import json | |
| import soundfile as sf | |
# Select the inference device once; the models loaded below are placed on it.
_cuda_available = torch.cuda.is_available()
device = "cuda:0" if _cuda_available else "cpu"
print(f"Is CUDA available: {_cuda_available}")
# current_device()/get_device_name() raise on hosts without CUDA, so only
# query the device name when a GPU is actually present. Without this guard
# the CPU fallback selected above could never be reached.
if _cuda_available:
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
else:
    print("CUDA device: none (running on CPU)")
# Initialise STT (Whisper). The pipeline is created with chunk_length_s=30
# and placed on the device selected above.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base.en",
    chunk_length_s=30,
    device=device,
)

# Initialise ChatGPT session. The token is read from the "SessionToken"
# environment variable; os.environ.get returns None when it is not set.
session_token = os.environ.get("SessionToken")
api = ChatGPT(session_token=session_token)

# Initialise TTS (Tacotron2) and vocoder (HiFi-GAN). Both are given the same
# run_opts so the vocoder runs on the same device as the spectrogram model
# instead of staying on the loader's default device.
tacotron2 = Tacotron2.from_hparams(
    source="speechbrain/tts-tacotron2-ljspeech",
    savedir="tmpdir_tts",
    overrides={"max_decoder_steps": 10000},
    run_opts={"device": device},
)
hifi_gan = HIFIGAN.from_hparams(
    source="speechbrain/tts-hifigan-ljspeech",
    savedir="tmpdir_vocoder",
    run_opts={"device": device},
)
def get_response_from_chatbot(text, reset_conversation):
    """Send *text* to the ChatGPT session and return the reply text.

    Args:
        text: The user's message to send.
        reset_conversation: When truthy, authentication is refreshed and the
            conversation is reset before the message is sent.

    Returns:
        The ``"message"`` field of the API response, or a fixed apology
        string when the request fails.
    """
    try:
        if reset_conversation:
            api.refresh_auth()
            api.reset_conversation()
        resp = api.send_message(text)
        response = resp["message"]
    except Exception as err:
        # Deliberately best-effort: any API failure degrades to an apology so
        # the UI keeps working. Catch Exception (not a bare except) so that
        # KeyboardInterrupt/SystemExit still propagate, and log the cause.
        print(f"ChatGPT request failed: {err!r}")
        response = "Sorry, the chatGPT queue is full. Please try again later."
    return response
def chat(input_audio, chat_history, reset_conversation):
    """Run one voice turn: transcribe, ask the chatbot, synthesise the reply.

    Args:
        input_audio: Audio input handed to the Whisper pipeline.
        chat_history: JSON string holding the previous (message, response)
            turns, or ``""`` when there are none.
        reset_conversation: When truthy, the previous turns are discarded and
            the chatbot is asked to reset its conversation.

    Returns:
        A 3-tuple of the list of turns, the same list serialised as JSON, and
        the path of the wav file the reply was written to.
    """
    # speech -> text (Whisper)
    transcription = pipe(input_audio)
    message = transcription["text"]

    # text -> response (chatGPT)
    response = get_response_from_chatbot(message, reset_conversation)

    # response -> speech (Tacotron2 output decoded by HiFi-GAN)
    mel_output, _mel_length, _alignment = tacotron2.encode_text(response)
    waveform = hifi_gan.decode_batch(mel_output)
    # 22050 is passed to soundfile as the sample rate of the written file.
    sf.write("out.wav", waveform.squeeze().cpu().numpy(), 22050)

    # Start from an empty turn list on reset or when nothing is stored yet;
    # otherwise continue from the serialised history.
    if reset_conversation or chat_history == "":
        turns = []
    else:
        turns = json.loads(chat_history)
    turns.append((message, response))

    return turns, json.dumps(turns), "out.wav"
# JavaScript executed in the browser when the start button is clicked (it is
# passed as ``_js`` to ``start_button.click``). On first run only (guarded by
# ``window.gradioEl`` being undefined) it hides #page_1, shows #page_2, sizes
# the chat panes to the client height minus 300px, and starts a 500 ms poll
# that clones new children of the hidden #chat_bot into the visible #chat_bot1.
start_work = """async() => {
    function isMobile() {
        try {
            document.createEvent("TouchEvent"); return true;
        } catch(e) {
            return false;
        }
    }

    function getClientHeight() {
        var bodyHeight = document.body.clientHeight;
        var docHeight = document.documentElement.clientHeight;
        // Both non-zero: use the smaller one; otherwise use the larger one.
        if (bodyHeight && docHeight) {
            return Math.min(bodyHeight, docHeight);
        }
        return Math.max(bodyHeight, docHeight);
    }

    function setNativeValue(element, value) {
        const valueSetter = Object.getOwnPropertyDescriptor(element.__proto__, 'value').set;
        const prototype = Object.getPrototypeOf(element);
        const prototypeValueSetter = Object.getOwnPropertyDescriptor(prototype, 'value').set;
        if (valueSetter && valueSetter !== prototypeValueSetter) {
            prototypeValueSetter.call(element, value);
        } else {
            valueSetter.call(element, value);
        }
    }

    var gradioEl = document.querySelector('body > gradio-app').shadowRoot;
    if (!gradioEl) {
        gradioEl = document.querySelector('body > gradio-app');
    }

    if (typeof window['gradioEl'] === 'undefined') {
        window['gradioEl'] = gradioEl;

        const page1 = window['gradioEl'].querySelectorAll('#page_1')[0];
        const page2 = window['gradioEl'].querySelectorAll('#page_2')[0];
        page1.style.display = "none";
        page2.style.display = "block";

        window['div_count'] = 0;
        window['chat_bot'] = window['gradioEl'].querySelectorAll('#chat_bot')[0];
        window['chat_bot1'] = window['gradioEl'].querySelectorAll('#chat_bot1')[0];
        const chat_row = window['gradioEl'].querySelectorAll('#chat_row')[0];
        const prompt_row = window['gradioEl'].querySelectorAll('#prompt_row')[0];
        window['chat_bot1'].children[1].textContent = '';

        const clientHeight = getClientHeight();
        const new_height = (clientHeight-300) + 'px';
        chat_row.style.height = new_height;
        window['chat_bot'].style.height = new_height;
        window['chat_bot'].children[2].style.height = new_height;
        window['chat_bot1'].style.height = new_height;
        window['chat_bot1'].children[2].style.height = new_height;
        prompt_row.children[0].style.flex = 'auto';
        prompt_row.children[0].style.width = '100%';

        window['checkChange'] = function checkChange() {
            try {
                if (window['chat_bot'].children[2].children[0].children.length > window['div_count']) {
                    const new_len = window['chat_bot'].children[2].children[0].children.length - window['div_count'];
                    for (var i = 0; i < new_len; i++) {
                        const new_div = window['chat_bot'].children[2].children[0].children[window['div_count'] + i].cloneNode(true);
                        window['chat_bot1'].children[2].children[0].appendChild(new_div);
                    }
                    window['div_count'] = window['chat_bot'].children[2].children[0].children.length;
                }
                if (window['chat_bot'].children[0].children.length > 1) {
                    window['chat_bot1'].children[1].textContent = window['chat_bot'].children[0].children[1].textContent;
                } else {
                    window['chat_bot1'].children[1].textContent = '';
                }
            } catch(e) {
                // Best-effort mirror: ignore the error, the next tick tries again.
            }
        }
        // Pass the function itself rather than a string of code to evaluate.
        window['checkChange_interval'] = window.setInterval(window['checkChange'], 500);
    }

    return false;
}"""
# Static HTML snippets rendered at the top of the page, in this order.
_PIPELINE_OVERVIEW_HTML = (
    "<p> Demo uses <a href='https://huggingface.co/openai/whisper-base.en' class='underline'>Whisper</a> to convert the input speech"
    " to transcribed text, <a href='https://chat.openai.com/chat' class='underline'>chatGPT</a> to generate responses, and <a"
    " href='https://huggingface.co/speechbrain/tts-tacotron2-ljspeech' class='underline'>tacotron2</a> to convert the response to"
    " output speech: </p>"
)
_PIPELINE_DIAGRAM_HTML = "<p> <center><img src='https://raw.githubusercontent.com/sanchit-gandhi/codesnippets/main/pipeline.png' width='870'></center> </p>"
_DUPLICATE_SPACE_HTML = (
    "<p>You can duplicate this space and use your own session token: <a style='display:inline-block'"
    " href='https://huggingface.co/spaces/sanchit-gandhi/chatGPT?duplicate=true'><img"
    " src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=10'"
    " alt='Duplicate Space'></a></p>"
)
_TOKEN_INSTRUCTIONS_HTML = (
    "<p> Instructions on how to obtain your session token can be found in the video <a style='display:inline-block'"
    " href='https://youtu.be/TdNSj_qgdFk?t=175'><font style='color:blue;weight:bold;'>here</font></a>."
    " Add your session token by going to <i>Settings</i> -> <i>New secret</i> and add the token under the name <i>SessionToken</i>. </p>"
)

with gr.Blocks(title="Talk to chatGPT") as demo:
    # Page header and usage notes.
    gr.Markdown("## Talk to chatGPT ##")
    gr.HTML(_PIPELINE_OVERVIEW_HTML)
    gr.HTML(_PIPELINE_DIAGRAM_HTML)
    gr.HTML(_DUPLICATE_SPACE_HTML)
    gr.HTML(_TOKEN_INSTRUCTIONS_HTML)

    # Page 1: a single start button whose click runs the start_work
    # JavaScript in the browser (no Python callback).
    with gr.Group(elem_id="page_1", visible=True) as page_1:
        with gr.Box():
            with gr.Row():
                start_button = gr.Button(
                    "Let's talk to chatGPT! 🗣",
                    elem_id="start-btn",
                    visible=True,
                )
                start_button.click(fn=None, inputs=[], outputs=[], _js=start_work)

    # Page 2: the conversation view, created hidden.
    with gr.Group(elem_id="page_2", visible=False) as page_2:
        with gr.Row(elem_id="chat_row"):
            # "chat_bot" is created invisible and is the component the chat
            # callback writes to; "chat_bot1" is the visible one.
            chatbot = gr.Chatbot(elem_id="chat_bot", visible=False).style(color_map=("green", "blue"))
            chatbot1 = gr.Chatbot(elem_id="chat_bot1").style(color_map=("green", "blue"))

        with gr.Row():
            prompt_input_audio = gr.Audio(
                source="microphone",
                type="filepath",
                label="Record Audio Input",
            )
            prompt_output_audio = gr.Audio()
            reset_conversation = gr.Checkbox(label="Reset conversation?", value=False)

        with gr.Row(elem_id="prompt_row"):
            # Hidden textbox carrying the JSON-serialised history between calls.
            chat_history = gr.Textbox(lines=4, label="prompt", visible=False)
            submit_btn = gr.Button(value="Send to chatGPT", elem_id="submit-btn").style(
                margin=True,
                rounded=(True,) * 4,
                width=100,
            )
            submit_btn.click(
                fn=chat,
                inputs=[prompt_input_audio, chat_history, reset_conversation],
                outputs=[chatbot, chat_history, prompt_output_audio],
            )

demo.launch(debug=True)