import gradio as gr
from gradio_client import Client
import torch
import os
from scipy.io.wavfile import write

hf_token = os.environ.get('HF_TOKEN')

#splt_client = Client("https://fffiloni-splittrack2musicgen.hf.space/")
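
# Separate the uploaded song into stems with Demucs and return the path of the
# requested stem ("vocals", "bass", "drums", "other", or "all-in" for the
# unprocessed mix).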
def split_process(audio, chosen_out_track):
    os.makedirs("out", exist_ok=True)
    write('test.wav', audio[0], audio[1])
    os.system("python3 -m demucs.separate -n mdx_extra_q -j 4 test.wav -o out")
| #return "./out/mdx_extra_q/test/vocals.wav","./out/mdx_extra_q/test/bass.wav","./out/mdx_extra_q/test/drums.wav","./out/mdx_extra_q/test/other.wav" | |
| if chosen_out_track == "vocals": | |
| return "./out/mdx_extra_q/test/vocals.wav" | |
| elif chosen_out_track == "bass": | |
| return "./out/mdx_extra_q/test/bass.wav" | |
| elif chosen_out_track == "drums": | |
| return "./out/mdx_extra_q/test/drums.wav" | |
| elif chosen_out_track == "other": | |
| return "./out/mdx_extra_q/test/other.wav" | |
| elif chosen_out_track == "all-in": | |
| return "test.wav" | |
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
import tempfile

MODEL_NAME = "openai/whisper-large-v3-turbo"
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000
YT_LENGTH_LIMIT_S = 3600  # limit to 1 hour YouTube files

device = 0 if torch.cuda.is_available() else "cpu"
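
# Build the ASR pipeline; chunk_length_s=30 splits long audio into 30-second
# windows so full-length songs can be transcribed.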
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
    token=hf_token
)

#@spaces.GPU
def transcribe(inputs, task):
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
    return text
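
# For example, transcribe("./out/mdx_extra_q/test/vocals.wav", "transcribe")
# returns the recognized text for the separated vocals track, which is exactly
# how infer() below calls it.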

import re

def format_lyrics(text):
    # Remove unwanted subtitle artifacts
    text = re.sub(r"Sous-?titrage Société Radio-Canada", "", text, flags=re.IGNORECASE)
    # Optionally remove repeated whitespace or multiple newlines
    text = re.sub(r'\n+', '\n', text).strip()
    # Use regex to find parts that start with a capital letter and insert a newline
    formatted_text = re.sub(r'(?<!^)([A-Z])', r'\n\1', text)
    # Remove any leading whitespace on each line
    formatted_text = re.sub(r'^[ \t]+', '', formatted_text, flags=re.MULTILINE)
    return formatted_text
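
# Example (hypothetical input, for illustration only):
#   format_lyrics("I heard there was A secret chord")
#   -> "I heard there was \nA secret chord"  (each capital letter starts a new line)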

def infer(audio_input):
    # STEP 1 | Split vocals from the song/audio file
    splt_result = split_process(audio_input, "vocals")
    print(splt_result)

    # STEP 2 | Transcribe
    # TO-DO : handling errors if JAX demo queue is full
    whisper_result = transcribe(
        splt_result,   # str (filepath or URL to file) in 'inputs' Audio component
        "transcribe",  # str in 'Task' Radio component
    )
    print(whisper_result)
    #return whisper_result[0]  # if using JAX

    lyrics = format_lyrics(whisper_result)
    print(lyrics)

    return splt_result, lyrics
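
# Hypothetical sketch (not part of the original flow; the name infer_safe and
# the error message are assumptions): the TO-DO above could be handled by
# wrapping infer so failures surface as a gr.Error in the UI.
#
# def infer_safe(audio_input):
#     try:
#         return infer(audio_input)
#     except Exception as e:
#         raise gr.Error(f"Processing failed: {e}")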
| css = """ | |
| #col-container {max-width: 510px; margin-left: auto; margin-right: auto;} | |
| """ | |

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML("""<div style="text-align: center; max-width: 700px; margin: 0 auto;">
            <div
                style="
                    display: inline-flex;
                    align-items: center;
                    gap: 0.8rem;
                    font-size: 1.75rem;
                "
            >
                <h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
                    Song To Lyrics
                </h1>
            </div>
            <p style="margin-bottom: 10px; font-size: 94%">
                Send the audio file of your favorite song, and get the lyrics! <br />
                Under the hood, we split out the vocals track from the audio file, then send the vocals to Whisper.
            </p>
        </div>""")

        song_in = gr.Audio(label="Song input", type="numpy", sources="upload")
        getlyrics_btn = gr.Button("Get Lyrics!")
        vocals_out = gr.Audio(label="Vocals Only")
        lyrics_res = gr.Textbox(label="Lyrics")

        getlyrics_btn.click(fn=infer, inputs=[song_in], outputs=[vocals_out, lyrics_res])

demo.queue().launch()