Spaces:
Running
on
Zero
Running
on
Zero
cn lyrics example
Browse files
- app.py +33 -30
- diffrhythm/infer/infer_utils.py +1 -0
- src/prompt/rap_cn.wav +0 -0
- src/prompt/rap_en.wav +0 -0
app.py
CHANGED
|
@@ -29,15 +29,18 @@ device='cuda'
|
|
| 29 |
cfm, tokenizer, muq, vae = prepare_model(device)
|
| 30 |
cfm = torch.compile(cfm)
|
| 31 |
|
| 32 |
-
@spaces.GPU
|
| 33 |
def infer_music(lrc, ref_audio_path, seed=42, randomize_seed=False, steps=32, file_type='wav', max_frames=2048, device='cuda'):
|
| 34 |
|
| 35 |
if randomize_seed:
|
| 36 |
seed = random.randint(0, MAX_SEED)
|
| 37 |
torch.manual_seed(seed)
|
| 38 |
sway_sampling_coef = -1 if steps < 32 else None
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
| 41 |
negative_style_prompt = get_negative_style_prompt(device)
|
| 42 |
latent_prompt = get_reference_latent(device, max_frames)
|
| 43 |
generated_song = inference(cfm_model=cfm,
|
|
@@ -169,7 +172,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 169 |
with gr.Row():
|
| 170 |
with gr.Column():
|
| 171 |
lrc = gr.Textbox(
|
| 172 |
-
label="
|
| 173 |
placeholder="Input the full lyrics",
|
| 174 |
lines=12,
|
| 175 |
max_lines=50,
|
|
@@ -181,26 +184,23 @@ with gr.Blocks(css=css) as demo:
|
|
| 181 |
with gr.Column():
|
| 182 |
with gr.Accordion("Best Practices Guide", open=True):
|
| 183 |
gr.Markdown("""
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
4. **Supported Languages**
|
| 202 |
-
- **Chinese and English**
|
| 203 |
-
- More languages coming soon
|
| 204 |
""")
|
| 205 |
|
| 206 |
lyrics_btn = gr.Button("Generate", variant="primary")
|
|
@@ -239,23 +239,26 @@ with gr.Blocks(css=css) as demo:
|
|
| 239 |
["./src/prompt/classic_en.wav"],
|
| 240 |
["./src/prompt/jazz_cn.wav"],
|
| 241 |
["./src/prompt/jazz_en.wav"],
|
|
|
|
|
|
|
| 242 |
["./src/prompt/default.wav"]
|
| 243 |
],
|
| 244 |
inputs=[audio_prompt],
|
| 245 |
label="Audio Examples",
|
| 246 |
-
examples_per_page=
|
| 247 |
elem_id="audio-examples-container"
|
| 248 |
)
|
| 249 |
|
| 250 |
gr.Examples(
|
| 251 |
examples=[
|
| 252 |
["""[00:10.00]Moonlight spills through broken blinds\n[00:13.20]Your shadow dances on the dashboard shrine\n[00:16.85]Neon ghosts in gasoline rain\n[00:20.40]I hear your laughter down the midnight train\n[00:24.15]Static whispers through frayed wires\n[00:27.65]Guitar strings hum our cathedral choirs\n[00:31.30]Flicker screens show reruns of June\n[00:34.90]I'm drowning in this mercury lagoon\n[00:38.55]Electric veins pulse through concrete skies\n[00:42.10]Your name echoes in the hollow where my heartbeat lies\n[00:45.75]We're satellites trapped in parallel light\n[00:49.25]Burning through the atmosphere of endless night\n[01:00.00]Dusty vinyl spins reverse\n[01:03.45]Our polaroid timeline bleeds through the verse\n[01:07.10]Telescope aimed at dead stars\n[01:10.65]Still tracing constellations through prison bars\n[01:14.30]Electric veins pulse through concrete skies\n[01:17.85]Your name echoes in the hollow where my heartbeat lies\n[01:21.50]We're satellites trapped in parallel light\n[01:25.05]Burning through the atmosphere of endless night\n[02:10.00]Clockwork gears grind moonbeams to rust\n[02:13.50]Our fingerprint smudged by interstellar dust\n[02:17.15]Velvet thunder rolls through my veins\n[02:20.70]Chasing phantom trains through solar plane\n[02:24.35]Electric veins pulse through concrete skies\n[02:27.90]Your name echoes in the hollow where my heartbeat lies"""],
|
| 253 |
-
["""[00:04.34]Tell me that I'm special\n[00:06.57]Tell me I look pretty\n[00:08.46]Tell me I'm a little angel\n[00:10.58]Sweetheart of your city\n[00:13.64]Say what I'm dying to hear\n[00:17.35]Cause I'm dying to hear you\n[00:20.86]Tell me I'm that new thing\n[00:22.93]Tell me that I'm relevant\n[00:24.96]Tell me that I got a big heart\n[00:27.04]Then back it up with evidence\n[00:29.94]I need it and I don't know why\n[00:34.28]This late at night\n[00:36.32]Isn't it lonely\n[00:39.24]I'd do anything to make you want me\n[00:43.40]I'd give it all up if you told me\n[00:47.42]That I'd be\n[00:49.43]The number one girl in your eyes\n[00:52.85]Your one and only\n[00:55.74]So what's it gon' take for you to want me\n[00:59.78]I'd give it all up if you told me\n[01:03.89]That I'd be\n[01:05.94]The number one girl in your eyes\n[01:11.34]Tell me I'm going real big places\n[01:14.32]Down to earth so friendly\n[01:16.30]And even through all the phases\n[01:18.46]Tell me you accept me\n[01:21.56]Well that's all I'm dying to hear\n[01:25.30]Yeah I'm dying to hear you\n[01:28.91]Tell me that you need me\n[01:30.85]Tell me that I'm loved\n[01:32.90]Tell me that I'm worth it"""]
|
|
|
|
| 254 |
],
|
| 255 |
|
| 256 |
inputs=[lrc],
|
| 257 |
label="Lrc Examples",
|
| 258 |
-
examples_per_page=
|
| 259 |
elem_id="lrc-examples-container",
|
| 260 |
)
|
| 261 |
|
|
@@ -270,7 +273,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 270 |
gr.Markdown("### Method 1: Generate from Theme")
|
| 271 |
theme = gr.Textbox(label="theme", placeholder="Enter song theme, e.g: Love and Heartbreak")
|
| 272 |
tags_gen = gr.Textbox(label="tags", placeholder="Enter song tags, e.g: pop confidence healing")
|
| 273 |
-
language = gr.Radio(["
|
| 274 |
gen_from_theme_btn = gr.Button("Generate LRC (From Theme)", variant="primary")
|
| 275 |
|
| 276 |
gr.Examples(
|
|
@@ -283,7 +286,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 283 |
[
|
| 284 |
"Heroic Epic",
|
| 285 |
"choir orchestral powerful",
|
| 286 |
-
"
|
| 287 |
]
|
| 288 |
],
|
| 289 |
inputs=[theme, tags_gen, language],
|
|
@@ -321,7 +324,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 321 |
|
| 322 |
with gr.Column():
|
| 323 |
lrc_output = gr.Textbox(
|
| 324 |
-
label="Generated LRC
|
| 325 |
placeholder="Timed lyrics will appear here",
|
| 326 |
lines=57,
|
| 327 |
elem_classes="lrc-output",
|
|
|
|
| 29 |
cfm, tokenizer, muq, vae = prepare_model(device)
|
| 30 |
cfm = torch.compile(cfm)
|
| 31 |
|
| 32 |
+
@spaces.GPU(duration=20)
|
| 33 |
def infer_music(lrc, ref_audio_path, seed=42, randomize_seed=False, steps=32, file_type='wav', max_frames=2048, device='cuda'):
|
| 34 |
|
| 35 |
if randomize_seed:
|
| 36 |
seed = random.randint(0, MAX_SEED)
|
| 37 |
torch.manual_seed(seed)
|
| 38 |
sway_sampling_coef = -1 if steps < 32 else None
|
| 39 |
+
try:
|
| 40 |
+
lrc_prompt, start_time = get_lrc_token(lrc, tokenizer, device)
|
| 41 |
+
style_prompt = get_style_prompt(muq, ref_audio_path)
|
| 42 |
+
except Exception as e:
|
| 43 |
+
raise gr.Error(f"Error: {str(e)}")
|
| 44 |
negative_style_prompt = get_negative_style_prompt(device)
|
| 45 |
latent_prompt = get_reference_latent(device, max_frames)
|
| 46 |
generated_song = inference(cfm_model=cfm,
|
|
|
|
| 172 |
with gr.Row():
|
| 173 |
with gr.Column():
|
| 174 |
lrc = gr.Textbox(
|
| 175 |
+
label="Lyrics",
|
| 176 |
placeholder="Input the full lyrics",
|
| 177 |
lines=12,
|
| 178 |
max_lines=50,
|
|
|
|
| 184 |
with gr.Column():
|
| 185 |
with gr.Accordion("Best Practices Guide", open=True):
|
| 186 |
gr.Markdown("""
|
| 187 |
+
1. **Lyrics Format Requirements**
|
| 188 |
+
- Each line must follow: `[mm:ss.xx]Lyric content`
|
| 189 |
+
- Example of valid format:
|
| 190 |
+
```
|
| 191 |
+
[00:10.00]Moonlight spills through broken blinds
|
| 192 |
+
[00:13.20]Your shadow dances on the dashboard shrine
|
| 193 |
+
```
|
| 194 |
+
2. **Generation Duration Limits**
|
| 195 |
+
- Current version supports maximum **95 seconds** of music generation
|
| 196 |
+
- Total timestamps should not exceed 01:35.00 (95 seconds)
|
| 197 |
+
3. **Audio Prompt Requirements**
|
| 198 |
+
- Reference audio should be ≥ 1 second, audio >10 seconds will be randomly clipped into 10 seconds
|
| 199 |
+
- For optimal results, the 10-second clips should be carefully selected
|
| 200 |
+
- Shorter clips may lead to incoherent generation
|
| 201 |
+
4. **Supported Languages**
|
| 202 |
+
- **Chinese and English**
|
| 203 |
+
- More languages coming soon
|
|
|
|
|
|
|
|
|
|
| 204 |
""")
|
| 205 |
|
| 206 |
lyrics_btn = gr.Button("Generate", variant="primary")
|
|
|
|
| 239 |
["./src/prompt/classic_en.wav"],
|
| 240 |
["./src/prompt/jazz_cn.wav"],
|
| 241 |
["./src/prompt/jazz_en.wav"],
|
| 242 |
+
["./src/prompt/rap_cn.wav"],
|
| 243 |
+
["./src/prompt/rap_en.wav"],
|
| 244 |
["./src/prompt/default.wav"]
|
| 245 |
],
|
| 246 |
inputs=[audio_prompt],
|
| 247 |
label="Audio Examples",
|
| 248 |
+
examples_per_page=13,
|
| 249 |
elem_id="audio-examples-container"
|
| 250 |
)
|
| 251 |
|
| 252 |
gr.Examples(
|
| 253 |
examples=[
|
| 254 |
["""[00:10.00]Moonlight spills through broken blinds\n[00:13.20]Your shadow dances on the dashboard shrine\n[00:16.85]Neon ghosts in gasoline rain\n[00:20.40]I hear your laughter down the midnight train\n[00:24.15]Static whispers through frayed wires\n[00:27.65]Guitar strings hum our cathedral choirs\n[00:31.30]Flicker screens show reruns of June\n[00:34.90]I'm drowning in this mercury lagoon\n[00:38.55]Electric veins pulse through concrete skies\n[00:42.10]Your name echoes in the hollow where my heartbeat lies\n[00:45.75]We're satellites trapped in parallel light\n[00:49.25]Burning through the atmosphere of endless night\n[01:00.00]Dusty vinyl spins reverse\n[01:03.45]Our polaroid timeline bleeds through the verse\n[01:07.10]Telescope aimed at dead stars\n[01:10.65]Still tracing constellations through prison bars\n[01:14.30]Electric veins pulse through concrete skies\n[01:17.85]Your name echoes in the hollow where my heartbeat lies\n[01:21.50]We're satellites trapped in parallel light\n[01:25.05]Burning through the atmosphere of endless night\n[02:10.00]Clockwork gears grind moonbeams to rust\n[02:13.50]Our fingerprint smudged by interstellar dust\n[02:17.15]Velvet thunder rolls through my veins\n[02:20.70]Chasing phantom trains through solar plane\n[02:24.35]Electric veins pulse through concrete skies\n[02:27.90]Your name echoes in the hollow where my heartbeat lies"""],
|
| 255 |
+
["""[00:04.34]Tell me that I'm special\n[00:06.57]Tell me I look pretty\n[00:08.46]Tell me I'm a little angel\n[00:10.58]Sweetheart of your city\n[00:13.64]Say what I'm dying to hear\n[00:17.35]Cause I'm dying to hear you\n[00:20.86]Tell me I'm that new thing\n[00:22.93]Tell me that I'm relevant\n[00:24.96]Tell me that I got a big heart\n[00:27.04]Then back it up with evidence\n[00:29.94]I need it and I don't know why\n[00:34.28]This late at night\n[00:36.32]Isn't it lonely\n[00:39.24]I'd do anything to make you want me\n[00:43.40]I'd give it all up if you told me\n[00:47.42]That I'd be\n[00:49.43]The number one girl in your eyes\n[00:52.85]Your one and only\n[00:55.74]So what's it gon' take for you to want me\n[00:59.78]I'd give it all up if you told me\n[01:03.89]That I'd be\n[01:05.94]The number one girl in your eyes\n[01:11.34]Tell me I'm going real big places\n[01:14.32]Down to earth so friendly\n[01:16.30]And even through all the phases\n[01:18.46]Tell me you accept me\n[01:21.56]Well that's all I'm dying to hear\n[01:25.30]Yeah I'm dying to hear you\n[01:28.91]Tell me that you need me\n[01:30.85]Tell me that I'm loved\n[01:32.90]Tell me that I'm worth it"""],
|
| 256 |
+
["""[00:04.27]只因你太美 baby\n[00:08.95]只因你实在是太美 baby\n[00:13.99]只因你太美 baby\n[00:18.89]迎面走来的你让我如此蠢蠢欲动\n[00:20.88]这种感觉我从未有\n[00:21.79]Cause I got a crush on you who you\n[00:25.74]你是我的我是你的谁\n[00:28.09]再多一眼看一眼就会爆炸\n[00:30.31]再近一点靠近点快被融化\n[00:32.49]想要把你占为己有 baby\n[00:34.60]不管走到哪里\n[00:35.44]都会想起的人是你 you you\n[00:38.12]我应该拿你怎样\n[00:39.61]Uh 所有人都在看着你\n[00:42.36]我的心总是不安\n[00:44.18]Oh 我现在已病入膏肓\n[00:46.63]Eh oh\n[00:47.84]难道真的因你而疯狂吗\n[00:51.57]我本来不是这种人\n[00:53.59]因你变成奇怪的人\n[00:55.77]第一次呀变成这样的我\n[01:01.23]不管我怎么去否认\n[01:03.21]只因你太美 baby\n[01:11.46]只因你实在是太美 baby\n[01:16.75]只因你太美 baby\n[01:21.09]Oh eh oh\n[01:22.82]现在确认地告诉我\n[01:25.26]Oh eh oh\n[01:27.31]你到底属于谁\n[01:29.98]Oh eh oh\n[01:31.70]现在确认地告诉我\n[01:34.45]Oh eh oh\n[01:36.35]你到底属于谁\n[01:37.65]就是现在告诉我\n[01:40.00]跟着那节奏 缓缓 make wave\n"""]
|
| 257 |
],
|
| 258 |
|
| 259 |
inputs=[lrc],
|
| 260 |
label="Lrc Examples",
|
| 261 |
+
examples_per_page=3,
|
| 262 |
elem_id="lrc-examples-container",
|
| 263 |
)
|
| 264 |
|
|
|
|
| 273 |
gr.Markdown("### Method 1: Generate from Theme")
|
| 274 |
theme = gr.Textbox(label="theme", placeholder="Enter song theme, e.g: Love and Heartbreak")
|
| 275 |
tags_gen = gr.Textbox(label="tags", placeholder="Enter song tags, e.g: pop confidence healing")
|
| 276 |
+
language = gr.Radio(["cn", "en"], label="Language", value="en")
|
| 277 |
gen_from_theme_btn = gr.Button("Generate LRC (From Theme)", variant="primary")
|
| 278 |
|
| 279 |
gr.Examples(
|
|
|
|
| 286 |
[
|
| 287 |
"Heroic Epic",
|
| 288 |
"choir orchestral powerful",
|
| 289 |
+
"cn"
|
| 290 |
]
|
| 291 |
],
|
| 292 |
inputs=[theme, tags_gen, language],
|
|
|
|
| 324 |
|
| 325 |
with gr.Column():
|
| 326 |
lrc_output = gr.Textbox(
|
| 327 |
+
label="Generated LRC",
|
| 328 |
placeholder="Timed lyrics will appear here",
|
| 329 |
lines=57,
|
| 330 |
elem_classes="lrc-output",
|
diffrhythm/infer/infer_utils.py
CHANGED
|
@@ -56,6 +56,7 @@ def get_style_prompt(model, wav_path):
|
|
| 56 |
audio, _ = librosa.load(wav_path, sr=24000)
|
| 57 |
audio_len = librosa.get_duration(y=audio, sr=24000)
|
| 58 |
|
|
|
|
| 59 |
assert audio_len >= 1, "Input audio length shorter than 1 second"
|
| 60 |
|
| 61 |
if audio_len > 10:
|
|
|
|
| 56 |
audio, _ = librosa.load(wav_path, sr=24000)
|
| 57 |
audio_len = librosa.get_duration(y=audio, sr=24000)
|
| 58 |
|
| 59 |
+
|
| 60 |
assert audio_len >= 1, "Input audio length shorter than 1 second"
|
| 61 |
|
| 62 |
if audio_len > 10:
|
src/prompt/rap_cn.wav
ADDED
|
Binary file (441 kB). View file
|
|
|
src/prompt/rap_en.wav
ADDED
|
Binary file (882 kB). View file
|
|
|