Spaces:
Running
on
Zero
Running
on
Zero
cn lyrics example
Browse files- app.py +33 -30
- diffrhythm/infer/infer_utils.py +1 -0
- src/prompt/rap_cn.wav +0 -0
- src/prompt/rap_en.wav +0 -0
app.py
CHANGED
@@ -29,15 +29,18 @@ device='cuda'
|
|
29 |
cfm, tokenizer, muq, vae = prepare_model(device)
|
30 |
cfm = torch.compile(cfm)
|
31 |
|
32 |
-
@spaces.GPU
|
33 |
def infer_music(lrc, ref_audio_path, seed=42, randomize_seed=False, steps=32, file_type='wav', max_frames=2048, device='cuda'):
|
34 |
|
35 |
if randomize_seed:
|
36 |
seed = random.randint(0, MAX_SEED)
|
37 |
torch.manual_seed(seed)
|
38 |
sway_sampling_coef = -1 if steps < 32 else None
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
41 |
negative_style_prompt = get_negative_style_prompt(device)
|
42 |
latent_prompt = get_reference_latent(device, max_frames)
|
43 |
generated_song = inference(cfm_model=cfm,
|
@@ -169,7 +172,7 @@ with gr.Blocks(css=css) as demo:
|
|
169 |
with gr.Row():
|
170 |
with gr.Column():
|
171 |
lrc = gr.Textbox(
|
172 |
-
label="
|
173 |
placeholder="Input the full lyrics",
|
174 |
lines=12,
|
175 |
max_lines=50,
|
@@ -181,26 +184,23 @@ with gr.Blocks(css=css) as demo:
|
|
181 |
with gr.Column():
|
182 |
with gr.Accordion("Best Practices Guide", open=True):
|
183 |
gr.Markdown("""
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
4. **Supported Languages**
|
202 |
-
- **Chinese and English**
|
203 |
-
- More languages coming soon
|
204 |
""")
|
205 |
|
206 |
lyrics_btn = gr.Button("Generate", variant="primary")
|
@@ -239,23 +239,26 @@ with gr.Blocks(css=css) as demo:
|
|
239 |
["./src/prompt/classic_en.wav"],
|
240 |
["./src/prompt/jazz_cn.wav"],
|
241 |
["./src/prompt/jazz_en.wav"],
|
|
|
|
|
242 |
["./src/prompt/default.wav"]
|
243 |
],
|
244 |
inputs=[audio_prompt],
|
245 |
label="Audio Examples",
|
246 |
-
examples_per_page=
|
247 |
elem_id="audio-examples-container"
|
248 |
)
|
249 |
|
250 |
gr.Examples(
|
251 |
examples=[
|
252 |
["""[00:10.00]Moonlight spills through broken blinds\n[00:13.20]Your shadow dances on the dashboard shrine\n[00:16.85]Neon ghosts in gasoline rain\n[00:20.40]I hear your laughter down the midnight train\n[00:24.15]Static whispers through frayed wires\n[00:27.65]Guitar strings hum our cathedral choirs\n[00:31.30]Flicker screens show reruns of June\n[00:34.90]I'm drowning in this mercury lagoon\n[00:38.55]Electric veins pulse through concrete skies\n[00:42.10]Your name echoes in the hollow where my heartbeat lies\n[00:45.75]We're satellites trapped in parallel light\n[00:49.25]Burning through the atmosphere of endless night\n[01:00.00]Dusty vinyl spins reverse\n[01:03.45]Our polaroid timeline bleeds through the verse\n[01:07.10]Telescope aimed at dead stars\n[01:10.65]Still tracing constellations through prison bars\n[01:14.30]Electric veins pulse through concrete skies\n[01:17.85]Your name echoes in the hollow where my heartbeat lies\n[01:21.50]We're satellites trapped in parallel light\n[01:25.05]Burning through the atmosphere of endless night\n[02:10.00]Clockwork gears grind moonbeams to rust\n[02:13.50]Our fingerprint smudged by interstellar dust\n[02:17.15]Velvet thunder rolls through my veins\n[02:20.70]Chasing phantom trains through solar plane\n[02:24.35]Electric veins pulse through concrete skies\n[02:27.90]Your name echoes in the hollow where my heartbeat lies"""],
|
253 |
-
["""[00:04.34]Tell me that I'm special\n[00:06.57]Tell me I look pretty\n[00:08.46]Tell me I'm a little angel\n[00:10.58]Sweetheart of your city\n[00:13.64]Say what I'm dying to hear\n[00:17.35]Cause I'm dying to hear you\n[00:20.86]Tell me I'm that new thing\n[00:22.93]Tell me that I'm relevant\n[00:24.96]Tell me that I got a big heart\n[00:27.04]Then back it up with evidence\n[00:29.94]I need it and I don't know why\n[00:34.28]This late at night\n[00:36.32]Isn't it lonely\n[00:39.24]I'd do anything to make you want me\n[00:43.40]I'd give it all up if you told me\n[00:47.42]That I'd be\n[00:49.43]The number one girl in your eyes\n[00:52.85]Your one and only\n[00:55.74]So what's it gon' take for you to want me\n[00:59.78]I'd give it all up if you told me\n[01:03.89]That I'd be\n[01:05.94]The number one girl in your eyes\n[01:11.34]Tell me I'm going real big places\n[01:14.32]Down to earth so friendly\n[01:16.30]And even through all the phases\n[01:18.46]Tell me you accept me\n[01:21.56]Well that's all I'm dying to hear\n[01:25.30]Yeah I'm dying to hear you\n[01:28.91]Tell me that you need me\n[01:30.85]Tell me that I'm loved\n[01:32.90]Tell me that I'm worth it"""]
|
|
|
254 |
],
|
255 |
|
256 |
inputs=[lrc],
|
257 |
label="Lrc Examples",
|
258 |
-
examples_per_page=
|
259 |
elem_id="lrc-examples-container",
|
260 |
)
|
261 |
|
@@ -270,7 +273,7 @@ with gr.Blocks(css=css) as demo:
|
|
270 |
gr.Markdown("### Method 1: Generate from Theme")
|
271 |
theme = gr.Textbox(label="theme", placeholder="Enter song theme, e.g: Love and Heartbreak")
|
272 |
tags_gen = gr.Textbox(label="tags", placeholder="Enter song tags, e.g: pop confidence healing")
|
273 |
-
language = gr.Radio(["
|
274 |
gen_from_theme_btn = gr.Button("Generate LRC (From Theme)", variant="primary")
|
275 |
|
276 |
gr.Examples(
|
@@ -283,7 +286,7 @@ with gr.Blocks(css=css) as demo:
|
|
283 |
[
|
284 |
"Heroic Epic",
|
285 |
"choir orchestral powerful",
|
286 |
-
"
|
287 |
]
|
288 |
],
|
289 |
inputs=[theme, tags_gen, language],
|
@@ -321,7 +324,7 @@ with gr.Blocks(css=css) as demo:
|
|
321 |
|
322 |
with gr.Column():
|
323 |
lrc_output = gr.Textbox(
|
324 |
-
label="Generated LRC
|
325 |
placeholder="Timed lyrics will appear here",
|
326 |
lines=57,
|
327 |
elem_classes="lrc-output",
|
|
|
29 |
cfm, tokenizer, muq, vae = prepare_model(device)
|
30 |
cfm = torch.compile(cfm)
|
31 |
|
32 |
+
@spaces.GPU(duration=20)
|
33 |
def infer_music(lrc, ref_audio_path, seed=42, randomize_seed=False, steps=32, file_type='wav', max_frames=2048, device='cuda'):
|
34 |
|
35 |
if randomize_seed:
|
36 |
seed = random.randint(0, MAX_SEED)
|
37 |
torch.manual_seed(seed)
|
38 |
sway_sampling_coef = -1 if steps < 32 else None
|
39 |
+
try:
|
40 |
+
lrc_prompt, start_time = get_lrc_token(lrc, tokenizer, device)
|
41 |
+
style_prompt = get_style_prompt(muq, ref_audio_path)
|
42 |
+
except Exception as e:
|
43 |
+
raise gr.Error(f"Error: {str(e)}")
|
44 |
negative_style_prompt = get_negative_style_prompt(device)
|
45 |
latent_prompt = get_reference_latent(device, max_frames)
|
46 |
generated_song = inference(cfm_model=cfm,
|
|
|
172 |
with gr.Row():
|
173 |
with gr.Column():
|
174 |
lrc = gr.Textbox(
|
175 |
+
label="Lyrics",
|
176 |
placeholder="Input the full lyrics",
|
177 |
lines=12,
|
178 |
max_lines=50,
|
|
|
184 |
with gr.Column():
|
185 |
with gr.Accordion("Best Practices Guide", open=True):
|
186 |
gr.Markdown("""
|
187 |
+
1. **Lyrics Format Requirements**
|
188 |
+
- Each line must follow: `[mm:ss.xx]Lyric content`
|
189 |
+
- Example of valid format:
|
190 |
+
```
|
191 |
+
[00:10.00]Moonlight spills through broken blinds
|
192 |
+
[00:13.20]Your shadow dances on the dashboard shrine
|
193 |
+
```
|
194 |
+
2. **Generation Duration Limits**
|
195 |
+
- The current version supports a maximum of **95 seconds** of music generation
|
196 |
+
- Total timestamps should not exceed 01:35.00 (95 seconds)
|
197 |
+
3. **Audio Prompt Requirements**
|
198 |
+
- Reference audio should be ≥ 1 second; audio longer than 10 seconds will be randomly clipped to 10 seconds
|
199 |
+
- For optimal results, the 10-second clips should be carefully selected
|
200 |
+
- Shorter clips may lead to incoherent generation
|
201 |
+
4. **Supported Languages**
|
202 |
+
- **Chinese and English**
|
203 |
+
- More languages coming soon
|
|
|
|
|
|
|
204 |
""")
|
205 |
|
206 |
lyrics_btn = gr.Button("Generate", variant="primary")
|
|
|
239 |
["./src/prompt/classic_en.wav"],
|
240 |
["./src/prompt/jazz_cn.wav"],
|
241 |
["./src/prompt/jazz_en.wav"],
|
242 |
+
["./src/prompt/rap_cn.wav"],
|
243 |
+
["./src/prompt/rap_en.wav"],
|
244 |
["./src/prompt/default.wav"]
|
245 |
],
|
246 |
inputs=[audio_prompt],
|
247 |
label="Audio Examples",
|
248 |
+
examples_per_page=13,
|
249 |
elem_id="audio-examples-container"
|
250 |
)
|
251 |
|
252 |
gr.Examples(
|
253 |
examples=[
|
254 |
["""[00:10.00]Moonlight spills through broken blinds\n[00:13.20]Your shadow dances on the dashboard shrine\n[00:16.85]Neon ghosts in gasoline rain\n[00:20.40]I hear your laughter down the midnight train\n[00:24.15]Static whispers through frayed wires\n[00:27.65]Guitar strings hum our cathedral choirs\n[00:31.30]Flicker screens show reruns of June\n[00:34.90]I'm drowning in this mercury lagoon\n[00:38.55]Electric veins pulse through concrete skies\n[00:42.10]Your name echoes in the hollow where my heartbeat lies\n[00:45.75]We're satellites trapped in parallel light\n[00:49.25]Burning through the atmosphere of endless night\n[01:00.00]Dusty vinyl spins reverse\n[01:03.45]Our polaroid timeline bleeds through the verse\n[01:07.10]Telescope aimed at dead stars\n[01:10.65]Still tracing constellations through prison bars\n[01:14.30]Electric veins pulse through concrete skies\n[01:17.85]Your name echoes in the hollow where my heartbeat lies\n[01:21.50]We're satellites trapped in parallel light\n[01:25.05]Burning through the atmosphere of endless night\n[02:10.00]Clockwork gears grind moonbeams to rust\n[02:13.50]Our fingerprint smudged by interstellar dust\n[02:17.15]Velvet thunder rolls through my veins\n[02:20.70]Chasing phantom trains through solar plane\n[02:24.35]Electric veins pulse through concrete skies\n[02:27.90]Your name echoes in the hollow where my heartbeat lies"""],
|
255 |
+
["""[00:04.34]Tell me that I'm special\n[00:06.57]Tell me I look pretty\n[00:08.46]Tell me I'm a little angel\n[00:10.58]Sweetheart of your city\n[00:13.64]Say what I'm dying to hear\n[00:17.35]Cause I'm dying to hear you\n[00:20.86]Tell me I'm that new thing\n[00:22.93]Tell me that I'm relevant\n[00:24.96]Tell me that I got a big heart\n[00:27.04]Then back it up with evidence\n[00:29.94]I need it and I don't know why\n[00:34.28]This late at night\n[00:36.32]Isn't it lonely\n[00:39.24]I'd do anything to make you want me\n[00:43.40]I'd give it all up if you told me\n[00:47.42]That I'd be\n[00:49.43]The number one girl in your eyes\n[00:52.85]Your one and only\n[00:55.74]So what's it gon' take for you to want me\n[00:59.78]I'd give it all up if you told me\n[01:03.89]That I'd be\n[01:05.94]The number one girl in your eyes\n[01:11.34]Tell me I'm going real big places\n[01:14.32]Down to earth so friendly\n[01:16.30]And even through all the phases\n[01:18.46]Tell me you accept me\n[01:21.56]Well that's all I'm dying to hear\n[01:25.30]Yeah I'm dying to hear you\n[01:28.91]Tell me that you need me\n[01:30.85]Tell me that I'm loved\n[01:32.90]Tell me that I'm worth it"""],
|
256 |
+
["""[00:04.27]只因你太美 baby\n[00:08.95]只因你实在是太美 baby\n[00:13.99]只因你太美 baby\n[00:18.89]迎面走来的你让我如此蠢蠢欲动\n[00:20.88]这种感觉我从未有\n[00:21.79]Cause I got a crush on you who you\n[00:25.74]你是我的我是你的谁\n[00:28.09]再多一眼看一眼就会爆炸\n[00:30.31]再近一点靠近点快被融化\n[00:32.49]想要把你占为己有 baby\n[00:34.60]不管走到哪里\n[00:35.44]都会想起的人是你 you you\n[00:38.12]我应该拿你怎样\n[00:39.61]Uh 所有人都在看着你\n[00:42.36]我的心总是不安\n[00:44.18]Oh 我现在已病入膏肓\n[00:46.63]Eh oh\n[00:47.84]难道真的因你而疯狂吗\n[00:51.57]我本来不是这种人\n[00:53.59]因你变成奇怪的人\n[00:55.77]第一次呀变成这样的我\n[01:01.23]不管我怎么去否认\n[01:03.21]只因你太美 baby\n[01:11.46]只因你实在是太美 baby\n[01:16.75]只因你太美 baby\n[01:21.09]Oh eh oh\n[01:22.82]现在确认地告诉我\n[01:25.26]Oh eh oh\n[01:27.31]你到底属于谁\n[01:29.98]Oh eh oh\n[01:31.70]现在确认地告诉我\n[01:34.45]Oh eh oh\n[01:36.35]你到底属于谁\n[01:37.65]就是现在告诉我\n[01:40.00]跟着那节奏 缓缓 make wave\n"""]
|
257 |
],
|
258 |
|
259 |
inputs=[lrc],
|
260 |
label="Lrc Examples",
|
261 |
+
examples_per_page=3,
|
262 |
elem_id="lrc-examples-container",
|
263 |
)
|
264 |
|
|
|
273 |
gr.Markdown("### Method 1: Generate from Theme")
|
274 |
theme = gr.Textbox(label="theme", placeholder="Enter song theme, e.g: Love and Heartbreak")
|
275 |
tags_gen = gr.Textbox(label="tags", placeholder="Enter song tags, e.g: pop confidence healing")
|
276 |
+
language = gr.Radio(["cn", "en"], label="Language", value="en")
|
277 |
gen_from_theme_btn = gr.Button("Generate LRC (From Theme)", variant="primary")
|
278 |
|
279 |
gr.Examples(
|
|
|
286 |
[
|
287 |
"Heroic Epic",
|
288 |
"choir orchestral powerful",
|
289 |
+
"cn"
|
290 |
]
|
291 |
],
|
292 |
inputs=[theme, tags_gen, language],
|
|
|
324 |
|
325 |
with gr.Column():
|
326 |
lrc_output = gr.Textbox(
|
327 |
+
label="Generated LRC",
|
328 |
placeholder="Timed lyrics will appear here",
|
329 |
lines=57,
|
330 |
elem_classes="lrc-output",
|
diffrhythm/infer/infer_utils.py
CHANGED
@@ -56,6 +56,7 @@ def get_style_prompt(model, wav_path):
|
|
56 |
audio, _ = librosa.load(wav_path, sr=24000)
|
57 |
audio_len = librosa.get_duration(y=audio, sr=24000)
|
58 |
|
|
|
59 |
assert audio_len >= 1, "Input audio length shorter than 1 second"
|
60 |
|
61 |
if audio_len > 10:
|
|
|
56 |
audio, _ = librosa.load(wav_path, sr=24000)
|
57 |
audio_len = librosa.get_duration(y=audio, sr=24000)
|
58 |
|
59 |
+
|
60 |
assert audio_len >= 1, "Input audio length shorter than 1 second"
|
61 |
|
62 |
if audio_len > 10:
|
src/prompt/rap_cn.wav
ADDED
Binary file (441 kB). View file
|
|
src/prompt/rap_en.wav
ADDED
Binary file (882 kB). View file
|
|