ing0 committed on
Commit
d411505
·
1 Parent(s): b311ba6

cn lyrics example

Browse files
app.py CHANGED
@@ -29,15 +29,18 @@ device='cuda'
29
  cfm, tokenizer, muq, vae = prepare_model(device)
30
  cfm = torch.compile(cfm)
31
 
32
- @spaces.GPU
33
  def infer_music(lrc, ref_audio_path, seed=42, randomize_seed=False, steps=32, file_type='wav', max_frames=2048, device='cuda'):
34
 
35
  if randomize_seed:
36
  seed = random.randint(0, MAX_SEED)
37
  torch.manual_seed(seed)
38
  sway_sampling_coef = -1 if steps < 32 else None
39
- lrc_prompt, start_time = get_lrc_token(lrc, tokenizer, device)
40
- style_prompt = get_style_prompt(muq, ref_audio_path)
 
 
 
41
  negative_style_prompt = get_negative_style_prompt(device)
42
  latent_prompt = get_reference_latent(device, max_frames)
43
  generated_song = inference(cfm_model=cfm,
@@ -169,7 +172,7 @@ with gr.Blocks(css=css) as demo:
169
  with gr.Row():
170
  with gr.Column():
171
  lrc = gr.Textbox(
172
- label="Lrc",
173
  placeholder="Input the full lyrics",
174
  lines=12,
175
  max_lines=50,
@@ -181,26 +184,23 @@ with gr.Blocks(css=css) as demo:
181
  with gr.Column():
182
  with gr.Accordion("Best Practices Guide", open=True):
183
  gr.Markdown("""
184
- 1. **Lyrics Format Requirements**
185
- - Each line must follow: `[mm:ss.xx]Lyric content`
186
- - Example of valid format:
187
- ```
188
- [00:10.00]Moonlight spills through broken blinds
189
- [00:13.20]Your shadow dances on the dashboard shrine
190
- ```
191
-
192
- 2. **Generation Duration Limits**
193
- - Current version supports maximum **95 seconds** of music generation
194
- - Total timestamps should not exceed 01:35.00 (95 seconds)
195
-
196
- 3. **Audio Prompt Requirements**
197
- - Reference audio should be 1 second, audio >10 seconds will be randomly clipped into 10 seconds
198
- - For optimal results, the 10-second clips should be carefully selected
199
- - Shorter clips may lead to incoherent generation
200
-
201
- 4. **Supported Languages**
202
- - **Chinese and English**
203
- - More languages coming soon
204
  """)
205
 
206
  lyrics_btn = gr.Button("Generate", variant="primary")
@@ -239,23 +239,26 @@ with gr.Blocks(css=css) as demo:
239
  ["./src/prompt/classic_en.wav"],
240
  ["./src/prompt/jazz_cn.wav"],
241
  ["./src/prompt/jazz_en.wav"],
 
 
242
  ["./src/prompt/default.wav"]
243
  ],
244
  inputs=[audio_prompt],
245
  label="Audio Examples",
246
- examples_per_page=11,
247
  elem_id="audio-examples-container"
248
  )
249
 
250
  gr.Examples(
251
  examples=[
252
  ["""[00:10.00]Moonlight spills through broken blinds\n[00:13.20]Your shadow dances on the dashboard shrine\n[00:16.85]Neon ghosts in gasoline rain\n[00:20.40]I hear your laughter down the midnight train\n[00:24.15]Static whispers through frayed wires\n[00:27.65]Guitar strings hum our cathedral choirs\n[00:31.30]Flicker screens show reruns of June\n[00:34.90]I'm drowning in this mercury lagoon\n[00:38.55]Electric veins pulse through concrete skies\n[00:42.10]Your name echoes in the hollow where my heartbeat lies\n[00:45.75]We're satellites trapped in parallel light\n[00:49.25]Burning through the atmosphere of endless night\n[01:00.00]Dusty vinyl spins reverse\n[01:03.45]Our polaroid timeline bleeds through the verse\n[01:07.10]Telescope aimed at dead stars\n[01:10.65]Still tracing constellations through prison bars\n[01:14.30]Electric veins pulse through concrete skies\n[01:17.85]Your name echoes in the hollow where my heartbeat lies\n[01:21.50]We're satellites trapped in parallel light\n[01:25.05]Burning through the atmosphere of endless night\n[02:10.00]Clockwork gears grind moonbeams to rust\n[02:13.50]Our fingerprint smudged by interstellar dust\n[02:17.15]Velvet thunder rolls through my veins\n[02:20.70]Chasing phantom trains through solar plane\n[02:24.35]Electric veins pulse through concrete skies\n[02:27.90]Your name echoes in the hollow where my heartbeat lies"""],
253
- ["""[00:04.34]Tell me that I'm special\n[00:06.57]Tell me I look pretty\n[00:08.46]Tell me I'm a little angel\n[00:10.58]Sweetheart of your city\n[00:13.64]Say what I'm dying to hear\n[00:17.35]Cause I'm dying to hear you\n[00:20.86]Tell me I'm that new thing\n[00:22.93]Tell me that I'm relevant\n[00:24.96]Tell me that I got a big heart\n[00:27.04]Then back it up with evidence\n[00:29.94]I need it and I don't know why\n[00:34.28]This late at night\n[00:36.32]Isn't it lonely\n[00:39.24]I'd do anything to make you want me\n[00:43.40]I'd give it all up if you told me\n[00:47.42]That I'd be\n[00:49.43]The number one girl in your eyes\n[00:52.85]Your one and only\n[00:55.74]So what's it gon' take for you to want me\n[00:59.78]I'd give it all up if you told me\n[01:03.89]That I'd be\n[01:05.94]The number one girl in your eyes\n[01:11.34]Tell me I'm going real big places\n[01:14.32]Down to earth so friendly\n[01:16.30]And even through all the phases\n[01:18.46]Tell me you accept me\n[01:21.56]Well that's all I'm dying to hear\n[01:25.30]Yeah I'm dying to hear you\n[01:28.91]Tell me that you need me\n[01:30.85]Tell me that I'm loved\n[01:32.90]Tell me that I'm worth it"""]
 
254
  ],
255
 
256
  inputs=[lrc],
257
  label="Lrc Examples",
258
- examples_per_page=2,
259
  elem_id="lrc-examples-container",
260
  )
261
 
@@ -270,7 +273,7 @@ with gr.Blocks(css=css) as demo:
270
  gr.Markdown("### Method 1: Generate from Theme")
271
  theme = gr.Textbox(label="theme", placeholder="Enter song theme, e.g: Love and Heartbreak")
272
  tags_gen = gr.Textbox(label="tags", placeholder="Enter song tags, e.g: pop confidence healing")
273
- language = gr.Radio(["zh", "en"], label="Language", value="en")
274
  gen_from_theme_btn = gr.Button("Generate LRC (From Theme)", variant="primary")
275
 
276
  gr.Examples(
@@ -283,7 +286,7 @@ with gr.Blocks(css=css) as demo:
283
  [
284
  "Heroic Epic",
285
  "choir orchestral powerful",
286
- "zh"
287
  ]
288
  ],
289
  inputs=[theme, tags_gen, language],
@@ -321,7 +324,7 @@ with gr.Blocks(css=css) as demo:
321
 
322
  with gr.Column():
323
  lrc_output = gr.Textbox(
324
- label="Generated LRC Lyrics",
325
  placeholder="Timed lyrics will appear here",
326
  lines=57,
327
  elem_classes="lrc-output",
 
29
  cfm, tokenizer, muq, vae = prepare_model(device)
30
  cfm = torch.compile(cfm)
31
 
32
+ @spaces.GPU(duration=20)
33
  def infer_music(lrc, ref_audio_path, seed=42, randomize_seed=False, steps=32, file_type='wav', max_frames=2048, device='cuda'):
34
 
35
  if randomize_seed:
36
  seed = random.randint(0, MAX_SEED)
37
  torch.manual_seed(seed)
38
  sway_sampling_coef = -1 if steps < 32 else None
39
+ try:
40
+ lrc_prompt, start_time = get_lrc_token(lrc, tokenizer, device)
41
+ style_prompt = get_style_prompt(muq, ref_audio_path)
42
+ except Exception as e:
43
+ raise gr.Error(f"Error: {str(e)}")
44
  negative_style_prompt = get_negative_style_prompt(device)
45
  latent_prompt = get_reference_latent(device, max_frames)
46
  generated_song = inference(cfm_model=cfm,
 
172
  with gr.Row():
173
  with gr.Column():
174
  lrc = gr.Textbox(
175
+ label="Lyrics",
176
  placeholder="Input the full lyrics",
177
  lines=12,
178
  max_lines=50,
 
184
  with gr.Column():
185
  with gr.Accordion("Best Practices Guide", open=True):
186
  gr.Markdown("""
187
+ 1. **Lyrics Format Requirements**
188
+ - Each line must follow: `[mm:ss.xx]Lyric content`
189
+ - Example of valid format:
190
+ ```
191
+ [00:10.00]Moonlight spills through broken blinds
192
+ [00:13.20]Your shadow dances on the dashboard shrine
193
+ ```
194
+ 2. **Generation Duration Limits**
195
+ - Current version supports maximum **95 seconds** of music generation
196
+ - Total timestamps should not exceed 01:35.00 (95 seconds)
197
+ 3. **Audio Prompt Requirements**
198
+ - Reference audio should be ≥ 1 second, audio >10 seconds will be randomly clipped into 10 seconds
199
+ - For optimal results, the 10-second clips should be carefully selected
200
+ - Shorter clips may lead to incoherent generation
201
+ 4. **Supported Languages**
202
+ - **Chinese and English**
203
+ - More languages coming soon
 
 
 
204
  """)
205
 
206
  lyrics_btn = gr.Button("Generate", variant="primary")
 
239
  ["./src/prompt/classic_en.wav"],
240
  ["./src/prompt/jazz_cn.wav"],
241
  ["./src/prompt/jazz_en.wav"],
242
+ ["./src/prompt/rap_cn.wav"],
243
+ ["./src/prompt/rap_en.wav"],
244
  ["./src/prompt/default.wav"]
245
  ],
246
  inputs=[audio_prompt],
247
  label="Audio Examples",
248
+ examples_per_page=13,
249
  elem_id="audio-examples-container"
250
  )
251
 
252
  gr.Examples(
253
  examples=[
254
  ["""[00:10.00]Moonlight spills through broken blinds\n[00:13.20]Your shadow dances on the dashboard shrine\n[00:16.85]Neon ghosts in gasoline rain\n[00:20.40]I hear your laughter down the midnight train\n[00:24.15]Static whispers through frayed wires\n[00:27.65]Guitar strings hum our cathedral choirs\n[00:31.30]Flicker screens show reruns of June\n[00:34.90]I'm drowning in this mercury lagoon\n[00:38.55]Electric veins pulse through concrete skies\n[00:42.10]Your name echoes in the hollow where my heartbeat lies\n[00:45.75]We're satellites trapped in parallel light\n[00:49.25]Burning through the atmosphere of endless night\n[01:00.00]Dusty vinyl spins reverse\n[01:03.45]Our polaroid timeline bleeds through the verse\n[01:07.10]Telescope aimed at dead stars\n[01:10.65]Still tracing constellations through prison bars\n[01:14.30]Electric veins pulse through concrete skies\n[01:17.85]Your name echoes in the hollow where my heartbeat lies\n[01:21.50]We're satellites trapped in parallel light\n[01:25.05]Burning through the atmosphere of endless night\n[02:10.00]Clockwork gears grind moonbeams to rust\n[02:13.50]Our fingerprint smudged by interstellar dust\n[02:17.15]Velvet thunder rolls through my veins\n[02:20.70]Chasing phantom trains through solar plane\n[02:24.35]Electric veins pulse through concrete skies\n[02:27.90]Your name echoes in the hollow where my heartbeat lies"""],
255
+ ["""[00:04.34]Tell me that I'm special\n[00:06.57]Tell me I look pretty\n[00:08.46]Tell me I'm a little angel\n[00:10.58]Sweetheart of your city\n[00:13.64]Say what I'm dying to hear\n[00:17.35]Cause I'm dying to hear you\n[00:20.86]Tell me I'm that new thing\n[00:22.93]Tell me that I'm relevant\n[00:24.96]Tell me that I got a big heart\n[00:27.04]Then back it up with evidence\n[00:29.94]I need it and I don't know why\n[00:34.28]This late at night\n[00:36.32]Isn't it lonely\n[00:39.24]I'd do anything to make you want me\n[00:43.40]I'd give it all up if you told me\n[00:47.42]That I'd be\n[00:49.43]The number one girl in your eyes\n[00:52.85]Your one and only\n[00:55.74]So what's it gon' take for you to want me\n[00:59.78]I'd give it all up if you told me\n[01:03.89]That I'd be\n[01:05.94]The number one girl in your eyes\n[01:11.34]Tell me I'm going real big places\n[01:14.32]Down to earth so friendly\n[01:16.30]And even through all the phases\n[01:18.46]Tell me you accept me\n[01:21.56]Well that's all I'm dying to hear\n[01:25.30]Yeah I'm dying to hear you\n[01:28.91]Tell me that you need me\n[01:30.85]Tell me that I'm loved\n[01:32.90]Tell me that I'm worth it"""],
256
+ ["""[00:04.27]只因你太美 baby\n[00:08.95]只因你实在是太美 baby\n[00:13.99]只因你太美 baby\n[00:18.89]迎面走来的你让我如此蠢蠢欲动\n[00:20.88]这种感觉我从未有\n[00:21.79]Cause I got a crush on you who you\n[00:25.74]你是我的我是你的谁\n[00:28.09]再多一眼看一眼就会爆炸\n[00:30.31]再近一点靠近点快被融化\n[00:32.49]想要把你占为己有 baby\n[00:34.60]不管走到哪里\n[00:35.44]都会想起的人是你 you you\n[00:38.12]我应该拿你怎样\n[00:39.61]Uh 所有人都在看着你\n[00:42.36]我的心总是不安\n[00:44.18]Oh 我现在已病入膏肓\n[00:46.63]Eh oh\n[00:47.84]难道真的因你而疯狂吗\n[00:51.57]我本来不是这种人\n[00:53.59]因你变成奇怪的人\n[00:55.77]第一次呀变成这样的我\n[01:01.23]不管我怎么去否认\n[01:03.21]只因你太美 baby\n[01:11.46]只因你实在是太美 baby\n[01:16.75]只因你太美 baby\n[01:21.09]Oh eh oh\n[01:22.82]现在确认地告诉我\n[01:25.26]Oh eh oh\n[01:27.31]你到底属于谁\n[01:29.98]Oh eh oh\n[01:31.70]现在确认地告诉我\n[01:34.45]Oh eh oh\n[01:36.35]你到底属于谁\n[01:37.65]就是现在告诉我\n[01:40.00]跟着那节奏 缓缓 make wave\n"""]
257
  ],
258
 
259
  inputs=[lrc],
260
  label="Lrc Examples",
261
+ examples_per_page=3,
262
  elem_id="lrc-examples-container",
263
  )
264
 
 
273
  gr.Markdown("### Method 1: Generate from Theme")
274
  theme = gr.Textbox(label="theme", placeholder="Enter song theme, e.g: Love and Heartbreak")
275
  tags_gen = gr.Textbox(label="tags", placeholder="Enter song tags, e.g: pop confidence healing")
276
+ language = gr.Radio(["cn", "en"], label="Language", value="en")
277
  gen_from_theme_btn = gr.Button("Generate LRC (From Theme)", variant="primary")
278
 
279
  gr.Examples(
 
286
  [
287
  "Heroic Epic",
288
  "choir orchestral powerful",
289
+ "cn"
290
  ]
291
  ],
292
  inputs=[theme, tags_gen, language],
 
324
 
325
  with gr.Column():
326
  lrc_output = gr.Textbox(
327
+ label="Generated LRC",
328
  placeholder="Timed lyrics will appear here",
329
  lines=57,
330
  elem_classes="lrc-output",
diffrhythm/infer/infer_utils.py CHANGED
@@ -56,6 +56,7 @@ def get_style_prompt(model, wav_path):
56
  audio, _ = librosa.load(wav_path, sr=24000)
57
  audio_len = librosa.get_duration(y=audio, sr=24000)
58
 
 
59
  assert audio_len >= 1, "Input audio length shorter than 1 second"
60
 
61
  if audio_len > 10:
 
56
  audio, _ = librosa.load(wav_path, sr=24000)
57
  audio_len = librosa.get_duration(y=audio, sr=24000)
58
 
59
+
60
  assert audio_len >= 1, "Input audio length shorter than 1 second"
61
 
62
  if audio_len > 10:
src/prompt/rap_cn.wav ADDED
Binary file (441 kB). View file
 
src/prompt/rap_en.wav ADDED
Binary file (882 kB). View file