Spaces:
Sleeping
Sleeping
Create simpler_app.py
Browse files- simpler_app.py +126 -0
simpler_app.py
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import requests
|
3 |
+
import json
|
4 |
+
# from volcenginesdkarkruntime import Ark
|
5 |
+
import torch
|
6 |
+
import torchaudio
|
7 |
+
from einops import rearrange
|
8 |
+
import argparse
|
9 |
+
import json
|
10 |
+
import os
|
11 |
+
import spaces
|
12 |
+
from tqdm import tqdm
|
13 |
+
import random
|
14 |
+
import numpy as np
|
15 |
+
import sys
|
16 |
+
import base64
|
17 |
+
from diffrhythm.infer.infer_utils import (
|
18 |
+
get_reference_latent,
|
19 |
+
get_lrc_token,
|
20 |
+
get_style_prompt,
|
21 |
+
prepare_model,
|
22 |
+
get_negative_style_prompt
|
23 |
+
)
|
24 |
+
from diffrhythm.infer.infer import inference
|
25 |
+
|
26 |
+
# Upper bound used when a random seed is drawn (largest signed 32-bit int).
MAX_SEED = np.iinfo(np.int32).max

# Device string handed to prepare_model below.
device = 'cuda'

# Load everything once at import time; the returned objects are shared
# module-level state used by infer_music.
cfm, tokenizer, muq, vae = prepare_model(device)
cfm = torch.compile(cfm)
|
30 |
+
|
31 |
+
def infer_music(lrc, ref_audio_path, seed=42, randomize_seed=False, steps=32, file_type='wav', max_frames=2048, device='cuda'):
    """Generate a song from lyrics and a reference audio file.

    Args:
        lrc: Lyrics text passed to ``get_lrc_token``.
        ref_audio_path: Path of the reference audio passed to
            ``get_style_prompt``.
        seed: Seed given to ``torch.manual_seed``.
        randomize_seed: When true, ``seed`` is replaced by a random integer
            in ``[0, MAX_SEED]`` before seeding.
        steps: Forwarded to ``inference``; also selects the sway sampling
            coefficient (``-1`` below 32 steps, otherwise ``None``).
        file_type: Forwarded to ``inference``.
        max_frames: Used for the reference latent and as ``duration``.
        device: Device passed to the prompt/latent helpers.

    Returns:
        Whatever ``inference`` returns for the assembled inputs.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    torch.manual_seed(seed)

    if steps < 32:
        sway_sampling_coef = -1
    else:
        sway_sampling_coef = None

    # Helper calls are kept in their original order.
    lyric_tokens, lyric_start = get_lrc_token(lrc, tokenizer, device)
    style_embedding = get_style_prompt(muq, ref_audio_path)
    negative_style = get_negative_style_prompt(device)
    reference_latent = get_reference_latent(device, max_frames)

    return inference(
        cfm_model=cfm,
        vae_model=vae,
        cond=reference_latent,
        text=lyric_tokens,
        duration=max_frames,
        style_prompt=style_embedding,
        negative_style_prompt=negative_style,
        steps=steps,
        sway_sampling_coef=sway_sampling_coef,
        start_time=lyric_start,
        file_type=file_type,
    )
|
54 |
+
|
55 |
+
import re
|
56 |
+
from transformers import pipeline
|
57 |
+
|
58 |
+
# Model ids for the lyric-writing LLM. Only zephyr_model is passed to the
# pipeline below; mixtral_model is defined but not referenced in this file.
zephyr_model = "HuggingFaceH4/zephyr-7b-beta"
mixtral_model = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# Text-generation pipeline created once at import time and reused by
# prepare_lyrics_with_llm.
pipe = pipeline(
    "text-generation",
    model=zephyr_model,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
|
62 |
+
|
63 |
+
|
64 |
+
|
65 |
+
def prepare_lyrics_with_llm(theme, tags, lyrics, generator=None):
    """Ask the text-generation pipeline for timestamped lyrics.

    Builds a prompt with ``<|system|>`` / ``<|user|>`` / ``<|assistant|>``
    turns, runs it through the generator, removes the echoed prompt from the
    returned text and returns what is left.

    Args:
        theme: Theme of the song, interpolated into the prompt.
        tags: Music style tags, interpolated into the prompt.
        lyrics: Optional user lyrics, interpolated into the prompt as-is.
        generator: Callable with the same calling convention as the
            module-level ``pipe`` (``generator(prompt, **sampling_kwargs)``
            returning ``[{"generated_text": ...}]``). Defaults to ``pipe``.

    Returns:
        The generated text with the prompt removed and leading newlines
        stripped.
    """
    if generator is None:
        generator = pipe

    language = "English"
    standard_sys = f"""
Please generate a complete song with lyrics in {language}, following the {tags} style and centered around the theme "{theme}".
If {lyrics} is provided, format it accordingly.
If {lyrics} is None, generate original lyrics based on the given theme and style.

Strictly adhere to the following requirements:

### Mandatory Formatting Rules
1. Only output the formatted lyrics—do not include any explanations, introductions, or additional messages.
2. Only include timestamps and lyrics. Do not use brackets, side notes, or section markers (e.g., chorus, instrumental, outro).
3. Each line must follow the format [mm:ss.xx]Lyrics content, with no spaces between the timestamp and lyrics. The lyrics should be continuous and complete.
4. The total song length must not exceed 1 minute 30 seconds.
5. Timestamps should be naturally distributed. The first lyric must not start at [00:00.00]—consider an intro before the lyrics begin.

### Prohibited Examples (Do Not Include)
- Incorrect: [01:30.00](Piano solo)
- Incorrect: [00:45.00][Chorus]
"""

    instruction = f"""
<|system|>
{standard_sys}</s>
<|user|>
theme: {theme}
tags: {tags}
lyrics: {lyrics}
"""

    # Open the assistant turn explicitly. The clean-up below removes everything
    # from <|system|> up to <|assistant|>; without this tag in the prompt it
    # only worked when the model happened to emit the tag itself, and otherwise
    # the whole prompt leaked into the returned lyrics.
    prompt = f"{instruction.strip()}</s>\n<|assistant|>\n"

    outputs = generator(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
    )

    # generated_text is expected to contain the prompt followed by the
    # completion; drop the prompt part (system turn through assistant tag).
    pattern = r'\<\|system\|\>(.*?)\<\|assistant\|\>'
    cleaned_text = re.sub(pattern, '', outputs[0]["generated_text"], flags=re.DOTALL)

    print(f"SUGGESTED Lyrics: {cleaned_text}")
    return cleaned_text.lstrip("\n")
|
103 |
+
|
104 |
+
def general_process(theme, tags, lyrics):
    """Gradio click handler: produce suggested lyrics for the given inputs.

    Returns a two-item tuple matching the handler's two outputs. The first
    item is always ``None`` (no audio is produced here); the second is the
    text returned by ``prepare_lyrics_with_llm``.
    """
    suggested_lyrics = prepare_lyrics_with_llm(theme, tags, lyrics)
    return (None, suggested_lyrics)
|
107 |
+
|
108 |
+
|
109 |
+
# UI definition. The original passed css=css, but no `css` name is defined in
# this file, which raised NameError at import time; the argument is dropped so
# Blocks uses its default styling.
with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown("# Simpler Diff Rythm")

        # Inputs
        theme_song = gr.Textbox(label="Theme")
        style_tags = gr.Textbox(label="Music style tags")
        lyrics = gr.Textbox(label="Lyrics optional")
        submit_btn = gr.Button("Submit")

        # Outputs
        song_result = gr.Audio(label="Song result")
        generated_lyrics = gr.Textbox(label="Generated Lyrics")

    # general_process returns (audio, lyrics) in the same order as `outputs`.
    submit_btn.click(
        fn=general_process,
        inputs=[theme_song, style_tags, lyrics],
        outputs=[song_result, generated_lyrics],
    )

demo.queue().launch(show_api=False, show_error=True, ssr_mode=False)
|