fffiloni committed on
Commit
1f45bd9
·
verified ·
1 Parent(s): 1866ab0

Create simpler_app.py

Browse files
Files changed (1) hide show
  1. simpler_app.py +126 -0
simpler_app.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import json
4
+ # from volcenginesdkarkruntime import Ark
5
+ import torch
6
+ import torchaudio
7
+ from einops import rearrange
8
+ import argparse
9
+ import json
10
+ import os
11
+ import spaces
12
+ from tqdm import tqdm
13
+ import random
14
+ import numpy as np
15
+ import sys
16
+ import base64
17
+ from diffrhythm.infer.infer_utils import (
18
+ get_reference_latent,
19
+ get_lrc_token,
20
+ get_style_prompt,
21
+ prepare_model,
22
+ get_negative_style_prompt
23
+ )
24
+ from diffrhythm.infer.infer import inference
25
+
26
# Upper bound used when a random seed is drawn: the largest int32 value.
MAX_SEED = np.iinfo(np.int32).max

# Device the DiffRhythm components are prepared on at import time.
device = "cuda"

# prepare_model returns the four components the rest of this file uses.
cfm, tokenizer, muq, vae = prepare_model(device)

# Rebind cfm to its torch.compile-wrapped version.
cfm = torch.compile(cfm)
30
+
31
def infer_music(lrc, ref_audio_path, seed=42, randomize_seed=False, steps=32, file_type='wav', max_frames=2048, device='cuda'):
    """Generate a song from lyrics and a reference audio file.

    Args:
        lrc: Lyrics text handed to ``get_lrc_token``.
        ref_audio_path: Audio path handed to ``get_style_prompt``.
        seed: Value given to ``torch.manual_seed`` unless ``randomize_seed``
            is set.
        randomize_seed: When true, replace ``seed`` with a random integer
            in ``[0, MAX_SEED]``.
        steps: Forwarded to ``inference``. Values below 32 pass
            ``sway_sampling_coef=-1``; otherwise ``None`` is passed.
        file_type: Forwarded to ``inference`` unchanged.
        max_frames: Used to build the reference latent and passed to
            ``inference`` as ``duration``.
        device: Device passed to the prompt/latent helper functions.

    Returns:
        The value returned by ``inference``.
    """
    chosen_seed = random.randint(0, MAX_SEED) if randomize_seed else seed
    torch.manual_seed(chosen_seed)

    if steps < 32:
        sway_coef = -1
    else:
        sway_coef = None

    # Helper calls are kept in their original order.
    lyric_tokens, lyric_start = get_lrc_token(lrc, tokenizer, device)
    style = get_style_prompt(muq, ref_audio_path)
    negative_style = get_negative_style_prompt(device)
    reference_latent = get_reference_latent(device, max_frames)

    inference_kwargs = {
        "cfm_model": cfm,
        "vae_model": vae,
        "cond": reference_latent,
        "text": lyric_tokens,
        "duration": max_frames,
        "style_prompt": style,
        "negative_style_prompt": negative_style,
        "steps": steps,
        "sway_sampling_coef": sway_coef,
        "start_time": lyric_start,
        "file_type": file_type,
    }
    return inference(**inference_kwargs)
54
+
55
+ import re
56
+ from transformers import pipeline
57
+
58
# Hub identifiers of the lyric-writing models; only the Zephyr one is loaded below.
zephyr_model = "HuggingFaceH4/zephyr-7b-beta"
mixtral_model = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# Text-generation pipeline called by prepare_lyrics_with_llm.
pipe = pipeline(
    task="text-generation",
    model=zephyr_model,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
62
+
63
+
64
+
65
def prepare_lyrics_with_llm(theme, tags, lyrics):
    """Ask the text-generation pipeline for timestamped song lyrics.

    Args:
        theme: Theme of the song, interpolated into the prompt.
        tags: Music style tags, interpolated into the prompt.
        lyrics: Optional user-supplied lyrics to format. ``None`` or blank
            text is treated as "no lyrics provided".

    Returns:
        The model's reply with the echoed prompt and any leading newlines
        removed.
    """
    # An untouched textbox yields "" rather than None; normalise it so the
    # prompt's "If None is None" branch actually applies.
    if lyrics is None or not str(lyrics).strip():
        lyrics = None

    language = "English"
    standard_sys = f"""
Please generate a complete song with lyrics in {language}, following the {tags} style and centered around the theme "{theme}".
If {lyrics} is provided, format it accordingly.
If {lyrics} is None, generate original lyrics based on the given theme and style.

Strictly adhere to the following requirements:

### Mandatory Formatting Rules
1. Only output the formatted lyrics—do not include any explanations, introductions, or additional messages.
2. Only include timestamps and lyrics. Do not use brackets, side notes, or section markers (e.g., chorus, instrumental, outro).
3. Each line must follow the format [mm:ss.xx]Lyrics content, with no spaces between the timestamp and lyrics. The lyrics should be continuous and complete.
4. The total song length must not exceed 1 minute 30 seconds.
5. Timestamps should be naturally distributed. The first lyric must not start at [00:00.00]—consider an intro before the lyrics begin.

### Prohibited Examples (Do Not Include)
- Incorrect: [01:30.00](Piano solo)
- Incorrect: [00:45.00][Chorus]
"""

    instruction = f"""
<|system|>
{standard_sys}</s>
<|user|>
theme: {theme}
tags: {tags}
lyrics: {lyrics}
"""

    # End the prompt with the assistant tag so the model starts its reply
    # immediately instead of having to emit the tag itself.
    prompt = f"{instruction.strip()}</s>\n<|assistant|>\n"
    outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
    generated = outputs[0]["generated_text"]

    if generated.startswith(prompt):
        # The pipeline echoes the prompt; drop exactly that prefix.
        cleaned_text = generated[len(prompt):]
    else:
        # Fallback: remove everything from the system tag to the assistant tag.
        pattern = r'\<\|system\|\>(.*?)\<\|assistant\|\>'
        cleaned_text = re.sub(pattern, '', generated, flags=re.DOTALL)

    print(f"SUGGESTED Lyrics: {cleaned_text}")
    return cleaned_text.lstrip("\n")
103
+
104
def general_process(theme, tags, lyrics):
    """Handle a Submit click: produce lyrics and leave the audio slot empty.

    Args:
        theme: Song theme text.
        tags: Music style tags text.
        lyrics: Optional lyrics text.

    Returns:
        A ``(None, lyrics_text)`` pair; the first element is always ``None``.
    """
    suggested_lyrics = prepare_lyrics_with_llm(theme, tags, lyrics)
    audio_placeholder = None
    return audio_placeholder, suggested_lyrics
107
+
108
+
109
# Build the Gradio UI. The original passed css=css, but no `css` name is
# defined or imported in this file, so the script failed with NameError
# before the interface could start. The default (no custom CSS) is used.
with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown("# Simpler Diff Rythm")

        # Inputs
        theme_song = gr.Textbox(label="Theme")
        style_tags = gr.Textbox(label="Music style tags")
        lyrics = gr.Textbox(label="Lyrics optional")
        submit_btn = gr.Button("Submit")

        # Outputs
        song_result = gr.Audio(label="Song result")
        generated_lyrics = gr.Textbox(label="Generated Lyrics")

    # general_process returns (audio, lyrics) in the same order as `outputs`.
    submit_btn.click(
        fn = general_process,
        inputs = [theme_song, style_tags, lyrics],
        outputs = [song_result, generated_lyrics]
    )

demo.queue().launch(show_api=False, show_error=True, ssr_mode=False)