Spaces:
Running
on
Zero
Running
on
Zero
add logo
Browse files
- app.py +7 -2
- diffrhythm/infer/infer_utils.py +6 -14
app.py
CHANGED
@@ -139,6 +139,11 @@ css = """
|
|
139 |
|
140 |
with gr.Blocks(css=css) as demo:
|
141 |
gr.HTML(f"""
|
|
|
|
|
|
|
|
|
|
|
142 |
<div style="flex: 1; text-align: center;">
|
143 |
<div style="font-size: 2em; font-weight: bold; text-align: center; margin-bottom: 5px">
|
144 |
Di♪♪Rhythm (谛韵)
|
@@ -194,7 +199,7 @@ with gr.Blocks(css=css) as demo:
|
|
194 |
- Shorter clips may lead to incoherent generation
|
195 |
|
196 |
4. **Supported Languages**
|
197 |
-
- Chinese and English
|
198 |
- More languages coming soon
|
199 |
""")
|
200 |
|
@@ -255,7 +260,7 @@ with gr.Blocks(css=css) as demo:
|
|
255 |
)
|
256 |
|
257 |
# page 2
|
258 |
-
with gr.Tab("
|
259 |
with gr.Row():
|
260 |
with gr.Column():
|
261 |
with gr.Accordion("Notice", open=False):
|
|
|
139 |
|
140 |
with gr.Blocks(css=css) as demo:
|
141 |
gr.HTML(f"""
|
142 |
+
<div style="display: flex; align-items: center;">
|
143 |
+
<img src='https://raw.githubusercontent.com/ASLP-lab/DiffRhythm/refs/heads/main/src/DiffRhythm_logo.jpg'
|
144 |
+
style='width: 200px; height: 40%; display: block; margin: 0 auto 20px;'>
|
145 |
+
</div>
|
146 |
+
|
147 |
<div style="flex: 1; text-align: center;">
|
148 |
<div style="font-size: 2em; font-weight: bold; text-align: center; margin-bottom: 5px">
|
149 |
Di♪♪Rhythm (谛韵)
|
|
|
199 |
- Shorter clips may lead to incoherent generation
|
200 |
|
201 |
4. **Supported Languages**
|
202 |
+
- **Chinese and English**
|
203 |
- More languages coming soon
|
204 |
""")
|
205 |
|
|
|
260 |
)
|
261 |
|
262 |
# page 2
|
263 |
+
with gr.Tab("Lyrics Generate", id=1):
|
264 |
with gr.Row():
|
265 |
with gr.Column():
|
266 |
with gr.Accordion("Notice", open=False):
|
diffrhythm/infer/infer_utils.py
CHANGED
@@ -53,25 +53,17 @@ def get_negative_style_prompt(device):
|
|
53 |
|
54 |
def get_style_prompt(model, wav_path):
|
55 |
mulan = model
|
56 |
-
|
57 |
-
|
58 |
-
if ext == '.mp3':
|
59 |
-
meta = MP3(wav_path)
|
60 |
-
audio_len = meta.info.length
|
61 |
-
elif ext in ['.wav', '.flac']:
|
62 |
-
audio_len = librosa.get_duration(path=wav_path)
|
63 |
-
else:
|
64 |
-
raise ValueError("Unsupported file format: {}".format(ext))
|
65 |
|
66 |
assert audio_len >= 1, "Input audio length shorter than 1 second"
|
67 |
|
68 |
-
if audio_len
|
69 |
-
|
70 |
-
|
71 |
-
wav, _ = librosa.load(wav_path, sr=24000, offset=start_time, duration=10)
|
72 |
|
73 |
else:
|
74 |
-
wav
|
75 |
wav = torch.tensor(wav).unsqueeze(0).to(model.device)
|
76 |
|
77 |
with torch.no_grad():
|
|
|
53 |
|
54 |
def get_style_prompt(model, wav_path):
|
55 |
mulan = model
|
56 |
+
audio, _ = librosa.load(wav_path, sr=24000)
|
57 |
+
audio_len = librosa.get_duration(y=audio, sr=24000)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
assert audio_len >= 1, "Input audio length shorter than 1 second"
|
60 |
|
61 |
+
if audio_len > 10:
|
62 |
+
start_time = int(audio_len // 2 - 5)
|
63 |
+
wav = audio[start_time*24000:(start_time+10)*24000]
|
|
|
64 |
|
65 |
else:
|
66 |
+
wav = audio
|
67 |
wav = torch.tensor(wav).unsqueeze(0).to(model.device)
|
68 |
|
69 |
with torch.no_grad():
|