ing0 committed on
Commit b311ba6 · 1 Parent(s): 1866ab0
Files changed (2)
  1. app.py +7 -2
  2. diffrhythm/infer/infer_utils.py +6 -14
app.py CHANGED
@@ -139,6 +139,11 @@ css = """
 
 with gr.Blocks(css=css) as demo:
     gr.HTML(f"""
+    <div style="display: flex; align-items: center;">
+        <img src='https://raw.githubusercontent.com/ASLP-lab/DiffRhythm/refs/heads/main/src/DiffRhythm_logo.jpg'
+             style='width: 200px; height: 40%; display: block; margin: 0 auto 20px;'>
+    </div>
+
     <div style="flex: 1; text-align: center;">
         <div style="font-size: 2em; font-weight: bold; text-align: center; margin-bottom: 5px">
             Di♪♪Rhythm (谛韵)
@@ -194,7 +199,7 @@ with gr.Blocks(css=css) as demo:
         - Shorter clips may lead to incoherent generation
 
     4. **Supported Languages**
-        - Chinese and English
+        - **Chinese and English**
         - More languages comming soon
     """)
 
@@ -255,7 +260,7 @@ with gr.Blocks(css=css) as demo:
     )
 
     # page 2
-    with gr.Tab("LLM Generate LRC", id=1):
+    with gr.Tab("Lyrics Generate", id=1):
         with gr.Row():
             with gr.Column():
                 with gr.Accordion("Notice", open=False):
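The app.py changes are presentation-only: a logo banner is injected at the top of the gr.HTML block, the "Chinese and English" bullet is bolded, and the second tab is renamed from "LLM Generate LRC" to "Lyrics Generate". For orientation, here is a minimal sketch of the two-tab layout the rename sits in; the first tab's label and the placeholder components inside each tab are assumptions, not the Space's actual widgets.

import gradio as gr

# Minimal sketch of the two-page layout implied by the diff.
with gr.Blocks() as demo:
    gr.HTML("<div style='text-align: center;'>Di♪♪Rhythm (谛韵)</div>")

    with gr.Tab("Music Generate", id=0):   # page 1 (label assumed)
        gr.Markdown("song generation controls go here")

    with gr.Tab("Lyrics Generate", id=1):  # page 2, renamed in this commit
        with gr.Row():
            with gr.Column():
                with gr.Accordion("Notice", open=False):
                    gr.Markdown("usage notes go here")

if __name__ == "__main__":
    demo.launch()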
diffrhythm/infer/infer_utils.py CHANGED
@@ -53,25 +53,17 @@ def get_negative_style_prompt(device):
 
 def get_style_prompt(model, wav_path):
     mulan = model
-
-    ext = os.path.splitext(wav_path)[-1].lower()
-    if ext == '.mp3':
-        meta = MP3(wav_path)
-        audio_len = meta.info.length
-    elif ext in ['.wav', '.flac']:
-        audio_len = librosa.get_duration(path=wav_path)
-    else:
-        raise ValueError("Unsupported file format: {}".format(ext))
+    audio, _ = librosa.load(wav_path, sr=24000)
+    audio_len = librosa.get_duration(y=audio, sr=24000)
 
     assert audio_len >= 1, "Input audio length shorter than 1 second"
 
-    if audio_len >= 10:
-        mid_time = audio_len // 2
-        start_time = mid_time - 5
-        wav, _ = librosa.load(wav_path, sr=24000, offset=start_time, duration=10)
+    if audio_len > 10:
+        start_time = int(audio_len // 2 - 5)
+        wav = audio[start_time*24000:(start_time+10)*24000]
 
     else:
-        wav, _ = librosa.load(wav_path, sr=24000)
+        wav = audio
     wav = torch.tensor(wav).unsqueeze(0).to(model.device)
 
     with torch.no_grad():
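The get_style_prompt refactor drops the format-specific duration probing (mutagen's MP3 metadata for .mp3, librosa.get_duration on the path for .wav/.flac, ValueError otherwise) and instead decodes the whole clip once at 24 kHz, then slices the middle 10 seconds directly out of the array. Below is a standalone sketch of the new behaviour; the helper name center_crop_10s is illustrative only, and the MuLan forward pass that follows in the real function is omitted.

# Standalone sketch of the new crop logic in get_style_prompt.
import librosa
import torch

SR = 24000  # sample rate the style encoder expects

def center_crop_10s(wav_path: str) -> torch.Tensor:
    # Decode the whole clip at 24 kHz, regardless of container format.
    audio, _ = librosa.load(wav_path, sr=SR)
    audio_len = librosa.get_duration(y=audio, sr=SR)
    assert audio_len >= 1, "Input audio length shorter than 1 second"

    if audio_len > 10:
        # Take the 10-second window centred on the middle of the clip.
        start = int(audio_len // 2 - 5)
        wav = audio[start * SR:(start + 10) * SR]
    else:
        wav = audio
    return torch.tensor(wav).unsqueeze(0)  # shape: (1, samples)

Computing the crop from the decoded samples rather than from container metadata presumably sidesteps cases where a file's header duration disagrees with its actual audio, and it removes the hard restriction to .mp3/.wav/.flac inputs.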