Spaces:
Running
Running
ziqiangao
committed on
Commit
·
d7ff226
1
Parent(s):
b232319
add app
Browse files- app.py +207 -0
- packages.txt +1 -0
- requirements.txt +5 -0
app.py
ADDED
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import librosa
|
3 |
+
from PIL import Image, ImageDraw, ImageFont
|
4 |
+
from mutagen.mp3 import MP3
|
5 |
+
from mutagen.id3 import ID3, APIC, TIT2, TPE1
|
6 |
+
import io
|
7 |
+
from colorthief import ColorThief
|
8 |
+
import colorsys
|
9 |
+
import math
|
10 |
+
import os
|
11 |
+
from multiprocessing import Pool, cpu_count
|
12 |
+
import tempfile
|
13 |
+
import ffmpeg
|
14 |
+
import subprocess
|
15 |
+
import traceback
|
16 |
+
|
17 |
+
# Base directory: the font files are read from it and rendered frames/videos
# are written below it. The default is the original hard-coded location; set
# the VISUALIZER_PATH environment variable to point it somewhere else.
path = os.environ.get('VISUALIZER_PATH', 'C:/Users/ziqia/Downloads/oooo/')
# Every user of `path` appends a file name with plain `+`, so the value must
# end with a path separator.
if not path.endswith(('/', '\\')):
    path += '/'
|
18 |
+
|
19 |
+
def getRenderCords(ta: list, idx: int, res: int = 1024, size: tuple = (1280, 720)) -> list:
    """Build the waveform polyline for the window of samples centred on ``idx``.

    Args:
        ta: Sample values; 128 is treated as the zero line.
        idx: Index of the sample at the centre of the window.
        res: Window length in samples; ``res // 2`` samples are taken on each
            side of ``idx``.
        size: ``(width, height)`` of the target frame.

    Returns:
        A list of ``(x, y)`` tuples in centre-origin coordinates. ``x`` starts
        at ``-0.45 * width`` and advances by ``0.9 * width / res`` per sample;
        ``y`` is the sample's offset from 128 scaled by ``height / 2000`` and
        shifted by ``-0.35 * height``.
    """
    half = res // 2
    y_scale = size[1] / 2000
    y_offset = size[1] * .7 / -2
    x_step = (size[0] * .9) / res
    total = len(ta)

    x = size[0] * .9 / -2
    points = []
    for i in range(idx - half, idx + half):
        # Indices outside the data are drawn on the zero line. Without this a
        # negative index silently wraps to the end of the list and an index
        # past the end raises IndexError.
        sample = ta[i] if 0 <= i < total else 128
        points.append((x, (sample - 128) * y_scale + y_offset))
        x += x_step
    return points
|
29 |
+
|
30 |
+
def center_to_top_left(coords, width=1280, height=720):
    """Convert every ``(x, y)`` pair in ``coords`` with ``totopleft``.

    Returns a new list; ``coords`` itself is not modified.
    """
    return [
        totopleft((cx, cy), width=width, height=height)
        for cx, cy in coords
    ]
|
35 |
+
|
36 |
+
def totopleft(coord, width=1280, height=720):
    """Map a centre-origin, y-up coordinate to a top-left-origin, y-down one.

    Returns the tuple ``(coord[0] + width / 2, height / 2 - coord[1])``.
    """
    cx, cy = coord[0], coord[1]
    left = cx + width / 2
    top = height / 2 - cy
    return left, top
|
38 |
+
|
39 |
+
def getTrigger(ad: int, a: list, max: int = 1024) -> int:
    """Find where to anchor the waveform window, scanning forward from ``ad``.

    A position ``i`` qualifies when ``a[i]`` is below 128 and ``a[i + 2]`` is
    not.

    Args:
        ad: Index at which the scan starts.
        a: Sample values.
        max: Largest distance from ``ad`` that is examined. (The name shadows
            the builtin inside this function; it is kept because callers pass
            it by keyword.)

    Returns:
        The first qualifying index in ``[ad, ad + max]``. If there is none,
        ``ad + max + 1``, or the index at which the data ran out, whichever
        comes first.
    """
    last = len(a) - 3  # highest i for which a[i + 2] still exists
    stop = ad + max
    i = ad
    # Bounding the scan by `last` avoids the IndexError the unbounded loop
    # raised when no qualifying position exists before the end of the data.
    while i <= stop and i <= last:
        if a[i] < 128 and a[i + 2] >= 128:
            return i
        i += 1
    return i
|
44 |
+
|
45 |
+
def extract_cover_image(mp3_file):
    """Return the first embedded cover picture of an MP3 as a PIL image.

    Args:
        mp3_file: The MP3 to inspect; passed straight to ``MP3``.

    Returns:
        The image opened from the first ``APIC`` frame found, or ``None``
        (after printing a notice) when the file has no such frame.
    """
    audio = MP3(mp3_file, ID3=ID3)
    # `tags` is None when the file carries no ID3 tag at all; iterating it
    # unguarded would raise AttributeError instead of reporting "no cover".
    tags = audio.tags
    if tags is not None:
        for tag in tags.values():
            if isinstance(tag, APIC):
                return Image.open(io.BytesIO(tag.data))
    print("No cover image found in the MP3 file.")
    return None
|
54 |
+
|
55 |
+
def getTitleAndArtist(mp3_file):
    """Read the title (``TIT2``) and artist (``TPE1``) frames of an MP3.

    Returns:
        ``(title, artist)``: the first text entry of each frame, with
        ``'Unknown Title'`` / ``'Unknown Artist'`` standing in for a missing
        frame.
    """
    audio = MP3(mp3_file, ID3=ID3)
    fallback_title = TIT2(encoding=3, text='Unknown Title')
    fallback_artist = TPE1(encoding=3, text='Unknown Artist')
    title_frame = audio.get('TIT2', fallback_title)
    artist_frame = audio.get('TPE1', fallback_artist)
    return title_frame.text[0], artist_frame.text[0]
|
60 |
+
|
61 |
+
def getColour(img):
    """Return the dominant colour of ``img`` as reported by ColorThief.

    The image is written to a temporary PNG file, analysed with
    ``get_color(quality=1)``, and the file is removed again.

    Args:
        img: An image object providing ``save(filename, format=...)``.

    Returns:
        Whatever ``ColorThief.get_color`` returns for the image.
    """
    tmpfile = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
    # Only the name is needed. Close the handle right away so the file can be
    # reopened by name on platforms that forbid opening a file twice.
    tmpfile.close()
    try:
        img.save(tmpfile.name, format="PNG")
        return ColorThief(tmpfile.name).get_color(quality=1)
    finally:
        # Clean up even when saving or analysing fails.
        os.remove(tmpfile.name)
|
68 |
+
|
69 |
+
def clamp(number):
    """Limit ``number`` to the range 0..1.

    Values that are not greater than 0 give ``0``, values greater than 1 give
    ``1``, and everything in between is returned unchanged.
    """
    if not number > 0:
        return 0
    return 1 if number > 1 else number
|
71 |
+
|
72 |
+
def normalizeColour(C) -> tuple[int, int, int]:
    """Derive the background colour from an RGB colour with 0-255 channels.

    The hue is kept, the saturation is multiplied by 1.3 (limited by
    ``clamp``) and the HSV value is fixed at 0.8.

    Returns:
        The adjusted colour as three integer channels.
    """
    red, green, blue = C[0] / 255, C[1] / 255, C[2] / 255
    hue, saturation, _ = colorsys.rgb_to_hsv(red, green, blue)
    adjusted = colorsys.hsv_to_rgb(hue, clamp(1.3 * saturation), .8)
    return tuple(math.floor(channel * 255) for channel in adjusted)
|
76 |
+
|
77 |
+
def normalizeColourBar(C) -> tuple[int, int, int]:
    """Derive the progress-bar track colour from an RGB colour (0-255 channels).

    The hue is kept, the saturation is multiplied by 1.4 (limited by
    ``clamp``) and the HSV value is fixed at 0.6.

    Returns:
        The adjusted colour as three integer channels.
    """
    scaled = [component / 255 for component in (C[0], C[1], C[2])]
    hsv = colorsys.rgb_to_hsv(scaled[0], scaled[1], scaled[2])
    saturation = clamp(1.4 * hsv[1])
    r, g, b = colorsys.hsv_to_rgb(hsv[0], saturation, .6)
    return math.floor(r * 255), math.floor(g * 255), math.floor(b * 255)
|
81 |
+
|
82 |
+
def stamp_text(draw, text, font, position, align='left'):
    """Draw ``text`` in white, vertically centred on ``position``.

    Args:
        draw: Drawing object providing ``textbbox`` and ``text``.
        text: The string to draw.
        font: Font passed through to ``draw``.
        position: ``(x, y)`` anchor.
        align: ``'center'`` centres the text horizontally on ``x``,
            ``'right'`` ends it at ``x``; any other value starts it at ``x``.
    """
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    text_width = right - left
    text_height = bottom - top

    x, y = position
    if align == 'right':
        x -= text_width
    elif align == 'center':
        x -= text_width // 2
    y -= text_height // 2

    draw.text((x, y), text, font=font, fill="#fff")
|
94 |
+
|
95 |
+
def linear_interpolate(start, stop, progress):
    """Return the point ``progress`` of the way from ``start`` to ``stop``.

    ``progress`` of 0 gives ``start`` and 1 gives ``stop``; values outside
    that range extrapolate.
    """
    span = stop - start
    return start + progress * span
|
97 |
+
|
98 |
+
def render_frame(params):
    """Render one frame of the visualisation and save it as a PNG.

    Args:
        params: Tuple ``(n, samples_array, cover_img, title, artist,
            dominant_color, width, height, fps, name, oscres)``. It is a single
            tuple so the function can be mapped over a list of jobs.

    The frame is written to ``<path>out/<name>/<n>.png``. Nothing is written
    when frame ``n`` starts at or beyond the end of ``samples_array``.
    """
    n, samples_array, cover_img, title, artist, dominant_color, width, height, fps, name, oscres = params

    # Numbers coming from the UI may be floats, but both values end up in list
    # indexing, which requires integers.
    oscres = int(oscres)
    samples_per_frame = 11025 // int(fps)  # main() loads the audio at 11025 Hz

    s = samples_per_frame * n
    if s >= len(samples_array):
        return

    img = Image.new('RGB', (width, height), normalizeColour(dominant_color))
    d = ImageDraw.Draw(img)

    # Waveform: anchor the window at the trigger position, then convert to
    # image coordinates.
    trigger = getTrigger(s, samples_array, max=oscres)
    wave = getRenderCords(samples_array, trigger, res=oscres, size=(width, height))
    d.line(center_to_top_left(wave, width=width, height=height), fill='#fff', width=2)

    # Cover art: a square half the shorter side, horizontally centred.
    short_side = min(width, height)
    cs = math.floor(short_side / 2)
    cov = cover_img.resize((cs, cs))
    img.paste(cov, (((width // 2) - cs // 2), math.floor(height * .1)))

    # Scale fonts in proportion to the frame. The previous integer factor
    # (short_side // 720) is 0 for frames smaller than 720 px, which yields an
    # invalid font size of 0. At exact multiples of 720 the sizes are unchanged.
    font_scale = short_side / 720
    fontT = ImageFont.truetype(path+'Lexend-Bold.ttf', max(1, round(50 * font_scale)))
    fontA = ImageFont.truetype(path+'Lexend-Bold.ttf', max(1, round(40 * font_scale)))
    fontD = ImageFont.truetype(path+'SpaceMono-Bold.ttf', max(1, round(30 * font_scale)))  # loaded but not drawn with

    stamp_text(d, title, fontT, totopleft((0, short_side * .3 // -2), width=width, height=height), 'center')
    stamp_text(d, artist, fontA, totopleft((0, short_side * .44 // -2), width=width, height=height), 'center')

    # Progress bar: a wide track in the darker accent colour with a narrower
    # white bar on top whose length follows the playback position.
    bar_y = height * .95 // -2
    d.line(center_to_top_left([(width * .96 // -2, bar_y), (width * .96 // 2, bar_y)], width=width, height=height),
           fill=normalizeColourBar(dominant_color), width=15 * height // 360)
    d.line(center_to_top_left([(width * .95 // -2, bar_y),
                               (linear_interpolate(width * .95 // -2, width * .95 // 2, s / len(samples_array)),
                                bar_y)], width=width, height=height), fill='#fff', width=10 * height // 360)

    os.makedirs(path+f'out/{name}/', exist_ok=True)
    img.save(path+f'out/{name}/{str(n)}.png', 'PNG',)
|
129 |
+
|
130 |
+
def RenderVid(af, n, fps=30):
    """Combine the rendered frames of job ``n`` with audio ``af`` into ``<n>.mp4``.

    Args:
        af: Path of the audio file.
        n: Job name; frames are read from ``<path>out/<n>/%d.png`` and the
            video is written to ``n + '.mp4'``.
        fps: Frame rate used for reading the frames and for the output.
    """
    # Each source is its own input stream and both are handed to output().
    # A stream object has no .input() method, so inputs cannot be chained.
    frames = ffmpeg.input(path+f'out/{n}/%d.png', framerate=fps)
    audio = ffmpeg.input(af)
    (ffmpeg
        .output(frames, audio, n + '.mp4', vcodec='libx264', r=fps, pix_fmt='yuv420p', acodec='aac', shortest=None)
        .run()
    )
|
137 |
+
|
138 |
+
def main(file, name, fps=30, res: tuple=(1280,720), oscres = 512):
    """Render a visualisation video for an MP3 file.

    Args:
        file: Path of the MP3 file.
        name: Job name; frames go to ``<path>out/<name>/`` and the video to
            ``<path><name>.mp4``.
        fps: Frames per second of the video.
        res: ``(width, height)`` of the video.
        oscres: Number of samples shown in the waveform window.

    Returns:
        None. Returns early, without rendering, when the file has no cover
        image.

    Raises:
        subprocess.CalledProcessError: If the ffmpeg command exits with an
            error.
    """
    # UI number fields may deliver floats; these values feed range() and list
    # indexing, which need integers.
    fps = int(fps)
    oscres = int(oscres)

    # Load the audio file
    audio_path = file
    y, sr = librosa.load(audio_path, sr=11025)  # Resample to 11025 Hz
    # Convert to unsigned 8-bit with 128 as the zero line. Clip first: a
    # full-scale sample of 1.0 maps to 256, which does not fit in uint8.
    y_u8 = (y * 128 + 128).clip(0, 255).astype('uint8')
    samples_array = y_u8.tolist()

    # Extract cover image, title, and artist
    cover_img = extract_cover_image(audio_path)
    if cover_img is None:
        return  # Exit if no cover image found

    title, artist = getTitleAndArtist(audio_path)
    dominant_color = getColour(cover_img)

    # Frame rendering parameters
    width, height = res[0], res[1]
    num_frames = len(samples_array) // (11025 // fps)

    # Prepare parameters for each frame
    params = [(n, samples_array, cover_img, title, artist, dominant_color, width, height, fps, name, oscres) for n in range(num_frames)]

    # Rendering is best-effort: a failure is reported but the frames that were
    # written are still encoded below.
    try:
        with Pool(cpu_count()) as pool:
            pool.map(render_frame, params)
    except Exception:
        print('Ended in error: ' + traceback.format_exc())

    print('FFMPEG')
    ffmpeg_cmd = [
        "ffmpeg",
        '-y',                                # Overwrite an existing output instead of prompting
        '-framerate', str(fps),              # Must match the rate the frames were rendered for
        '-i', path+f'out/{name}/%d.png',     # Input PNG images
        '-i', f'{file}',                     # Input MP3 audio
        '-c:v', 'libx264',
        '-r', str(fps),
        '-pix_fmt', 'yuv420p',
        '-c:a', 'aac',
        '-shortest',
        path+f'{name}.mp4'                   # Output MP4 filename
    ]
    # check=True surfaces an ffmpeg failure instead of silently ignoring it.
    subprocess.run(ffmpeg_cmd, check=True)
|
182 |
+
|
183 |
+
def gradio_interface(audio_file, output_name, fps=30, resolution='1280x720', oscres=512):
    """Callback for the web UI: validate the inputs and run ``main``.

    Args:
        audio_file: The uploaded file, either a path or an object exposing
            the path as ``.name``.
        output_name: Name of the output video, without extension.
        fps: Frames per second.
        resolution: Output size written as ``'<width>x<height>'``.
        oscres: Number of samples shown in the waveform window.

    Returns:
        A status message. Any exception is caught and reported in the message
        rather than raised.
    """
    try:
        # NOTE(review): some Gradio versions appear to deliver uploads as a
        # temp-file wrapper rather than a path string; confirm against the
        # installed version. Using `.name` when present covers both cases.
        audio_path = getattr(audio_file, 'name', audio_file)

        parts = str(resolution).lower().split('x')
        if len(parts) != 2:
            raise ValueError(f"resolution must look like '1280x720', got {resolution!r}")
        res = (int(parts[0]), int(parts[1]))
        if res[0] <= 0 or res[1] <= 0:
            raise ValueError(f"resolution must be positive, got {resolution!r}")

        # Number fields may arrive as floats; downstream code needs integers.
        main(audio_path, output_name, fps=int(fps), res=res, oscres=int(oscres))
        return f"Output video '{output_name}.mp4' has been created. Click the link to download."
    except Exception as e:
        return f"Error processing file: {e}"
|
190 |
+
|
191 |
+
# Define Gradio interface
# Components are taken from the top-level `gr` namespace with `value=` /
# `minimum=` / `maximum=` keywords; the legacy `gr.inputs` / `gr.outputs`
# wrappers do not accept `min` / `max`.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.File(label="Upload your MP3 file"),
        gr.Textbox(label="Output Video Name (without extension)"),
        gr.Number(label="Frames per Second", value=30, minimum=1, maximum=60, precision=0),
        gr.Textbox(label="Output Resolution (e.g., 1280x720)", value="1280x720"),
        gr.Number(label="Number of Visualization Segments", value=512, precision=0)
    ],
    outputs=gr.Textbox(label="Output"),
    title="MP3 to Video Visualization",
    description="Upload an MP3 file and configure parameters to create a visualization video."
)

# Launch Gradio interface
# Guarded because main() uses a multiprocessing Pool: with the "spawn" start
# method each worker re-imports this module, and an unguarded launch() would
# run again inside every worker.
if __name__ == "__main__":
    iface.launch()
|
packages.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
ffmpeg
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
librosa
|
2 |
+
pillow
|
3 |
+
mutagen
|
4 |
+
Colorthief
|
5 |
+
ffmpeg-python
|