ziqiangao committed on
Commit
d7ff226
·
1 Parent(s): b232319
Files changed (3) hide show
  1. app.py +207 -0
  2. packages.txt +1 -0
  3. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import librosa
3
+ from PIL import Image, ImageDraw, ImageFont
4
+ from mutagen.mp3 import MP3
5
+ from mutagen.id3 import ID3, APIC, TIT2, TPE1
6
+ import io
7
+ from colorthief import ColorThief
8
+ import colorsys
9
+ import math
10
+ import os
11
+ from multiprocessing import Pool, cpu_count
12
+ import tempfile
13
+ import ffmpeg
14
+ import subprocess
15
+ import traceback
16
+
17
# Base directory that holds the font files and receives the rendered output
# (frames under ``out/<name>/`` and the final ``<name>.mp4``).  Set the
# VISUALIZER_PATH environment variable to point somewhere else without editing
# the source; the original location remains the default.
# os.path.join(..., '') guarantees a trailing separator, which matters because
# the rest of the file builds paths by plain string concatenation.
path = os.path.join(os.environ.get('VISUALIZER_PATH', 'C:/Users/ziqia/Downloads/oooo/'), '')
18
+
19
def getRenderCords(ta: list, idx: int, res: int = 1024, size: tuple = (1280, 720)) -> list:
    """Build the oscilloscope polyline for the window of samples around ``idx``.

    Args:
        ta: Audio samples as numbers centred on 128.
        idx: Index of the sample at the centre of the window.
        res: Window length in samples; ``2 * (res // 2)`` points are produced.
        size: ``(width, height)`` of the target frame in pixels.

    Returns:
        A list of ``(x, y)`` tuples in centre-origin coordinates. ``x`` spans
        90% of the frame width; ``y`` is the sample offset from 128 scaled by
        ``height / 2000`` and shifted by ``-0.35 * height``.

    Positions that fall before the start or past the end of ``ta`` are drawn
    as silence (128) instead of raising IndexError or wrapping around to the
    other end of the track.
    """
    half = res // 2
    if half <= 0:
        return []

    width, height = size[0], size[1]
    total = len(ta)
    x = width * .9 / -2
    coords = []
    for i in range(idx - half, idx + half):
        sample = ta[i] if 0 <= i < total else 128
        y = (sample - 128) * (height / 2000) + (height * .7 / -2)
        coords.append((x, y))
        # Accumulate exactly as before so in-range output is unchanged.
        x += (width * .9) / res
    return coords
29
+
30
def center_to_top_left(coords, width=1280, height=720):
    """Convert every ``(x, y)`` pair in ``coords`` with ``totopleft``.

    Returns a new list; ``coords`` itself is left untouched.
    """
    return [totopleft((x, y), width=width, height=height) for x, y in coords]
35
+
36
def totopleft(coord, width=1280, height=720):
    """Map a centre-origin point (y grows upward) to top-left-origin pixels.

    Returns ``(coord[0] + width / 2, height / 2 - coord[1])``.
    """
    cx, cy = coord[0], coord[1]
    left = cx + width / 2
    top = height / 2 - cy
    return left, top
38
+
39
def getTrigger(ad: int, a: list, max: int = 1024) -> int:
    """Find a rising crossing of the 128 midline at or after ``ad``.

    A trigger is an index ``i`` with ``a[i] < 128`` and ``a[i + 2] >= 128``.

    Args:
        ad: Index at which the search starts.
        a: Audio samples as numbers centred on 128.
        max: Give up once the search has moved more than this many samples
            past ``ad``. (The name shadows the builtin but is part of the
            keyword interface callers use.)

    Returns:
        The trigger index, or the index at which the search stopped because
        it exceeded ``max`` or ran out of samples. The original raised
        IndexError in the latter case.
    """
    limit = ad + max
    last = len(a) - 3  # highest index for which a[i + 2] still exists
    i = ad
    while i <= limit and i <= last:
        if a[i] < 128 and a[i + 2] >= 128:
            break
        i += 1
    return i
44
+
45
def extract_cover_image(mp3_file):
    """Return the first embedded cover picture of an MP3 as a PIL image.

    Args:
        mp3_file: The MP3 to inspect; passed straight to ``mutagen.mp3.MP3``.

    Returns:
        The image decoded from the first APIC frame, or ``None`` (after
        printing a notice) when the file has no such frame.
    """
    audio = MP3(mp3_file, ID3=ID3)
    # ``tags`` is None for a file without any ID3 tag; guard against that
    # instead of failing with AttributeError on ``.values()``.
    tags = audio.tags
    if tags is not None:
        for tag in tags.values():
            if isinstance(tag, APIC):
                return Image.open(io.BytesIO(tag.data))
    print("No cover image found in the MP3 file.")
    return None
54
+
55
def getTitleAndArtist(mp3_file):
    """Read the title (TIT2) and artist (TPE1) of an MP3.

    Returns ``(title, artist)``; a missing frame falls back to
    'Unknown Title' / 'Unknown Artist' respectively.
    """
    tagged = MP3(mp3_file, ID3=ID3)
    fallback_title = TIT2(encoding=3, text='Unknown Title')
    fallback_artist = TPE1(encoding=3, text='Unknown Artist')
    title_frame = tagged.get('TIT2', fallback_title)
    artist_frame = tagged.get('TPE1', fallback_artist)
    return title_frame.text[0], artist_frame.text[0]
60
+
61
def getColour(img):
    """Return the dominant colour of ``img`` as reported by ColorThief.

    The image is written to a temporary PNG whose name is handed to
    ColorThief. The temporary file is removed even when saving or colour
    extraction fails; previously it was leaked in that case.

    Args:
        img: A PIL image.

    Returns:
        The value of ``ColorThief.get_color(quality=1)``.
    """
    tmpfile = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
    try:
        # Release our own handle first so the file can be reopened by name.
        tmpfile.close()
        img.save(tmpfile.name, format="PNG")
        return ColorThief(tmpfile.name).get_color(quality=1)
    finally:
        os.remove(tmpfile.name)
68
+
69
def clamp(number):
    """Limit ``number`` to the closed interval [0, 1]."""
    if number > 1:
        return 1
    if number > 0:
        return number
    return 0
71
+
72
def normalizeColour(C) -> tuple[int, int, int]:
    """Derive the background colour from an 8-bit RGB colour.

    Keeps the hue, multiplies saturation by 1.3 (clamped to 1) and fixes the
    HSV value at 0.8. Returns 8-bit ``(r, g, b)`` integers.
    """
    red, green, blue = C[0] / 255, C[1] / 255, C[2] / 255
    hue, saturation, _ = colorsys.rgb_to_hsv(red, green, blue)
    adjusted = colorsys.hsv_to_rgb(hue, clamp(1.3 * saturation), .8)
    return tuple(math.floor(channel * 255) for channel in adjusted)
76
+
77
def normalizeColourBar(C) -> tuple[int, int, int]:
    """Derive the progress-bar track colour from an 8-bit RGB colour.

    Keeps the hue, multiplies saturation by 1.4 (clamped to 1) and fixes the
    HSV value at 0.6. Returns 8-bit ``(r, g, b)`` integers.
    """
    red, green, blue = C[0] / 255, C[1] / 255, C[2] / 255
    hue, saturation, _ = colorsys.rgb_to_hsv(red, green, blue)
    adjusted = colorsys.hsv_to_rgb(hue, clamp(1.4 * saturation), .6)
    return tuple(math.floor(channel * 255) for channel in adjusted)
81
+
82
def stamp_text(draw, text, font, position, align='left'):
    """Draw ``text`` in white, vertically centred on ``position``.

    ``align`` selects how ``position`` relates to the text horizontally:
    'left' (default) anchors its left edge, 'center' its middle and 'right'
    its right edge. Any other value behaves like 'left'.
    """
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    x, y = position
    y -= (bottom - top) // 2
    if align == 'center':
        x -= (right - left) // 2
    elif align == 'right':
        x -= right - left

    draw.text((x, y), text, font=font, fill="#fff")
94
+
95
def linear_interpolate(start, stop, progress):
    """Return the point ``progress`` of the way from ``start`` to ``stop``."""
    span = stop - start
    return start + progress * span
97
+
98
def render_frame(params):
    """Render one video frame and save it as ``<path>out/<name>/<n>.png``.

    Takes a single tuple so it can be used directly with ``Pool.map``.

    Args:
        params: ``(n, samples_array, cover_img, title, artist,
            dominant_color, width, height, fps, name, oscres)`` where ``n``
            is the frame number, ``samples_array`` the audio samples centred
            on 128, ``oscres`` the oscilloscope window length in samples and
            the remaining items describe the artwork, labels and frame size.

    Returns:
        None. The frame is written to disk as a side effect.

    Font sizes scale with the shorter frame side, multiplying before the
    integer division as the bar widths below already do. The original
    ``50 * (side // 720)`` collapsed to 0 below 720p, which cannot be used as
    a font size, and did not grow between 720p and 1440p.
    """
    (n, samples_array, cover_img, title, artist, dominant_color,
     width, height, fps, name, oscres) = params

    # Index of the first audio sample shown in this frame (11025 Hz audio).
    s = (11025 // fps) * n
    if s > len(samples_array):
        return

    img = Image.new('RGB', (width, height), normalizeColour(dominant_color))
    d = ImageDraw.Draw(img)

    # Oscilloscope trace, anchored on a trigger point so it appears stable.
    trigger = getTrigger(s, samples_array, max=oscres)
    wave = getRenderCords(samples_array, trigger, res=oscres, size=(width, height))
    d.line(center_to_top_left(wave, width=width, height=height), fill='#fff', width=2)

    # Cover art: a square of half the shorter side, centred horizontally.
    short_side = min(width, height)
    cs = math.floor(short_side / 2)
    cov = cover_img.resize((cs, cs))
    img.paste(cov, (((width // 2) - cs // 2), math.floor(height * .1)))

    # max(1, ...) keeps the size valid for very small frames.
    fontT = ImageFont.truetype(path+'Lexend-Bold.ttf', max(1, 50 * short_side // 720))
    fontA = ImageFont.truetype(path+'Lexend-Bold.ttf', max(1, 40 * short_side // 720))

    stamp_text(d, title, fontT, totopleft((0, short_side * .3 // -2), width=width, height=height), 'center')
    stamp_text(d, artist, fontA, totopleft((0, short_side * .44 // -2), width=width, height=height), 'center')

    # Progress bar: a coloured track with a white fill proportional to the
    # playback position.
    bar_y = height * .95 // -2
    d.line(center_to_top_left([(width * .96 // -2, bar_y), (width * .96 // 2, bar_y)], width=width, height=height),
           fill=normalizeColourBar(dominant_color), width=15 * height // 360)
    fill_end = linear_interpolate(width * .95 // -2, width * .95 // 2, s / len(samples_array))
    d.line(center_to_top_left([(width * .95 // -2, bar_y), (fill_end, bar_y)], width=width, height=height),
           fill='#fff', width=10 * height // 360)

    os.makedirs(path+f'out/{name}/', exist_ok=True)
    img.save(path+f'out/{name}/{str(n)}.png', 'PNG',)
129
+
130
def RenderVid(af, n, fps=30):
    """Mux the rendered frames of ``n`` with audio ``af`` into ``<n>.mp4``.

    Args:
        af: Audio file to use as the soundtrack.
        n: Render name; frames are read from ``<path>out/<n>/%d.png`` and the
            video is written to ``<n>.mp4``.
        fps: Frame rate of the image sequence and of the output.

    The two inputs are created separately and passed to ``ffmpeg.output``;
    the original chained ``.input()`` onto the first stream.
    # NOTE(review): ffmpeg-python streams appear to have no ``.input``
    # method - confirm against the installed package.
    """
    frames = ffmpeg.input(path+f'out/{n}/%d.png', framerate=fps)
    audio = ffmpeg.input(af)
    (ffmpeg
        .output(frames, audio, n + '.mp4', vcodec='libx264', r=fps,
                pix_fmt='yuv420p', acodec='aac', shortest=None)
        .run()
    )
137
+
138
def main(file, name, fps=30, res: tuple=(1280,720), oscres = 512):
    """Render a visualisation video for an MP3 file.

    Loads the audio at 11025 Hz, renders one PNG per frame in a process pool
    and then calls the ``ffmpeg`` executable to combine the frames with the
    original audio into ``<path><name>.mp4``.

    Args:
        file: Path of the MP3 file.
        name: Render name, used for the frame directory and the output file.
        fps: Frames per second; truncated to an integer.
        res: ``(width, height)`` of the video in pixels.
        oscres: Oscilloscope window length in samples.

    Returns:
        None. Returns early, without rendering, when the MP3 has no cover.
    """
    sample_rate = 11025
    # UI widgets may hand over a float; range() and list indexing need ints.
    fps = int(fps)
    samples_per_frame = sample_rate // fps

    # Load the audio file
    audio_path = file
    y, sr = librosa.load(audio_path, sr=sample_rate)  # Resample to 11025 Hz
    # Clip before the cast: a full-scale sample (+1.0) maps to 256, which does
    # not fit in uint8.
    y_u8 = (y * 128 + 128).clip(0, 255).astype('uint8')
    samples_array = y_u8.tolist()

    # Extract cover image, title, and artist
    cover_img = extract_cover_image(audio_path)
    if cover_img is None:
        return  # Exit if no cover image found

    title, artist = getTitleAndArtist(audio_path)
    dominant_color = getColour(cover_img)

    # Frame rendering parameters
    width, height = res[0], res[1]
    num_frames = len(samples_array) // samples_per_frame

    # Prepare parameters for each frame
    params = [(n, samples_array, cover_img, title, artist, dominant_color, width, height, fps, name, oscres)
              for n in range(num_frames)]

    # Best effort: a failed render is reported but encoding still runs on
    # whatever frames were written.
    try:
        with Pool(cpu_count()) as pool:
            pool.map(render_frame, params)
    except Exception:
        print('Ended in error: ' + traceback.format_exc())

    print('FFMPEG')
    ffmpeg_cmd = [
        "ffmpeg",
        '-framerate', str(fps),  # was hard-coded to 30, ignoring ``fps``
        '-i', path+f'out/{name}/%d.png',  # Input PNG images
        '-i', f'{file}',  # Input MP3 audio
        '-c:v', 'libx264',
        '-r', str(fps),
        '-pix_fmt', 'yuv420p',
        '-c:a', 'aac',
        '-shortest',
        path+f'{name}.mp4'  # Output MP4 filename
    ]
    subprocess.run(ffmpeg_cmd)
182
+
183
def gradio_interface(audio_file, output_name, fps=30, resolution='1280x720', oscres=512):
    """Gradio callback: validate the form values and run ``main``.

    Args:
        audio_file: The uploaded file, either a path string or an object
            exposing the path as ``.name``.
        output_name: Name of the output video, without extension.
        fps: Frames per second; truncated to an integer.
        resolution: Frame size as ``'<width>x<height>'``.
        oscres: Oscilloscope window length in samples; truncated to an
            integer.

    Returns:
        A status message. Any failure is reported as
        ``"Error processing file: ..."`` rather than raised.
    """
    try:
        res = tuple(int(part) for part in resolution.lower().split('x'))
        if len(res) != 2 or min(res) <= 0:
            raise ValueError(f"resolution must look like '1280x720', got {resolution!r}")
        # Upload widgets may supply a temp-file wrapper instead of a path.
        audio_path = getattr(audio_file, 'name', audio_file)
        # Number widgets supply floats; the renderer needs integers.
        main(audio_path, output_name, fps=int(fps), res=res, oscres=int(oscres))
        return f"Output video '{output_name}.mp4' has been created. Click the link to download."
    except Exception as e:
        return f"Error processing file: {e}"
190
+
191
# Define Gradio interface.
# Components come from the top-level ``gr`` namespace with ``value=`` for
# defaults, replacing the legacy ``gr.inputs`` / ``gr.outputs`` classes.
# NOTE(review): ``minimum`` / ``maximum`` on gr.Number need a reasonably
# recent Gradio release - confirm against the version this app is deployed with.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.File(label="Upload your MP3 file"),
        gr.Textbox(label="Output Video Name (without extension)"),
        gr.Number(label="Frames per Second", value=30, minimum=1, maximum=60, precision=0),
        gr.Textbox(label="Output Resolution (e.g., 1280x720)", value="1280x720"),
        gr.Number(label="Number of Visualization Segments", value=512, precision=0),
    ],
    outputs=gr.Textbox(label="Output"),
    title="MP3 to Video Visualization",
    description="Upload an MP3 file and configure parameters to create a visualization video."
)

# Launch Gradio interface.
# Guarded so that multiprocessing workers that re-import this module (the
# "spawn" start method) do not each try to start a server.
if __name__ == "__main__":
    iface.launch()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ librosa
2
+ pillow
3
+ mutagen
4
+ Colorthief
5
+ ffmpeg