Spaces:

ziqiangao
/

musicscopegen

Running

App Files Community

ziqiangao committed on Jul 16, 2024

Commit

d7ff226

1 Parent(s): b232319

add app

Browse files

Files changed (3) hide show

app.py +207 -0
packages.txt +1 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,207 @@

+import gradio as gr
+import librosa
+from PIL import Image, ImageDraw, ImageFont
+from mutagen.mp3 import MP3
+from mutagen.id3 import ID3, APIC, TIT2, TPE1
+import io
+from colorthief import ColorThief
+import colorsys
+import math
+import os
+from multiprocessing import Pool, cpu_count
+import tempfile
+import ffmpeg
+import subprocess
+import traceback
+# Base directory (with trailing slash) that the rest of this file prefixes onto
+# the font files, the per-frame PNG folder `out/<name>/` and the final `<name>.mp4`.
+path = 'C:/Users/ziqia/Downloads/oooo/'  # Update with your path
+def getRenderCords(ta: list, idx: int, res: int = 1024, size: tuple = (1280, 720)) -> list:
+    """Build the waveform polyline for the window of samples centred on ``idx``.
+
+    The points use centre-origin coordinates: x starts at ``-0.45 * width`` and
+    advances by ``0.9 * width / res`` per sample, and y is
+    ``(sample - 128) * height / 2000 - 0.35 * height``.
+
+    Args:
+        ta: Sample values; 128 is the level that maps to the waveform baseline.
+        idx: Index of the sample placed at the middle of the window.
+        res: Window length in samples; ``2 * (res // 2)`` points are produced.
+        size: Frame size as ``(width, height)``.
+
+    Returns:
+        A list of ``(x, y)`` tuples, one per sample in the window. Positions that
+        fall before the first or after the last sample are drawn at the baseline
+        instead of raising IndexError or wrapping round to the other end of ``ta``.
+    """
+    half = res // 2
+    if half <= 0:
+        return []
+    width, height = size[0], size[1]
+    y_scale = height / 2000
+    y_offset = height * .7 / -2
+    x_step = (width * .9) / res
+    total = len(ta)
+    x = width * .9 / -2
+    c = []
+    for i in range(idx - half, idx + half):
+        # 128 is the value the formula below maps to the baseline, i.e. silence
+        sample = ta[i] if 0 <= i < total else 128
+        c.append((x, (sample - 128) * y_scale + y_offset))
+        x += x_step
+    return c
+def center_to_top_left(coords, width=1280, height=720):
+    """Convert a sequence of centre-origin ``(x, y)`` points with ``totopleft``.
+
+    Args:
+        coords: Iterable of two-item points.
+        width: Frame width passed through to ``totopleft``.
+        height: Frame height passed through to ``totopleft``.
+
+    Returns:
+        A new list holding the converted point for every input point, in order.
+    """
+    return [totopleft((cx, cy), width=width, height=height) for cx, cy in coords]
+def totopleft(coord, width=1280, height=720):
+    """Map one centre-origin point (y pointing up) to top-left-origin pixels (y pointing down).
+
+    Args:
+        coord: Point as ``(x, y)`` relative to the frame centre.
+        width: Frame width.
+        height: Frame height.
+
+    Returns:
+        ``(x + width / 2, height / 2 - y)``.
+    """
+    centre_x, centre_y = coord[0], coord[1]
+    left = centre_x + width / 2
+    top = height / 2 - centre_y
+    return left, top
+def getTrigger(ad: int, a: list, max: int = 1024) -> int:
+    """Find where to anchor the waveform so consecutive frames line up.
+
+    Scans forward from ``ad`` for the first index ``i`` with ``a[i] < 128`` and
+    ``a[i + 2] >= 128``, i.e. the signal rising through the 128 level.
+
+    Args:
+        ad: Index at which the scan starts.
+        a: Sample values.
+        max: Give up after scanning more than this many samples past ``ad``.
+            (The name shadows the builtin but is kept because callers pass it
+            by keyword.)
+
+    Returns:
+        The index of the crossing; ``ad + max + 1`` if none was found within the
+        limit; or the index at which the scan ran out of samples. The scan stops
+        at the end of ``a`` instead of raising IndexError.
+    """
+    last = len(a) - 3  # highest i for which a[i + 2] still exists
+    i = ad
+    while i <= last and i - ad <= max:
+        if a[i] < 128 and a[i + 2] >= 128:
+            break
+        i += 1
+    return i
+def extract_cover_image(mp3_file):
+    """Return the first embedded cover picture of an MP3 as a PIL image.
+
+    Args:
+        mp3_file: The MP3 to inspect, passed straight to ``mutagen.mp3.MP3``.
+
+    Returns:
+        The image decoded from the first ``APIC`` frame, or ``None`` (after
+        printing a notice) when the file has no such frame or no tags at all.
+    """
+    audio = MP3(mp3_file, ID3=ID3)
+    # `tags` is None when the file carries no ID3 header; treat that as "no cover"
+    # rather than failing on `None.values()`.
+    tags = audio.tags
+    frames = tags.values() if tags is not None else ()
+    for tag in frames:
+        if isinstance(tag, APIC):
+            return Image.open(io.BytesIO(tag.data))
+    print("No cover image found in the MP3 file.")
+    return None
+def getTitleAndArtist(mp3_file):
+    """Read the title and artist text of an MP3 from its ``TIT2`` / ``TPE1`` frames.
+
+    Args:
+        mp3_file: The MP3 to inspect, passed straight to ``mutagen.mp3.MP3``.
+
+    Returns:
+        ``(title, artist)``, each being the first text entry of its frame; a
+        missing frame is replaced by 'Unknown Title' / 'Unknown Artist'.
+    """
+    audio = MP3(mp3_file, ID3=ID3)
+    fallback_title = TIT2(encoding=3, text='Unknown Title')
+    fallback_artist = TPE1(encoding=3, text='Unknown Artist')
+    title_frame = audio.get('TIT2', fallback_title)
+    artist_frame = audio.get('TPE1', fallback_artist)
+    return title_frame.text[0], artist_frame.text[0]
+def getColour(img):
+    """Return the dominant colour of a PIL image as reported by ColorThief.
+
+    Args:
+        img: Image to analyse; it is re-encoded as PNG for ColorThief.
+
+    Returns:
+        The value of ``ColorThief.get_color(quality=1)``.
+    """
+    # ColorThief accepts a binary file object as well as a filename, so the PNG
+    # stays in memory: no temp file is left behind if saving or analysis fails.
+    buffer = io.BytesIO()
+    img.save(buffer, format="PNG")
+    buffer.seek(0)
+    return ColorThief(buffer).get_color(quality=1)
+def clamp(number):
+    """Limit ``number`` to the closed range 0..1."""
+    capped = min(number, 1)
+    return max(0, capped)
+def _retone_colour(C, saturation_gain, value):
+    """Keep the hue of ``C``, scale its saturation (clamped to 0..1) and force its HSV value."""
+    hue, saturation, _ = colorsys.rgb_to_hsv(C[0] / 255, C[1] / 255, C[2] / 255)
+    red, green, blue = colorsys.hsv_to_rgb(hue, clamp(saturation_gain * saturation), value)
+    return math.floor(red * 255), math.floor(green * 255), math.floor(blue * 255)
+def normalizeColour(C) -> tuple[int, int, int]:
+    """Derive the frame background colour from ``C``.
+
+    Args:
+        C: Colour as 0-255 ``(r, g, b)``.
+
+    Returns:
+        0-255 ``(r, g, b)`` with the same hue, saturation multiplied by 1.3
+        (capped at 1) and HSV value fixed at 0.8.
+    """
+    return _retone_colour(C, 1.3, .8)
+def normalizeColourBar(C) -> tuple[int, int, int]:
+    """Derive the progress-bar track colour from ``C``.
+
+    Args:
+        C: Colour as 0-255 ``(r, g, b)``.
+
+    Returns:
+        0-255 ``(r, g, b)`` with the same hue, saturation multiplied by 1.4
+        (capped at 1) and HSV value fixed at 0.6.
+    """
+    return _retone_colour(C, 1.4, .6)
+def stamp_text(draw, text, font, position, align='left'):
+    """Draw ``text`` in white, vertically centred on ``position``.
+
+    Args:
+        draw: Drawing object providing ``textbbox`` and ``text``.
+        text: String to draw.
+        font: Font handed to both ``textbbox`` and ``text``.
+        position: Anchor point ``(x, y)``.
+        align: 'center' centres the text on x, 'right' ends it at x, and any
+            other value starts it at x.
+    """
+    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
+    text_width = right - left
+    text_height = bottom - top
+    x, y = position
+    if align == 'right':
+        x -= text_width
+    elif align == 'center':
+        x -= text_width // 2
+    y -= text_height // 2
+    draw.text((x, y), text, font=font, fill="#fff")
+def linear_interpolate(start, stop, progress):
+    """Return the value ``progress`` of the way from ``start`` to ``stop`` (0 gives ``start``, 1 gives ``stop``)."""
+    span = stop - start
+    return start + progress * span
+def render_frame(params):
+    """Render one frame of the visualisation and save it as ``<path>out/<name>/<n>.png``.
+
+    The frame is a background in ``normalizeColour(dominant_color)`` with the
+    waveform, the cover, the title and artist, and a progress bar near the
+    bottom edge drawn on top.
+
+    Args:
+        params: Tuple ``(n, samples_array, cover_img, title, artist,
+            dominant_color, width, height, fps, name, oscres)``. It is a single
+            argument so the function can be used with ``Pool.map``.
+
+    Returns:
+        None. Nothing is written when frame ``n`` starts at or past the end of
+        ``samples_array``.
+    """
+    n, samples_array, cover_img, title, artist, dominant_color, width, height, fps, name, oscres = params
+    # `s` and `oscres` end up as list indices, so force whole numbers even if the
+    # caller passed floats such as 30.0.
+    oscres = int(oscres)
+    s = int(11025 // fps) * n
+    if s >= len(samples_array):
+        return
+    img = Image.new('RGB', (width, height), normalizeColour(dominant_color))
+    d = ImageDraw.Draw(img)
+    trigger = getTrigger(s, samples_array, max=oscres)
+    wave = getRenderCords(samples_array, trigger, res=oscres, size=(width, height))
+    d.line(center_to_top_left(wave, width=width, height=height), fill='#fff', width=2)
+    short_side = min(width, height)
+    cs = math.floor(short_side / 2)
+    cov = cover_img.resize((cs, cs))
+    img.paste(cov, (((width // 2) - cs // 2), math.floor(height * .1)))
+    # Multiply before dividing: `50 * (short_side // 720)` is 0 for anything
+    # below 720 px, and a zero font size makes truetype() fail. Sizes now scale
+    # in proportion to the short side (720 -> 50/40/30 as before).
+    fontT = ImageFont.truetype(path+'Lexend-Bold.ttf', max(1, 50 * short_side // 720))
+    fontA = ImageFont.truetype(path+'Lexend-Bold.ttf', max(1, 40 * short_side // 720))
+    fontD = ImageFont.truetype(path+'SpaceMono-Bold.ttf', max(1, 30 * short_side // 720))
+    stamp_text(d, title, fontT, totopleft((0, short_side * .3 // -2), width=width, height=height), 'center')
+    stamp_text(d, artist, fontA, totopleft((0, short_side * .44 // -2), width=width, height=height), 'center')
+    # Progress bar: a wide track in the bar colour, then a thinner white line
+    # whose length is the fraction of the track already played.
+    d.line(center_to_top_left([(width * .96 // -2, height * .95 // -2), (width * .96 // 2, height * .95 // -2)], width=width, height=height),
+           fill=normalizeColourBar(dominant_color), width=15 * height // 360)
+    d.line(center_to_top_left([(width * .95 // -2, height * .95 // -2),
+                               (linear_interpolate(width * .95 // -2, width * .95 // 2, s / len(samples_array)),
+                                height * .95 // -2)],width=width, height=height), fill='#fff', width=10 * height // 360)
+    os.makedirs(path+f'out/{name}/', exist_ok=True)
+    img.save(path+f'out/{name}/{str(n)}.png', 'PNG',)
+def RenderVid(af, n, fps=30):
+    """Mux the rendered frames of job ``n`` with the audio file into ``<n>.mp4``.
+
+    Args:
+        af: Audio file to use as the sound track.
+        n: Job name; frames are read from ``<path>out/<n>/%d.png``.
+        fps: Frame rate used both for reading the image sequence and for the output.
+
+    NOTE(review): the output name is not prefixed with ``path``, so the video is
+    written relative to the working directory, unlike the command built in ``main``.
+    """
+    # Each input is its own stream and both are handed to output(); a stream
+    # has no `.input()` operator to chain a second source onto.
+    video = ffmpeg.input(path+f'out/{n}/%d.png', framerate=fps)
+    audio = ffmpeg.input(af)
+    (ffmpeg
+     .output(video, audio, n + '.mp4', vcodec='libx264', r=fps, pix_fmt='yuv420p', acodec='aac', shortest=None)
+     .run()
+     )
+def main(file, name, fps=30, res: tuple=(1280,720), oscres = 512):
+    """Render every frame for ``file`` and mux them with the audio into ``<path><name>.mp4``.
+
+    Args:
+        file: Audio file; it is handed to librosa, mutagen and ffmpeg.
+        name: Job name; frames go to ``<path>out/<name>/`` and the video to
+            ``<path><name>.mp4``.
+        fps: Frames per second, a whole number from 1 to 11025.
+        res: Output size as ``(width, height)``.
+        oscres: Number of samples drawn in each frame's waveform.
+
+    Returns:
+        None. The function also returns early, before rendering anything, when
+        the file has no embedded cover image.
+
+    Raises:
+        ValueError: If ``fps`` is outside 1..11025.
+    """
+    global iii
+    iii = 0  # not read anywhere in this file; kept so module state is unchanged
+    # Whole numbers are required: the values feed `range()` and list indexing.
+    fps, oscres = int(fps), int(oscres)
+    if not 1 <= fps <= 11025:
+        raise ValueError(f'fps must be between 1 and 11025, got {fps}')
+    # Load the audio file
+    audio_path = file
+    y, sr = librosa.load(audio_path, sr=11025)  # Resample to 11025 Hz
+    # Clip before the cast: a full-scale sample maps to 256, which does not fit
+    # in uint8 and would otherwise wrap round to the opposite extreme.
+    y_u8 = (y * 128 + 128).clip(0, 255).astype('uint8')
+    samples_array = y_u8.tolist()
+    # Extract cover image, title, and artist
+    cover_img = extract_cover_image(audio_path)
+    if cover_img is None:
+        return  # Exit if no cover image found
+    title, artist = getTitleAndArtist(audio_path)
+    dominant_color = getColour(cover_img)
+    # Frame rendering parameters
+    width, height = res[0], res[1]
+    num_frames = len(samples_array) // (11025 // fps)
+    # Prepare parameters for each frame
+    params = [(n, samples_array, cover_img, title, artist, dominant_color, width, height, fps, name, oscres) for n in range(num_frames)]
+    try:
+        with Pool(cpu_count()) as pool:
+            pool.map(render_frame, params)
+    except Exception:
+        # Best effort: report the failure and still encode whatever frames exist.
+        print('Ended in error: ' + traceback.format_exc())
+    print('FFMPEG')
+    ffmpeg_cmd = [
+        "ffmpeg",
+        '-framerate', str(fps),       # must match the rate the frames were rendered at
+        '-i', path+f'out/{name}/%d.png',  # Input PNG images
+        '-i', f'{file}',              # Input MP3 audio
+        '-c:v', 'libx264',
+        '-r', str(fps),
+        '-pix_fmt', 'yuv420p',
+        '-c:a', 'aac',
+        '-shortest',
+        path+f'{name}.mp4'  # Output MP4 filename
+    ]
+    subprocess.run(ffmpeg_cmd)
+def gradio_interface(audio_file, output_name, fps=30, resolution='1280x720', oscres=512):
+    """Adapt the raw UI values, run ``main`` and report the outcome as text.
+
+    Args:
+        audio_file: The uploaded file as delivered by the file input.
+        output_name: Name of the video without extension.
+        fps: Frames per second from the number input.
+        resolution: Size as ``'<width>x<height>'``.
+        oscres: Waveform window length from the number input.
+
+    Returns:
+        A success message, or ``"Error processing file: ..."`` describing what
+        went wrong. Exceptions are never propagated to the UI.
+    """
+    try:
+        if audio_file is None:
+            raise ValueError('no audio file was uploaded')
+        # NOTE(review): the file input presumably delivers a temp-file object
+        # exposing `.name` rather than a path string - confirm against the
+        # installed Gradio version. A plain path passes through untouched.
+        audio_path = getattr(audio_file, 'name', audio_file)
+        res = tuple(map(int, resolution.lower().split('x')))
+        if len(res) != 2:
+            raise ValueError(f"resolution must look like '1280x720', got '{resolution}'")
+        # Number inputs may arrive as floats; main needs whole numbers.
+        main(audio_path, output_name, fps=int(fps), res=res, oscres=int(oscres))
+        # main returns silently when the MP3 has no cover image, so confirm a
+        # video really exists before claiming success.
+        if not os.path.exists(path + f'{output_name}.mp4'):
+            raise RuntimeError('no video was produced (does the MP3 contain a cover image?)')
+        return f"Output video '{output_name}.mp4' has been created. Click the link to download."
+    except Exception as e:
+        return f"Error processing file: {e}"
+# Define Gradio interface
+# NOTE(review): `gr.inputs` / `gr.outputs` and the `default=`, `min=`, `max=`
+# keywords are presumably tied to an older Gradio release - confirm they are
+# accepted by the version this Space installs.
+iface = gr.Interface(
+    fn=gradio_interface,
+    inputs=[
+        gr.inputs.File(label="Upload your MP3 file"),
+        gr.inputs.Textbox(label="Output Video Name (without extension)"),
+        gr.inputs.Number(label="Frames per Second", default=30, min=1, max=60),
+        gr.inputs.Textbox(label="Output Resolution (e.g., 1280x720)", default="1280x720"),
+        gr.inputs.Number(label="Number of Visualization Segments", default=512)
+    ],
+    outputs=gr.outputs.Textbox(label="Output"),
+    title="MP3 to Video Visualization",
+    description="Upload an MP3 file and configure parameters to create a visualization video."
+)
+# Launch Gradio interface
+# Only when run as a script: frames are rendered with multiprocessing.Pool, and
+# under the "spawn" start method every worker re-imports this module, which
+# would otherwise start another server per worker.
+if __name__ == "__main__":
+    iface.launch()

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ ffmpeg

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+librosa
+pillow
+mutagen
+Colorthief
+# `import ffmpeg` with the input()/output()/run() API used in app.py comes from ffmpeg-python
+ffmpeg-python