File size: 3,882 Bytes
de60a6a
458da1c
a0110cc
 
 
bdcf458
ccd4978
 
bdcf458
 
 
 
 
 
 
 
eb0bc41
ccd4978
0e08ca7
 
 
 
ccd4978
 
 
 
 
 
0e08ca7
 
ccd4978
 
 
 
0e08ca7
 
 
 
bdcf458
0e08ca7
 
ccd4978
bdcf458
 
 
ccd4978
bdcf458
ccd4978
 
 
bdcf458
ccd4978
 
d3097eb
ccd4978
763202b
bd89bd4
a0110cc
ccd4978
 
 
 
 
 
 
 
 
d0b2fc8
 
0e08ca7
 
9770bf5
ccd4978
 
0e08ca7
bdcf458
5a86410
763202b
ccd4978
0e08ca7
 
bdcf458
0e08ca7
 
 
 
 
ccd4978
5a86410
ccd4978
ed72842
cb85b93
 
a0110cc
cb85b93
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import os
import soundfile as sf
import requests
import librosa.display


def download_file(url):
    """Download an audio file from a Google Drive link into the working directory.

    The file id is read from the second-to-last ``/``-separated segment of
    *url* and the content is requested from the
    ``docs.google.com/uc?export=download`` endpoint.

    Args:
        url: Google Drive link to the audio file. Presumably a share link
            shaped like ``.../file/d/<id>/view`` -- verify against callers.

    Returns:
        The name of the file that was written: the last ``/``-separated
        segment of *url* with ``.wav`` appended.

    Raises:
        IndexError: If *url* has fewer than two ``/``-separated segments
            (for example an empty string).
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If the server does not respond in time.
    """
    segments = url.split('/')
    file_id = segments[-2]
    download_url = f'https://docs.google.com/uc?export=download&id={file_id}'
    # A timeout keeps a stalled connection from blocking the worker forever.
    response = requests.get(download_url, allow_redirects=True, timeout=30)
    # Fail loudly instead of saving an HTTP error page under a .wav name.
    response.raise_for_status()
    local_filename = segments[-1] + '.wav'
    # The context manager closes the handle even if the write fails.
    with open(local_filename, 'wb') as audio_file:
        audio_file.write(response.content)
    return local_filename


def main():
    """Build the Gradio Blocks UI, wire up its buttons and launch the app.

    Layout: a header, a row holding the spectrogram image, and a row with the
    audio input plus analyse button on the left and the info table plus a
    collapsible Google Drive downloader on the right.

    Each button is bound to its callback exactly once; binding the same
    handler twice would run the callback twice per click.
    """
    with gr.Blocks() as app:
        gr.Markdown(
            """
            <h1><center>Audio Analyzer by Ilaria</center></h1>\n
            <h3><center>Help me on <a href="https://ko-fi.com/ilariaowo/shop">Ko-Fi</a>!</center></h3>\n
            ## Special thanks to Alex Murkoff for helping me code it!
            #### Need help with AI? Join [AI Hub](https://discord.gg/aihub)!\n
            **Note**: Try to keep the audio length under **2 minutes**,
            since long audio files dont work well with a static spectrogram
            """
        )

        with gr.Row():
            image_output = gr.Image(type='filepath', interactive=False)

        with gr.Row():
            with gr.Column():
                audio_input = gr.Audio(type='filepath')
                create_spec_butt = gr.Button(value='Create Spectrogram And Get Info', variant='primary')

            with gr.Column():
                output_markdown = gr.Markdown(value="", visible=True)

                with gr.Accordion('Audio Downloader', open=False):
                    url_input = gr.Textbox(value='', label='Google Drive Audio URL')
                    download_butt = gr.Button(value='Download audio', variant='primary')

        # Event wiring happens once, after every component exists.
        # The downloaded file path is fed straight into the audio input.
        download_butt.click(fn=download_file, inputs=[url_input], outputs=[audio_input])
        create_spec_butt.click(fn=create_spectrogram_and_get_info, inputs=[audio_input],
                               outputs=[output_markdown, image_output])

        app.queue(max_size=1022).launch(share=True)

def create_spectrogram_and_get_info(audio_file):
    """Render a mel spectrogram of an audio file and summarise its metadata.

    The spectrogram is saved to ``spectrogram.png`` in the working directory,
    overwriting any previous image.

    Args:
        audio_file: Path to the audio file, or ``None``/empty when no audio
            has been supplied yet.

    Returns:
        A ``(info_table, image_path)`` tuple. ``info_table`` is a Markdown
        table with file name, duration, bitrate, channel count, sample rate
        and bits per second; ``image_path`` is ``'spectrogram.png'``. When
        *audio_file* is empty, a short Markdown hint and ``None`` are
        returned instead.
    """
    # Nothing to analyse yet: answer with a hint instead of crashing.
    if not audio_file:
        return 'Please provide an audio file first.', None

    y, sr = librosa.load(audio_file, sr=None)
    # `y` is passed by keyword: newer librosa releases reject it positionally.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=256)
    # NOTE(review): melspectrogram presumably yields a power spectrogram by
    # default, in which case power_to_db would be the matching conversion.
    # Left unchanged because it alters the rendered image -- confirm.
    log_S = librosa.amplitude_to_db(S, ref=np.max, top_db=256)

    fig = plt.figure(figsize=(12, 5.5))
    try:
        librosa.display.specshow(log_S, sr=sr, x_axis='time')
        plt.colorbar(format='%+2.0f dB', pad=0.01)
        plt.tight_layout(pad=0.5)
        plt.savefig('spectrogram.png', dpi=500)
    finally:
        # Close the figure so repeated calls do not pile up open figures.
        plt.close(fig)

    audio_info = sf.info(audio_file)
    # Bits per sample for the uncompressed subtypes; anything else (for
    # example compressed formats) has no fixed sample width and stays at 0.
    subtype_bits = {
        'PCM_S8': 8,
        'PCM_U8': 8,
        'PCM_16': 16,
        'PCM_24': 24,
        'PCM_32': 32,
        'FLOAT': 32,
        'DOUBLE': 64,
    }
    bit_depth = subtype_bits.get(audio_info.subtype, 0)

    # Work in whole milliseconds so float remainders cannot drop a unit.
    total_milliseconds = int(round(audio_info.duration * 1000))
    total_seconds, milliseconds = divmod(total_milliseconds, 1000)
    minutes, seconds = divmod(total_seconds, 60)

    # The bitrate covers every channel, consistent with the bit/s row.
    bits_per_second = audio_info.samplerate * audio_info.channels * bit_depth
    speed_in_kbps = bits_per_second / 1000
    filename_without_extension, _ = os.path.splitext(os.path.basename(audio_file))
    info_table = f"""


    | Information | Value |
    | :---: | :---: |
    | File Name | {filename_without_extension} |
    | Duration | {int(minutes)} minutes - {int(seconds)} seconds - {int(milliseconds)} milliseconds |
    | Bitrate | {speed_in_kbps} kbp/s |
    | Audio Channels | {audio_info.channels} |
    | Samples per second | {audio_info.samplerate} Hz |
    | Bit per second | {bits_per_second} bit/s |

    """

    # Return the info table and the PNG file of the spectrogram
    return info_table, 'spectrogram.png'

# Script entry point: build the Gradio Blocks UI and launch the app.
# NOTE(review): there is no `__main__` guard, so this also runs on import.
main()