File size: 9,621 Bytes
0bb0d8e
 
 
16bc3e4
0bb0d8e
da9138c
 
 
64259e4
0bb0d8e
16bc3e4
d274746
 
 
 
 
 
16bc3e4
 
 
 
 
 
da9138c
16bc3e4
d274746
da9138c
a96aeb1
d274746
da9138c
d274746
 
 
 
 
 
 
 
 
 
 
 
 
 
a96aeb1
d274746
 
 
 
a96aeb1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d274746
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a96aeb1
 
 
 
64259e4
a96aeb1
 
 
 
 
 
 
 
 
 
64259e4
a96aeb1
 
 
 
 
 
 
 
64259e4
d274746
 
 
 
 
 
 
 
da9138c
d274746
 
 
 
 
 
 
 
 
 
64259e4
d274746
64259e4
d274746
 
 
 
64259e4
 
 
 
 
 
 
 
a96aeb1
 
 
 
 
 
 
 
d274746
 
 
 
a96aeb1
 
 
 
64259e4
 
 
 
 
a96aeb1
64259e4
 
 
a96aeb1
d274746
64259e4
a96aeb1
 
64259e4
d274746
 
64259e4
d274746
 
 
 
 
64259e4
 
d274746
 
 
 
 
 
 
 
64259e4
 
a96aeb1
d274746
 
 
a96aeb1
 
d274746
a96aeb1
 
64259e4
a96aeb1
 
 
 
 
64259e4
 
 
 
 
16bc3e4
d274746
 
 
 
 
 
16bc3e4
 
 
 
0021652
d274746
64259e4
 
 
 
d274746
 
 
 
64259e4
16bc3e4
0021652
d274746
da9138c
64259e4
 
 
da9138c
a96aeb1
16bc3e4
 
64259e4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
import gradio as gr
import os
import time
import sys
import subprocess
import tempfile
import requests
from urllib.parse import urlparse
from pydub import AudioSegment

# Clone and install faster-whisper from GitHub.
# The clone is skipped when the checkout already exists: `git clone` refuses to
# write into a non-empty directory, which would otherwise abort every restart.
try:
    if not os.path.isdir("./faster-whisper"):
        subprocess.run(["git", "clone", "https://github.com/SYSTRAN/faster-whisper.git"], check=True)
    # Run pip through the current interpreter so the package is installed into
    # the same environment this script imports from.
    subprocess.run([sys.executable, "-m", "pip", "install", "-e", "./faster-whisper"], check=True)
except (subprocess.CalledProcessError, OSError) as e:
    # OSError covers a missing `git` executable, which subprocess reports as
    # FileNotFoundError rather than CalledProcessError.
    print(f"Error during faster-whisper installation: {e}")
    sys.exit(1)

# Add the faster-whisper directory to the Python path
sys.path.append("./faster-whisper")

from faster_whisper import WhisperModel
from faster_whisper.transcribe import BatchedInferencePipeline
import yt_dlp

def download_audio(url, method_choice):
    """Route *url* to the YouTube downloader or the direct-file downloader.

    Args:
        url: Address of the audio or video to fetch.
        method_choice: Name of the download method, forwarded unchanged.

    Returns:
        Whatever the selected downloader returns.
    """
    # `hostname` is lower-cased and has any ":port" suffix stripped, so the
    # comparison does not depend on how the user typed the address.
    host = urlparse(url).hostname or ""
    # Accept every youtube.com subdomain (www, m, music, ...) plus the short
    # youtu.be links, instead of a fixed list of three spellings.
    is_youtube = host in ("youtu.be", "youtube.com") or host.endswith(".youtube.com")
    if is_youtube:
        return download_youtube_audio(url, method_choice)
    return download_direct_audio(url, method_choice)

# Additional YouTube download methods
def download_youtube_audio(url, method_choice):
    """Download YouTube audio with the downloader named by *method_choice*.

    Args:
        url: YouTube address to download.
        method_choice: Key selecting one of the downloader functions below.

    Returns:
        The downloader's return value, or a string starting with
        "Error downloading using ..." when the downloader raises.
    """
    downloaders = {
        'yt-dlp': youtube_dl_method,
        'pytube': pytube_method,
        'youtube-dl': youtube_dl_classic_method,
        'yt-dlp-alt': youtube_dl_alternative_method,
        'ffmpeg': ffmpeg_method,
        'aria2': aria2_method
    }

    # Any unrecognised choice falls back to the yt-dlp downloader.
    if method_choice in downloaders:
        downloader = downloaders[method_choice]
    else:
        downloader = youtube_dl_method

    try:
        result = downloader(url)
    except Exception as exc:
        # Failures are reported as a string; the caller checks the prefix.
        return f"Error downloading using {method_choice}: {str(exc)}"
    return result

def youtube_dl_method(url):
    """Download *url* through yt_dlp with an MP3 audio-extraction post-processor.

    Returns:
        The string "<id>.mp3", built from the ``id`` field of the info dict
        that ``extract_info`` returns.
    """
    extract_mp3 = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }
    options = {
        'format': 'bestaudio/best',
        'postprocessors': [extract_mp3],
        # Output is named after the video id; the returned name relies on this.
        'outtmpl': '%(id)s.%(ext)s',
    }
    with yt_dlp.YoutubeDL(options) as downloader:
        video_info = downloader.extract_info(url, download=True)
        mp3_name = f"{video_info['id']}.mp3"
        return mp3_name

def pytube_method(url):
    """Download the first audio-only stream of *url* with pytube as an MP3 file.

    The downloaded stream is transcoded to MP3 instead of merely being renamed,
    so the returned ``.mp3`` file really contains MP3 data.

    Args:
        url: YouTube address to download.

    Returns:
        Path of the MP3 file, next to where pytube saved the stream.

    Raises:
        RuntimeError: If pytube offers no audio-only stream for the video.
    """
    from pytube import YouTube

    yt = YouTube(url)
    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
        # `.first()` yields None for an empty query; fail with a clear message
        # rather than an AttributeError on the next line.
        raise RuntimeError(f"No audio-only stream available for {url}")

    out_file = audio_stream.download()
    base, _ext = os.path.splitext(out_file)
    new_file = base + '.mp3'

    # Let ffmpeg detect the real container and re-encode it as MP3.
    AudioSegment.from_file(out_file).export(new_file, format="mp3")
    if out_file != new_file:
        os.remove(out_file)
    return new_file

def youtube_dl_classic_method(url):
    """Download *url* and return "<id>.mp3" (the "classic youtube-dl" menu entry).

    Despite its name this goes through ``yt_dlp.YoutubeDL``, with the same
    options as ``youtube_dl_method``.
    """
    # Classic youtube-dl method
    options = {}
    options['format'] = 'bestaudio/best'
    options['postprocessors'] = [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }]
    options['outtmpl'] = '%(id)s.%(ext)s'

    with yt_dlp.YoutubeDL(options) as client:
        metadata = client.extract_info(url, download=True)
        video_id = metadata['id']
        return f"{video_id}.mp3"

def youtube_dl_alternative_method(url):
    """Download *url* through yt_dlp with quiet and relaxed-security flags set.

    Returns:
        The string "<id>.mp3", built from the ``id`` field of the info dict
        that ``extract_info`` returns.
    """
    download_options = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'outtmpl': '%(id)s.%(ext)s',
    }
    # NOTE(review): yt-dlp documents the certificate option as
    # 'nocheckcertificate'; confirm whether 'no_check_certificate' is honoured
    # at all, and whether disabling TLS verification is really wanted here.
    relaxed_flags = {
        'no_warnings': True,
        'quiet': True,
        'no_check_certificate': True,
        'prefer_insecure': True,
    }
    merged_options = {**download_options, **relaxed_flags}

    with yt_dlp.YoutubeDL(merged_options) as session:
        details = session.extract_info(url, download=True)
        return f"{details['id']}.mp3"

def ffmpeg_method(url):
    """Have ffmpeg read *url* and write its audio track to a temporary MP3 file.

    Args:
        url: Input passed to ffmpeg's ``-i`` option.

    Returns:
        Path of the temporary MP3 file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        OSError: If the ffmpeg executable cannot be started.
    """
    # mkstemp creates the file atomically; the deprecated mktemp only returns a
    # name that another process could claim before ffmpeg opens it.
    fd, output_file = tempfile.mkstemp(suffix='.mp3')
    os.close(fd)
    # '-y' lets ffmpeg overwrite the placeholder instead of prompting.
    command = ['ffmpeg', '-y', '-i', url, '-vn', '-acodec', 'libmp3lame', '-q:a', '2', output_file]
    try:
        subprocess.run(command, check=True, capture_output=True)
    except Exception:
        # Do not leave the empty placeholder behind when the conversion fails.
        if os.path.exists(output_file):
            os.remove(output_file)
        raise
    return output_file

def aria2_method(url):
    """Download *url* with aria2c into a temporary ``.mp3`` file.

    Args:
        url: Address handed to aria2c.

    Returns:
        Path of the temporary file.

    Raises:
        subprocess.CalledProcessError: If aria2c exits with a non-zero status.
        OSError: If the aria2c executable cannot be started.
    """
    # mkstemp creates the file atomically, unlike the deprecated mktemp.
    fd, output_file = tempfile.mkstemp(suffix='.mp3')
    os.close(fd)
    # aria2c interprets --out relative to --dir, so the absolute temp path is
    # split into its directory and file name.
    out_dir, out_name = os.path.split(output_file)
    command = [
        'aria2c', '--split=4', '--max-connection-per-server=4',
        # The placeholder already exists: overwrite it rather than saving the
        # download under an automatically renamed file.
        '--allow-overwrite=true', '--auto-file-renaming=false',
        '--dir', out_dir, '--out', out_name, url,
    ]
    try:
        subprocess.run(command, check=True, capture_output=True)
    except Exception:
        # Do not leave the empty placeholder behind when the download fails.
        if os.path.exists(output_file):
            os.remove(output_file)
        raise
    return output_file

def download_direct_audio(url, method_choice):
    """Download an audio file from a plain URL into a temporary ``.mp3`` file.

    Args:
        url: Address of the audio file.
        method_choice: 'wget' delegates to ``wget_method``; any other value
            downloads with ``requests``.

    Returns:
        Path of the downloaded file. On the ``requests`` path a failure is
        reported as a string starting with "Error downloading direct audio:".
    """
    if method_choice == 'wget':
        return wget_method(url)

    temp_path = None
    try:
        # Bound the connect and read waits so a stalled server cannot hang the
        # request forever, and stream the body instead of holding it in memory.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            if response.status_code != 200:
                raise Exception(f"Failed to download audio from {url}")
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
                temp_path = temp_file.name
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    temp_file.write(chunk)
        return temp_path
    except Exception as e:
        # Remove a partially written file so failed downloads do not pile up.
        if temp_path is not None and os.path.exists(temp_path):
            os.remove(temp_path)
        return f"Error downloading direct audio: {str(e)}"
        
def wget_method(url):
    """Download *url* with wget into a temporary ``.mp3`` file.

    Args:
        url: Address handed to wget.

    Returns:
        Path of the temporary file.

    Raises:
        subprocess.CalledProcessError: If wget exits with a non-zero status.
        OSError: If the wget executable cannot be started.
    """
    # mkstemp creates the file atomically; the deprecated mktemp only returns a
    # name that another process could claim first. `wget -O` writes into the
    # existing placeholder.
    fd, output_file = tempfile.mkstemp(suffix='.mp3')
    os.close(fd)
    command = ['wget', '-O', output_file, url]
    try:
        subprocess.run(command, check=True, capture_output=True)
    except Exception:
        # Do not leave the placeholder behind when the download fails.
        if os.path.exists(output_file):
            os.remove(output_file)
        raise
    return output_file

def trim_audio(audio_path, start_time, end_time):
    """Cut the span [start_time, end_time] out of an audio file and save it as MP3.

    Args:
        audio_path: Path of the source audio file.
        start_time: Start of the span in seconds; None is treated as 0.
        end_time: End of the span in seconds; a falsy value (None or 0) keeps
            everything up to the end of the file.

    Returns:
        Path of a new temporary MP3 file holding the selected span.
    """
    # from_file lets ffmpeg detect the container, so non-MP3 inputs (e.g. a
    # local WAV file) can be trimmed too; from_mp3 forces the MP3 demuxer.
    audio = AudioSegment.from_file(audio_path)

    # pydub slices are expressed in milliseconds.
    start_ms = (start_time or 0) * 1000
    trimmed_audio = audio[start_ms:end_time * 1000] if end_time else audio[start_ms:]

    # mkstemp creates the file atomically, unlike the deprecated mktemp.
    fd, trimmed_audio_path = tempfile.mkstemp(suffix='.mp3')
    os.close(fd)
    try:
        trimmed_audio.export(trimmed_audio_path, format="mp3")
    except Exception:
        # Do not leave the placeholder behind when encoding fails.
        if os.path.exists(trimmed_audio_path):
            os.remove(trimmed_audio_path)
        raise
    return trimmed_audio_path

def _format_metrics(info, elapsed, audio_file_size):
    """Render the metrics text from transcription info, elapsed seconds and size in MB."""
    # Guard the division: a zero elapsed time is shown as a factor of 0.
    real_time_factor = info.duration / elapsed if elapsed > 0 else 0.0
    return (
        f"Language: {info.language}, Probability: {info.language_probability:.2f}\n"
        f"Duration: {info.duration:.2f}s, Duration after VAD: {info.duration_after_vad:.2f}s\n"
        f"Transcription time: {elapsed:.2f} seconds\n"
        f"Real-time factor: {real_time_factor:.2f}x\n"
        f"Audio file size: {audio_file_size:.2f} MB\n"
    )


def transcribe_audio(input_source, batch_size, download_method, start_time=None, end_time=None, verbose=False):
    """Transcribe a local file or a downloadable URL with faster-whisper.

    This is a generator: each item is a ``(metrics, transcription, file)``
    tuple matching the three interface outputs.

    Args:
        input_source: Local file path, or an http(s) URL that is downloaded first.
        batch_size: Batch size passed to the batched inference pipeline.
        download_method: Download method name forwarded to ``download_audio``.
        start_time: Optional start of the span to transcribe, in seconds.
        end_time: Optional end of the span to transcribe, in seconds.
        verbose: When true, yield the metrics up front and again after every
            segment; otherwise only the final result is yielded.

    Yields:
        ``(metrics_text, transcription_text, transcription_file_or_None)``.
        Errors are reported in the first element instead of being raised.
    """
    is_url = isinstance(input_source, str) and input_source.startswith(('http://', 'https://'))
    # Every temporary file created here, so that all of them are removed at the
    # end (the download must not be forgotten once a trimmed copy replaces it).
    temp_files = []

    try:
        if not input_source:
            yield "Error: no audio source provided", "", None
            return

        # Handle input source
        if is_url:
            # It's a URL, download the audio
            audio_path = download_audio(input_source, download_method)
            if audio_path.startswith("Error"):
                yield f"Error: {audio_path}", "", None
                return
            temp_files.append(audio_path)
        else:
            # It's a local file path
            audio_path = input_source

        # Trim only when a span was actually requested. The UI sends 0 as the
        # default start, which selects the whole file and needs no re-encode.
        if start_time or end_time:
            if end_time and end_time <= (start_time or 0):
                yield "Error: end time must be greater than start time", "", None
                return
            audio_path = trim_audio(audio_path, start_time or 0, end_time)
            temp_files.append(audio_path)

        # Initialize the model
        model = WhisperModel("cstr/whisper-large-v3-turbo-int8_float32", device="auto", compute_type="int8")
        batched_model = BatchedInferencePipeline(model=model)

        audio_file_size = os.path.getsize(audio_path) / (1024 * 1024)  # Size in MB

        # transcribe() hands back a lazy generator: the decoding happens while
        # the segments are iterated, so the clock keeps running until the loop
        # below has finished.
        started = time.time()
        segments, info = batched_model.transcribe(audio_path, batch_size=int(batch_size), initial_prompt=None)

        if verbose:
            yield _format_metrics(info, time.time() - started, audio_file_size), "", None

        transcription = ""

        # Stream transcription output gradually
        for segment in segments:
            transcription += f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\n"

            if verbose:
                yield _format_metrics(info, time.time() - started, audio_file_size), transcription, None

        # Final output with download option; metrics now cover the full run.
        metrics_output = _format_metrics(info, time.time() - started, audio_file_size)
        transcription_file = save_transcription(transcription)
        yield metrics_output, transcription, transcription_file

    except Exception as e:
        yield f"An error occurred: {str(e)}", "", None

    finally:
        # Best-effort clean-up of downloaded and trimmed files; a file that is
        # already gone is not an error.
        for path in temp_files:
            try:
                os.remove(path)
            except OSError:
                pass

def save_transcription(transcription):
    """Write *transcription* to a new temporary ``.txt`` file and return its path.

    The file is created atomically and is not deleted on close. It is written
    as UTF-8 so transcripts in any language can be saved regardless of the
    platform's default encoding.

    Args:
        transcription: Text to store.

    Returns:
        Path of the written file.
    """
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', encoding='utf-8', delete=False) as f:
        f.write(transcription)
        return f.name

# Gradio interface
# `optional=` (Number) and `live=` (Textbox) are not passed: they are not
# component arguments in current Gradio. An empty Number already arrives as
# None, and streaming comes from transcribe_audio being a generator.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=[
        gr.Textbox(label="Audio Source (Upload, MP3 URL, or YouTube URL)"),
        gr.Slider(minimum=1, maximum=32, step=1, value=16, label="Batch Size"),
        gr.Dropdown(choices=["yt-dlp", "pytube", "youtube-dl", "yt-dlp-alt", "ffmpeg", "aria2", "wget"], label="Download Method", value="yt-dlp"),
        gr.Number(label="Start Time (seconds)", value=0),
        gr.Number(label="End Time (seconds)"),
        gr.Checkbox(label="Verbose Output", value=False)
    ],
    outputs=[
        gr.Textbox(label="Transcription Metrics and Verbose Messages"),
        gr.Textbox(label="Transcription"),
        gr.File(label="Download Transcription")
    ],
    title="Faster Whisper Multi-Input Transcription",
    description="Enter an audio file path, MP3 URL, or YouTube URL to transcribe using Faster Whisper (GitHub version). Adjust the batch size and choose a download method.",
    # Each example row follows the order of `inputs` above.
    examples=[
        ["https://www.youtube.com/watch?v=daQ_hqA6HDo", 16, "yt-dlp", 0, None, False],
        ["https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453_-_The_Price_is_Right_-_Law_and_Economics_in_the_Second_Scholastic5yxzh.mp3", 16, "ffmpeg", 0, 300, True],
        ["path/to/local/audio.mp3", 16, "yt-dlp", 60, 180, False]
    ],
    cache_examples=False  # Prevents automatic processing of examples
)

iface.launch()