cstr committed on
Commit
aeca221
·
verified ·
1 Parent(s): 5f48e16
Files changed (1) hide show
  1. app.py +179 -217
app.py CHANGED
@@ -1,8 +1,6 @@
1
  import gradio as gr
2
  import os
3
  import time
4
- import sys
5
- import io
6
  import tempfile
7
  import subprocess
8
  import requests
@@ -13,40 +11,21 @@ import torch
13
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
14
  import yt_dlp
15
 
16
- class LogCapture(io.StringIO):
17
- def __init__(self, callback):
18
- super().__init__()
19
- self.callback = callback
20
-
21
- def write(self, s):
22
- super().write(s)
23
- self.callback(s)
24
-
25
- logging.basicConfig(level=logging.INFO)
26
-
27
- # Clone and install faster-whisper from GitHub
28
- try:
29
- subprocess.run(["git", "clone", "https://github.com/SYSTRAN/faster-whisper.git"], check=True)
30
- subprocess.run(["pip", "install", "-e", "./faster-whisper"], check=True)
31
- except subprocess.CalledProcessError as e:
32
- logging.error(f"Error during faster-whisper installation: {e}")
33
- sys.exit(1)
34
-
35
- sys.path.append("./faster-whisper")
36
-
37
- from faster_whisper import WhisperModel
38
- from faster_whisper.transcribe import BatchedInferencePipeline
39
 
 
40
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
41
 
42
  def download_audio(url, method_choice):
43
  """
44
  Downloads audio from a given URL using the specified method.
45
-
46
  Args:
47
  url (str): The URL of the audio.
48
  method_choice (str): The method to use for downloading audio.
49
-
50
  Returns:
51
  tuple: (path to the downloaded audio file, is_temp_file), or (error message, False).
52
  """
@@ -54,50 +33,27 @@ def download_audio(url, method_choice):
54
  logging.info(f"Downloading audio from URL: {url} using method: {method_choice}")
55
  try:
56
  if 'youtube.com' in parsed_url.netloc or 'youtu.be' in parsed_url.netloc:
57
- # Use YouTube download methods
58
  audio_file = download_youtube_audio(url, method_choice)
59
  elif parsed_url.scheme == 'rtsp':
60
- # Use RTSP download methods
61
  audio_file = download_rtsp_audio(url)
62
  else:
63
- # Use direct download methods
64
  audio_file = download_direct_audio(url, method_choice)
 
65
  if not audio_file or not os.path.exists(audio_file):
66
  raise Exception(f"Failed to download audio from {url}")
67
- return audio_file, True # The file is a temporary file
68
  except Exception as e:
69
  logging.error(f"Error downloading audio: {str(e)}")
70
  return f"Error: {str(e)}", False
71
 
72
- def download_rtsp_audio(url):
73
- """
74
- Downloads audio from an RTSP URL using FFmpeg.
75
-
76
- Args:
77
- url (str): The RTSP URL.
78
-
79
- Returns:
80
- str: Path to the downloaded audio file, or None if failed.
81
- """
82
- logging.info("Using FFmpeg to download RTSP stream")
83
- output_file = tempfile.mktemp(suffix='.mp3')
84
- command = ['ffmpeg', '-i', url, '-acodec', 'libmp3lame', '-ab', '192k', '-y', output_file]
85
- try:
86
- subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
87
- logging.info(f"Downloaded RTSP audio to: {output_file}")
88
- return output_file
89
- except Exception as e:
90
- logging.error(f"Error downloading RTSP audio: {str(e)}")
91
- return None
92
-
93
  def download_youtube_audio(url, method_choice):
94
  """
95
  Downloads audio from a YouTube URL using the specified method.
96
-
97
  Args:
98
  url (str): The YouTube URL.
99
- method_choice (str): The method to use for downloading ('yt-dlp', 'pytube').
100
-
101
  Returns:
102
  str: Path to the downloaded audio file, or None if failed.
103
  """
@@ -105,10 +61,7 @@ def download_youtube_audio(url, method_choice):
105
  'yt-dlp': yt_dlp_method,
106
  'pytube': pytube_method,
107
  }
108
- method = methods.get(method_choice)
109
- if method is None:
110
- logging.warning(f"Invalid download method for YouTube: {method_choice}. Defaulting to 'yt-dlp'.")
111
- method = yt_dlp_method
112
  try:
113
  logging.info(f"Attempting to download YouTube audio using {method_choice}")
114
  return method(url)
@@ -116,143 +69,110 @@ def download_youtube_audio(url, method_choice):
116
  logging.error(f"Error downloading using {method_choice}: {str(e)}")
117
  return None
118
 
119
- def youtube_dl_method(url):
120
- logging.info("Using yt-dlp method")
121
- try:
122
- ydl_opts = {
123
- 'format': 'bestaudio/best',
124
- 'postprocessors': [{
125
- 'key': 'FFmpegExtractAudio',
126
- 'preferredcodec': 'mp3',
127
- 'preferredquality': '192',
128
- }],
129
- 'outtmpl': '%(id)s.%(ext)s',
130
- }
131
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
132
- info = ydl.extract_info(url, download=True)
133
- output_file = f"{info['id']}.mp3"
134
- logging.info(f"Downloaded YouTube audio: {output_file}")
135
- return output_file
136
- except Exception as e:
137
- logging.error(f"Error in youtube_dl_method: {str(e)}")
138
- return None
139
-
140
- def yt_dlp_direct_method(url):
141
  """
142
- Downloads audio using yt-dlp (supports various protocols and sites).
143
-
144
  Args:
145
- url (str): The URL of the audio or webpage containing audio.
146
-
147
  Returns:
148
- str: Path to the downloaded audio file, or None if failed.
149
  """
150
- logging.info("Using yt-dlp direct method")
151
- output_file = tempfile.mktemp(suffix='.mp3')
152
  ydl_opts = {
153
  'format': 'bestaudio/best',
154
- 'outtmpl': output_file,
155
- 'quiet': True,
156
- 'no_warnings': True,
157
  'postprocessors': [{
158
  'key': 'FFmpegExtractAudio',
159
  'preferredcodec': 'mp3',
160
  'preferredquality': '192',
161
  }],
 
162
  }
163
- try:
164
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
165
- ydl.download([url])
166
- logging.info(f"Downloaded audio to: {output_file}")
167
  return output_file
168
- except Exception as e:
169
- logging.error(f"Error in yt_dlp_direct_method: {str(e)}")
170
- return None
171
-
172
  def pytube_method(url):
173
  """
174
  Downloads audio using pytube.
175
-
176
  Args:
177
  url (str): The YouTube URL.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
 
 
 
 
 
 
 
179
  Returns:
180
  str: Path to the downloaded audio file, or None if failed.
181
  """
182
- logging.info("Using pytube method")
 
 
183
  try:
184
- from pytube import YouTube
185
- yt = YouTube(url)
186
- audio_stream = yt.streams.filter(only_audio=True).first()
187
- out_file = audio_stream.download()
188
- base, ext = os.path.splitext(out_file)
189
- new_file = base + '.mp3'
190
- os.rename(out_file, new_file)
191
- logging.info(f"Downloaded and converted audio to: {new_file}")
192
- return new_file
193
  except Exception as e:
194
- logging.error(f"Error in pytube_method: {str(e)}")
195
  return None
196
 
197
- def youtube_dl_classic_method(url):
198
- logging.info("Using youtube-dl classic method")
199
- ydl_opts = {
200
- 'format': 'bestaudio/best',
201
- 'postprocessors': [{
202
- 'key': 'FFmpegExtractAudio',
203
- 'preferredcodec': 'mp3',
204
- 'preferredquality': '192',
205
- }],
206
- 'outtmpl': '%(id)s.%(ext)s',
207
- }
208
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
209
- info = ydl.extract_info(url, download=True)
210
- logging.info(f"Downloaded YouTube audio: {info['id']}.mp3")
211
- return f"{info['id']}.mp3"
212
-
213
- def youtube_dl_alternative_method(url):
214
- logging.info("Using yt-dlp alternative method")
215
- ydl_opts = {
216
- 'format': 'bestaudio/best',
217
- 'postprocessors': [{
218
- 'key': 'FFmpegExtractAudio',
219
- 'preferredcodec': 'mp3',
220
- 'preferredquality': '192',
221
- }],
222
- 'outtmpl': '%(id)s.%(ext)s',
223
- 'no_warnings': True,
224
- 'quiet': True,
225
- 'no_check_certificate': True,
226
- 'prefer_insecure': True,
227
  }
228
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
229
- info = ydl.extract_info(url, download=True)
230
- logging.info(f"Downloaded YouTube audio: {info['id']}.mp3")
231
- return f"{info['id']}.mp3"
232
-
233
- def ffmpeg_method(url):
234
- logging.info("Using ffmpeg method")
235
- output_file = tempfile.mktemp(suffix='.mp3')
236
- command = ['ffmpeg', '-i', url, '-vn', '-acodec', 'libmp3lame', '-q:a', '2', output_file]
237
- subprocess.run(command, check=True, capture_output=True)
238
- logging.info(f"Downloaded and converted audio to: {output_file}")
239
- return output_file
240
-
241
- def aria2_method(url):
242
- logging.info("Using aria2 method")
243
- output_file = tempfile.mktemp(suffix='.mp3')
244
- command = ['aria2c', '--split=4', '--max-connection-per-server=4', '--out', output_file, url]
245
- subprocess.run(command, check=True, capture_output=True)
246
- logging.info(f"Downloaded audio to: {output_file}")
247
- return output_file
248
 
249
  def requests_method(url):
250
  """
251
  Downloads audio using the requests library.
252
-
253
  Args:
254
  url (str): The URL of the audio file.
255
-
256
  Returns:
257
  str: Path to the downloaded audio file, or None if failed.
258
  """
@@ -271,71 +191,125 @@ def requests_method(url):
271
  except Exception as e:
272
  logging.error(f"Error in requests_method: {str(e)}")
273
  return None
274
-
275
- def download_direct_audio(url, method_choice):
276
- """
277
- Downloads audio from a direct URL or podcast URL using the specified method.
278
 
 
 
 
 
279
  Args:
280
- url (str): The direct URL of the audio file.
281
- method_choice (str): The method to use for downloading ('wget', 'requests', 'yt-dlp', 'ffmpeg', 'aria2').
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
 
 
 
 
 
 
 
 
283
  Returns:
284
  str: Path to the downloaded audio file, or None if failed.
285
  """
286
- logging.info(f"Downloading direct audio from: {url} using method: {method_choice}")
287
- methods = {
288
- 'wget': wget_method,
289
- 'requests': requests_method,
290
- 'yt-dlp': yt_dlp_direct_method,
291
- 'ffmpeg': ffmpeg_method,
292
- 'aria2': aria2_method,
 
 
 
 
 
293
  }
294
- method = methods.get(method_choice)
295
- if method is None:
296
- logging.warning(f"Invalid download method: {method_choice}. Defaulting to 'requests'.")
297
- method = requests_method
298
  try:
299
- return method(url)
 
 
 
300
  except Exception as e:
301
- logging.error(f"Error downloading direct audio: {str(e)}")
302
  return None
303
 
304
- def wget_method(url):
305
  """
306
- Downloads audio using the wget command-line tool.
307
-
308
  Args:
309
  url (str): The URL of the audio file.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
 
 
 
 
 
 
 
 
311
  Returns:
312
  str: Path to the downloaded audio file, or None if failed.
313
  """
314
- logging.info("Using wget method")
315
  output_file = tempfile.mktemp(suffix='.mp3')
316
- command = ['wget', '-O', output_file, url]
317
  try:
318
- subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
319
  logging.info(f"Downloaded audio to: {output_file}")
320
  return output_file
 
 
 
321
  except Exception as e:
322
- logging.error(f"Error in wget_method: {str(e)}")
323
  return None
324
 
325
  def trim_audio(audio_path, start_time, end_time):
326
  """
327
- Trims an audio file to the specified start and end times using pydub.
328
-
329
  Args:
330
  audio_path (str): Path to the audio file.
331
  start_time (float): Start time in seconds.
332
  end_time (float): End time in seconds.
333
-
334
  Returns:
335
  str: Path to the trimmed audio file.
336
-
337
  Raises:
338
- gr.Error: If invalid start or end times are provided or if FFmpeg is not found.
339
  """
340
  try:
341
  logging.info(f"Trimming audio from {start_time} to {end_time}")
@@ -343,27 +317,18 @@ def trim_audio(audio_path, start_time, end_time):
343
  audio_duration = len(audio) / 1000 # Duration in seconds
344
 
345
  # Default start and end times if None
346
- if start_time is None:
347
- start_time = 0
348
- if end_time is None or end_time > audio_duration:
349
- end_time = audio_duration
350
 
351
  # Validate times
352
- if start_time < 0 or end_time <= 0:
353
- raise gr.Error("Start time and end time must be positive.")
354
  if start_time >= end_time:
355
  raise gr.Error("End time must be greater than start time.")
356
- if start_time > audio_duration:
357
- raise gr.Error("Start time exceeds audio duration.")
358
 
359
- trimmed_audio = audio[start_time * 1000:end_time * 1000]
360
  with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio_file:
361
  trimmed_audio.export(temp_audio_file.name, format="wav")
362
  logging.info(f"Trimmed audio saved to: {temp_audio_file.name}")
363
  return temp_audio_file.name
364
- except FileNotFoundError as e:
365
- logging.error(f"FFmpeg not found: {str(e)}")
366
- raise gr.Error("FFmpeg not found. Please ensure that FFmpeg is installed and in your system PATH.")
367
  except Exception as e:
368
  logging.error(f"Error trimming audio: {str(e)}")
369
  raise gr.Error(f"Error trimming audio: {str(e)}")
@@ -371,10 +336,10 @@ def trim_audio(audio_path, start_time, end_time):
371
  def save_transcription(transcription):
372
  """
373
  Saves the transcription text to a temporary file.
374
-
375
  Args:
376
  transcription (str): The transcription text.
377
-
378
  Returns:
379
  str: The path to the transcription file.
380
  """
@@ -386,22 +351,19 @@ def save_transcription(transcription):
386
  def get_model_options(pipeline_type):
387
  """
388
  Returns a list of model IDs based on the selected pipeline type.
389
-
390
  Args:
391
- pipeline_type (str): The type of pipeline ('faster-batched', 'faster-sequenced', 'transformers').
392
-
393
  Returns:
394
  list: A list of model IDs.
395
  """
396
- if pipeline_type == "faster-batched":
397
- return ["cstr/whisper-large-v3-turbo-int8_float32", "SYSTRAN/faster-whisper-large-v1", "GalaktischeGurke/primeline-whisper-large-v3-german-ct2"]
398
- elif pipeline_type == "faster-sequenced":
399
- return ["SYSTRAN/faster-whisper-large-v1", "GalaktischeGurke/primeline-whisper-large-v3-german-ct2"]
400
- elif pipeline_type == "transformers":
401
- return ["openai/whisper-large-v3", "openai/whisper-large-v2"]
402
  else:
403
  return []
404
 
 
405
  loaded_models = {}
406
 
407
  def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
 
1
  import gradio as gr
2
  import os
3
  import time
 
 
4
  import tempfile
5
  import subprocess
6
  import requests
 
11
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
12
  import yt_dlp
13
 
14
+ # Set up logging
15
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ # Check for CUDA availability
18
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
19
+ logging.info(f"Using device: {device}")
20
 
21
  def download_audio(url, method_choice):
22
  """
23
  Downloads audio from a given URL using the specified method.
24
+
25
  Args:
26
  url (str): The URL of the audio.
27
  method_choice (str): The method to use for downloading audio.
28
+
29
  Returns:
30
  tuple: (path to the downloaded audio file, is_temp_file), or (error message, False).
31
  """
 
33
  logging.info(f"Downloading audio from URL: {url} using method: {method_choice}")
34
  try:
35
  if 'youtube.com' in parsed_url.netloc or 'youtu.be' in parsed_url.netloc:
 
36
  audio_file = download_youtube_audio(url, method_choice)
37
  elif parsed_url.scheme == 'rtsp':
 
38
  audio_file = download_rtsp_audio(url)
39
  else:
 
40
  audio_file = download_direct_audio(url, method_choice)
41
+
42
  if not audio_file or not os.path.exists(audio_file):
43
  raise Exception(f"Failed to download audio from {url}")
44
+ return audio_file, True
45
  except Exception as e:
46
  logging.error(f"Error downloading audio: {str(e)}")
47
  return f"Error: {str(e)}", False
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  def download_youtube_audio(url, method_choice):
50
  """
51
  Downloads audio from a YouTube URL using the specified method.
52
+
53
  Args:
54
  url (str): The YouTube URL.
55
+ method_choice (str): The method to use for downloading.
56
+
57
  Returns:
58
  str: Path to the downloaded audio file, or None if failed.
59
  """
 
61
  'yt-dlp': yt_dlp_method,
62
  'pytube': pytube_method,
63
  }
64
+ method = methods.get(method_choice, yt_dlp_method)
 
 
 
65
  try:
66
  logging.info(f"Attempting to download YouTube audio using {method_choice}")
67
  return method(url)
 
69
  logging.error(f"Error downloading using {method_choice}: {str(e)}")
70
  return None
71
 
72
def yt_dlp_method(url):
    """
    Fetch the audio of a YouTube video with yt-dlp and extract it as MP3.

    Args:
        url (str): The YouTube URL.

    Returns:
        str: Relative path of the MP3 file, named after the video id
            ("<id>.mp3").
    """
    logging.info("Using yt-dlp method")
    # Post-processing step that turns the downloaded stream into a 192 kbps MP3.
    mp3_extractor = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }
    options = {
        'format': 'bestaudio/best',
        'postprocessors': [mp3_extractor],
        'outtmpl': '%(id)s.%(ext)s',
    }
    with yt_dlp.YoutubeDL(options) as downloader:
        video_info = downloader.extract_info(url, download=True)
        # The output template uses the video id, and the extractor yields .mp3.
        output_file = f"{video_info['id']}.mp3"
        logging.info(f"Downloaded YouTube audio: {output_file}")
        return output_file
97
+
 
 
 
98
def pytube_method(url):
    """
    Downloads the first audio-only stream of a YouTube video using pytube.

    The downloaded file is only renamed to carry an ``.mp3`` extension; this
    function does not re-encode it.

    Args:
        url (str): The YouTube URL.

    Returns:
        str: Path to the downloaded audio file.

    Raises:
        RuntimeError: If the video exposes no audio-only stream.
    """
    logging.info("Using pytube method")
    # Imported lazily so pytube is only required when this method is chosen.
    from pytube import YouTube
    yt = YouTube(url)
    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
        # first() yields None for an empty query; fail with a clear message
        # instead of an AttributeError on the next line.
        raise RuntimeError("No audio-only stream available for this video")
    out_file = audio_stream.download()
    base, _ = os.path.splitext(out_file)
    new_file = base + '.mp3'
    # os.replace overwrites an existing target on every platform, so
    # downloading the same video twice does not fail on Windows.
    os.replace(out_file, new_file)
    logging.info(f"Downloaded and converted audio to: {new_file}")
    return new_file
118
 
119
def download_rtsp_audio(url):
    """
    Downloads audio from an RTSP URL using FFmpeg.

    Args:
        url (str): The RTSP URL.

    Returns:
        str: Path to the downloaded audio file, or None if failed.
    """
    logging.info("Using FFmpeg to download RTSP stream")
    # mkstemp creates the file atomically; tempfile.mktemp only returns a
    # name and is race-prone. FFmpeg is run with -y, so it overwrites the
    # empty placeholder.
    fd, output_file = tempfile.mkstemp(suffix='.mp3')
    os.close(fd)
    command = ['ffmpeg', '-i', url, '-acodec', 'libmp3lame', '-ab', '192k', '-y', output_file]
    try:
        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        # stderr is raw bytes here; never let a decoding error mask the real failure.
        logging.error(f"FFmpeg error: {e.stderr.decode(errors='replace')}")
    except Exception as e:
        logging.error(f"Error downloading RTSP audio: {str(e)}")
    else:
        logging.info(f"Downloaded RTSP audio to: {output_file}")
        return output_file
    # Failure path: do not leave an empty or partial temp file behind.
    try:
        os.remove(output_file)
    except OSError:
        pass
    return None
142
 
143
def download_direct_audio(url, method_choice):
    """
    Fetch audio from a direct URL with the chosen download backend.

    Unknown values of ``method_choice`` fall back to the ``requests`` backend.

    Args:
        url (str): The direct URL of the audio file.
        method_choice (str): Name of the backend ('wget', 'requests',
            'yt-dlp', 'ffmpeg' or 'aria2').

    Returns:
        str: Path to the downloaded audio file, or None if failed.
    """
    logging.info(f"Downloading direct audio from: {url} using method: {method_choice}")
    downloaders = {
        'wget': wget_method,
        'requests': requests_method,
        'yt-dlp': yt_dlp_direct_method,
        'ffmpeg': ffmpeg_method,
        'aria2': aria2_method,
    }
    downloader = downloaders.get(method_choice)
    if downloader is None:
        downloader = requests_method
    try:
        result = downloader(url)
    except Exception as e:
        logging.error(f"Error downloading direct audio: {str(e)}")
        result = None
    return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
  def requests_method(url):
170
  """
171
  Downloads audio using the requests library.
172
+
173
  Args:
174
  url (str): The URL of the audio file.
175
+
176
  Returns:
177
  str: Path to the downloaded audio file, or None if failed.
178
  """
 
191
  except Exception as e:
192
  logging.error(f"Error in requests_method: {str(e)}")
193
  return None
 
 
 
 
194
 
195
def wget_method(url):
    """
    Downloads audio using the wget command-line tool.

    Args:
        url (str): The URL of the audio file.

    Returns:
        str: Path to the downloaded audio file, or None if failed.
    """
    logging.info("Using wget method")
    # mkstemp creates the file atomically; tempfile.mktemp only returns a
    # name and is race-prone. wget -O writes into the placeholder.
    fd, output_file = tempfile.mkstemp(suffix='.mp3')
    os.close(fd)
    command = ['wget', '-O', output_file, url]
    try:
        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        # stderr is raw bytes here; never let a decoding error mask the real failure.
        logging.error(f"Wget error: {e.stderr.decode(errors='replace')}")
    except Exception as e:
        logging.error(f"Error in wget_method: {str(e)}")
    else:
        logging.info(f"Downloaded audio to: {output_file}")
        return output_file
    # Failure path: do not leave an empty or partial temp file behind.
    try:
        os.remove(output_file)
    except OSError:
        pass
    return None
218
 
219
def yt_dlp_direct_method(url):
    """
    Fetch audio with yt-dlp and extract it as MP3 into a temporary path.

    Args:
        url (str): The URL of the audio or webpage containing audio.

    Returns:
        str: Path to the downloaded audio file, or None if failed.
    """
    logging.info("Using yt-dlp direct method")
    # NOTE(review): mktemp only reserves a name and is race-prone; kept as-is
    # because pre-creating the file may change how yt-dlp treats the target.
    # TODO confirm before switching to mkstemp.
    target_path = tempfile.mktemp(suffix='.mp3')
    mp3_extractor = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }
    options = {
        'format': 'bestaudio/best',
        'outtmpl': target_path,
        'quiet': True,
        'no_warnings': True,
        'postprocessors': [mp3_extractor],
    }
    try:
        with yt_dlp.YoutubeDL(options) as downloader:
            downloader.download([url])
    except Exception as e:
        logging.error(f"Error in yt_dlp_direct_method: {str(e)}")
        return None
    else:
        logging.info(f"Downloaded audio to: {target_path}")
        return target_path
250
 
251
def ffmpeg_method(url):
    """
    Downloads audio using FFmpeg, dropping any video track and encoding to MP3.

    Args:
        url (str): The URL of the audio file.

    Returns:
        str: Path to the downloaded audio file, or None if failed.
    """
    logging.info("Using ffmpeg method")
    # mkstemp creates the file atomically; tempfile.mktemp only returns a
    # name and is race-prone.
    fd, output_file = tempfile.mkstemp(suffix='.mp3')
    os.close(fd)
    # -y is required now that the target already exists: without it FFmpeg
    # would stop and ask whether to overwrite the placeholder.
    command = ['ffmpeg', '-y', '-i', url, '-vn', '-acodec', 'libmp3lame', '-q:a', '2', output_file]
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        logging.error(f"FFmpeg error: {e.stderr}")
    except Exception as e:
        logging.error(f"Error in ffmpeg_method: {str(e)}")
    else:
        logging.info(f"Downloaded and converted audio to: {output_file}")
        return output_file
    # Failure path: do not leave an empty or partial temp file behind.
    try:
        os.remove(output_file)
    except OSError:
        pass
    return None
274
 
275
def aria2_method(url):
    """
    Downloads audio using aria2.

    Args:
        url (str): The URL of the audio file.

    Returns:
        str: Path to the downloaded audio file, or None if failed.
    """
    logging.info("Using aria2 method")
    # mkstemp creates the file atomically; tempfile.mktemp only returns a
    # name and is race-prone.
    fd, output_file = tempfile.mkstemp(suffix='.mp3')
    os.close(fd)
    command = [
        'aria2c',
        '--split=4',
        '--max-connection-per-server=4',
        # The placeholder already exists, so aria2 must be told to overwrite
        # it rather than pick a different file name.
        '--allow-overwrite=true',
        # aria2 resolves --out relative to --dir, so an absolute path has to
        # be split into its directory and file name for the download to land
        # at output_file.
        '--dir', os.path.dirname(output_file),
        '--out', os.path.basename(output_file),
        url,
    ]
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        logging.error(f"Aria2 error: {e.stderr}")
    except Exception as e:
        logging.error(f"Error in aria2_method: {str(e)}")
    else:
        logging.info(f"Downloaded audio to: {output_file}")
        return output_file
    # Failure path: do not leave an empty or partial temp file behind.
    try:
        os.remove(output_file)
    except OSError:
        pass
    return None
298
 
299
  def trim_audio(audio_path, start_time, end_time):
300
  """
301
+ Trims an audio file to the specified start and end times.
302
+
303
  Args:
304
  audio_path (str): Path to the audio file.
305
  start_time (float): Start time in seconds.
306
  end_time (float): End time in seconds.
307
+
308
  Returns:
309
  str: Path to the trimmed audio file.
310
+
311
  Raises:
312
+ gr.Error: If invalid start or end times are provided.
313
  """
314
  try:
315
  logging.info(f"Trimming audio from {start_time} to {end_time}")
 
317
  audio_duration = len(audio) / 1000 # Duration in seconds
318
 
319
  # Default start and end times if None
320
+ start_time = max(0, start_time) if start_time is not None else 0
321
+ end_time = min(audio_duration, end_time) if end_time is not None else audio_duration
 
 
322
 
323
  # Validate times
 
 
324
  if start_time >= end_time:
325
  raise gr.Error("End time must be greater than start time.")
 
 
326
 
327
+ trimmed_audio = audio[int(start_time * 1000):int(end_time * 1000)]
328
  with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio_file:
329
  trimmed_audio.export(temp_audio_file.name, format="wav")
330
  logging.info(f"Trimmed audio saved to: {temp_audio_file.name}")
331
  return temp_audio_file.name
 
 
 
332
  except Exception as e:
333
  logging.error(f"Error trimming audio: {str(e)}")
334
  raise gr.Error(f"Error trimming audio: {str(e)}")
 
336
  def save_transcription(transcription):
337
  """
338
  Saves the transcription text to a temporary file.
339
+
340
  Args:
341
  transcription (str): The transcription text.
342
+
343
  Returns:
344
  str: The path to the transcription file.
345
  """
 
351
def get_model_options(pipeline_type):
    """
    List the model IDs that can be selected for a pipeline type.

    Args:
        pipeline_type (str): The type of pipeline.

    Returns:
        list: Model IDs for the "transformers" pipeline, or an empty list
            for any other pipeline type.
    """
    if pipeline_type != "transformers":
        return []
    # A new list is built on every call so callers may mutate the result.
    return [
        "openai/whisper-large-v3",
        "openai/whisper-large-v2",
        "openai/whisper-medium",
        "openai/whisper-small",
    ]
365
 
366
+ # Dictionary to store loaded models
367
  loaded_models = {}
368
 
369
  def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):