cstr committed on
Commit
e922c51
·
verified ·
1 Parent(s): 4f55f4b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -18
app.py CHANGED
@@ -173,7 +173,23 @@ def wget_method(url):
173
  def trim_audio(audio_path, start_time, end_time):
174
  logging.info(f"Trimming audio from {start_time} to {end_time}")
175
  audio = AudioSegment.from_file(audio_path)
176
- trimmed_audio = audio[start_time*1000:end_time*1000] if end_time else audio[start_time*1000:]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  trimmed_audio_path = tempfile.mktemp(suffix='.wav')
178
  trimmed_audio.export(trimmed_audio_path, format="wav")
179
  logging.info(f"Trimmed audio saved to: {trimmed_audio_path}")
@@ -197,6 +213,20 @@ def get_model_options(pipeline_type):
197
  return []
198
 
199
  def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  try:
201
  logging.info(f"Transcription parameters: pipeline_type={pipeline_type}, model_id={model_id}, dtype={dtype}, batch_size={batch_size}, download_method={download_method}")
202
  verbose_messages = f"Starting transcription with parameters:\nPipeline Type: {pipeline_type}\nModel ID: {model_id}\nData Type: {dtype}\nBatch Size: {batch_size}\nDownload Method: {download_method}\n"
@@ -243,8 +273,11 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
243
  else:
244
  audio_path = input_source
245
 
 
 
 
246
  if start_time is not None or end_time is not None:
247
- trimmed_audio_path = trim_audio(audio_path, start_time or 0, end_time)
248
  audio_path = trimmed_audio_path
249
  verbose_messages += f"Audio trimmed from {start_time} to {end_time}\n"
250
  if verbose:
@@ -289,23 +322,23 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
289
  yield f"An error occurred: {str(e)}", "", None
290
 
291
  finally:
292
- if isinstance(input_source, str) and (input_source.startswith('http://') or input_source.startswith('https://')):
293
- try:
294
- os.remove(audio_path)
295
- except:
296
- pass
297
- if start_time is not None or end_time is not None:
298
- try:
299
- os.remove(trimmed_audio_path)
300
- except:
301
- pass
302
 
303
  with gr.Blocks() as iface:
304
  gr.Markdown("# Multi-Pipeline Transcription")
305
  gr.Markdown("Transcribe audio using multiple pipelines and models.")
306
 
307
  with gr.Row():
308
- input_source = gr.Textbox(label="Audio Source (Upload, URL, or YouTube URL)")
309
  pipeline_type = gr.Dropdown(
310
  choices=["faster-batched", "faster-sequenced", "transformers"],
311
  label="Pipeline Type",
@@ -327,8 +360,8 @@ with gr.Blocks() as iface:
327
  )
328
 
329
  with gr.Row():
330
- start_time = gr.Number(label="Start Time (seconds)", value=0)
331
- end_time = gr.Number(label="End Time (seconds)", value=0)
332
  verbose = gr.Checkbox(label="Verbose Output", value=True) # Set to True by default
333
 
334
  transcribe_button = gr.Button("Transcribe")
@@ -366,9 +399,9 @@ with gr.Blocks() as iface:
366
 
367
  gr.Examples(
368
  examples=[
369
- ["https://www.youtube.com/watch?v=daQ_hqA6HDo", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", 0, None, True],
370
- ["https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453_-_The_Price_is_Right_-_Law_and_Economics_in_the_Second_Scholastic5yxzh.mp3", "faster-sequenced", "deepdml/faster-whisper-large-v3-turbo-ct2", "float16", 1, "ffmpeg", 0, 300, True],
371
- ["path/to/local/audio.mp3", "transformers", "openai/whisper-large-v3", "float16", 16, "yt-dlp", 60, 180, True]
372
  ],
373
  inputs=[input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose],
374
  )
 
173
  def trim_audio(audio_path, start_time, end_time):
174
  logging.info(f"Trimming audio from {start_time} to {end_time}")
175
  audio = AudioSegment.from_file(audio_path)
176
+ audio_duration = len(audio) / 1000 # Duration in seconds
177
+
178
+ # Default start and end times if None
179
+ if start_time is None:
180
+ start_time = 0
181
+ if end_time is None or end_time > audio_duration:
182
+ end_time = audio_duration
183
+
184
+ # Validate times
185
+ if start_time < 0 or end_time < 0:
186
+ raise ValueError("Start time and end time must be non-negative.")
187
+ if start_time >= end_time:
188
+ raise gr.Error("End time must be greater than start time.")
189
+ if start_time > audio_duration:
190
+ raise ValueError("Start time exceeds audio duration.")
191
+
192
+ trimmed_audio = audio[start_time * 1000:end_time * 1000]
193
  trimmed_audio_path = tempfile.mktemp(suffix='.wav')
194
  trimmed_audio.export(trimmed_audio_path, format="wav")
195
  logging.info(f"Trimmed audio saved to: {trimmed_audio_path}")
 
213
  return []
214
 
215
  def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
216
+ try:
217
+ # Determine if input_source is a URL or file
218
+ if isinstance(input_source, str):
219
+ if input_source.startswith('http://') or input_source.startswith('https://'):
220
+ audio_path = download_audio(input_source, download_method)
221
+ # Handle potential errors during download
222
+ if not audio_path or audio_path.startswith("Error"):
223
+ yield f"Error: {audio_path}", "", None
224
+ return
225
+ else:
226
+ # Assume input_source is an uploaded file object
227
+ audio_path = input_source.name
228
+ logging.info(f"Using uploaded audio file: {audio_path}")
229
+
230
  try:
231
  logging.info(f"Transcription parameters: pipeline_type={pipeline_type}, model_id={model_id}, dtype={dtype}, batch_size={batch_size}, download_method={download_method}")
232
  verbose_messages = f"Starting transcription with parameters:\nPipeline Type: {pipeline_type}\nModel ID: {model_id}\nData Type: {dtype}\nBatch Size: {batch_size}\nDownload Method: {download_method}\n"
 
273
  else:
274
  audio_path = input_source
275
 
276
+ start_time = float(start_time) if start_time else None
277
+ end_time = float(end_time) if end_time else None
278
+
279
  if start_time is not None or end_time is not None:
280
+ trimmed_audio_path = trim_audio(audio_path, start_time, end_time)
281
  audio_path = trimmed_audio_path
282
  verbose_messages += f"Audio trimmed from {start_time} to {end_time}\n"
283
  if verbose:
 
322
  yield f"An error occurred: {str(e)}", "", None
323
 
324
  finally:
325
+ # Remove downloaded audio file
326
+ if audio_path and os.path.exists(audio_path):
327
+ os.remove(audio_path)
328
+ # Remove trimmed audio file
329
+ if 'trimmed_audio_path' in locals() and os.path.exists(trimmed_audio_path):
330
+ os.remove(trimmed_audio_path)
331
+ # Remove transcription file if needed
332
+ if transcription_file and os.path.exists(transcription_file):
333
+ os.remove(transcription_file)
334
+
335
 
336
  with gr.Blocks() as iface:
337
  gr.Markdown("# Multi-Pipeline Transcription")
338
  gr.Markdown("Transcribe audio using multiple pipelines and models.")
339
 
340
  with gr.Row():
341
+ input_source = gr.File(label="Audio Source (Upload a file or enter a URL/YouTube URL)")
342
  pipeline_type = gr.Dropdown(
343
  choices=["faster-batched", "faster-sequenced", "transformers"],
344
  label="Pipeline Type",
 
360
  )
361
 
362
  with gr.Row():
363
+ start_time = gr.Number(label="Start Time (seconds)", value=None, minimum=0)
364
+ end_time = gr.Number(label="End Time (seconds)", value=None, minimum=0)
365
  verbose = gr.Checkbox(label="Verbose Output", value=True) # Set to True by default
366
 
367
  transcribe_button = gr.Button("Transcribe")
 
399
 
400
  gr.Examples(
401
  examples=[
402
+ ["https://www.youtube.com/watch?v=daQ_hqA6HDo", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", None, None, True],
403
+ ["https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453_-_The_Price_is_Right_-_Law_and_Economics_in_the_Second_Scholastic5yxzh.mp3", "faster-sequenced", "deepdml/faster-whisper-large-v3-turbo-ct2", "float16", 1, "ffmpeg", 0, 300, True],
404
+ [None, "transformers", "openai/whisper-large-v3", "float16", 16, "yt-dlp", 60, 180, True]
405
  ],
406
  inputs=[input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose],
407
  )