transcribe_audio

Running

App Files Community

cstr committed on Oct 2, 2024

Commit

5e6091c

verified ·

1 Parent(s): 8cc0029

fix bench

Browse files

Files changed (1) hide show

app.py +8 -4

app.py CHANGED Viewed

@@ -528,13 +528,14 @@ def get_model_options(pipeline_type):
 # Dictionary to store loaded models
 loaded_models = {}
-def transcribe_audio(audio_input, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False, include_timecodes=False):
     """
     Transcribes audio from a given source using the specified pipeline and model.
     Args:
         audio_input (str): Path to uploaded audio file or recorded audio.
         audio_url (str): URL of audio.
         proxy_url (str): Proxy URL if needed.
         proxy_username (str): Proxy username.
         proxy_password (str): Proxy password.
@@ -573,7 +574,7 @@ def transcribe_audio(audio_input, audio_url, proxy_url, proxy_username, proxy_pa
             is_temp_file = False
         elif audio_url is not None and len(audio_url.strip()) > 0:
             # audio_url is provided
-            audio_path, is_temp_file = download_audio(audio_url, download_method, proxy_url, proxy_username, proxy_password)
             if not audio_path:
                 error_msg = f"Error downloading audio from {audio_url} using method {download_method}. Check logs for details."
                 logging.error(error_msg)
@@ -639,10 +640,15 @@ def transcribe_audio(audio_input, audio_url, proxy_url, proxy_username, proxy_pa
         # Perform the transcription
         start_time_perf = time.time()
         if pipeline_type == "faster-batched":
             segments, info = model_or_pipeline.transcribe(audio_path, batch_size=batch_size)
         elif pipeline_type == "faster-sequenced":
             segments, info = model_or_pipeline.transcribe(audio_path)
         else:
             result = model_or_pipeline(audio_path)
             segments = result["chunks"]
@@ -661,8 +667,6 @@ def transcribe_audio(audio_input, audio_url, proxy_url, proxy_username, proxy_pa
             yield verbose_messages + metrics_output, "", None
         # Compile the transcription text
-        transcription = ""
         for segment in segments:
             if pipeline_type in ["faster-batched", "faster-sequenced"]:
                 if include_timecodes:

 # Dictionary to store loaded models
 loaded_models = {}
+def transcribe_audio(audio_input, audio_url, use_tor, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False, include_timecodes=False):
     """
     Transcribes audio from a given source using the specified pipeline and model.
     Args:
         audio_input (str): Path to uploaded audio file or recorded audio.
         audio_url (str): URL of audio.
+        use_tor (bool): Whether to use Tor for downloading.
         proxy_url (str): Proxy URL if needed.
         proxy_username (str): Proxy username.
         proxy_password (str): Proxy password.
             is_temp_file = False
         elif audio_url is not None and len(audio_url.strip()) > 0:
             # audio_url is provided
+            audio_path, is_temp_file = download_audio(audio_url, download_method, use_tor, proxy_url, proxy_username, proxy_password)
             if not audio_path:
                 error_msg = f"Error downloading audio from {audio_url} using method {download_method}. Check logs for details."
                 logging.error(error_msg)
         # Perform the transcription
         start_time_perf = time.time()
+        transcription = ""
         if pipeline_type == "faster-batched":
             segments, info = model_or_pipeline.transcribe(audio_path, batch_size=batch_size)
+            # Since segments is a generator, we need to iterate over it to complete transcription
+            segments = list(segments)  # Exhaust the generator
         elif pipeline_type == "faster-sequenced":
             segments, info = model_or_pipeline.transcribe(audio_path)
+            segments = list(segments)  # Exhaust the generator
         else:
             result = model_or_pipeline(audio_path)
             segments = result["chunks"]
             yield verbose_messages + metrics_output, "", None
         # Compile the transcription text
         for segment in segments:
             if pipeline_type in ["faster-batched", "faster-sequenced"]:
                 if include_timecodes: