transcribe_audio

Running

App Files Community

cstr committed on Oct 2, 2024

Commit

aeca221

verified ·

1 Parent(s): 5f48e16

c_1

Browse files

Files changed (1) hide show

app.py +179 -217

app.py CHANGED Viewed

@@ -1,8 +1,6 @@
 import gradio as gr
 import os
 import time
-import sys
-import io
 import tempfile
 import subprocess
 import requests
@@ -13,40 +11,21 @@ import torch
 from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
 import yt_dlp
-class LogCapture(io.StringIO):
-    def __init__(self, callback):
-        super().__init__()
-        self.callback = callback
-    def write(self, s):
-        super().write(s)
-        self.callback(s)
-logging.basicConfig(level=logging.INFO)
-# Clone and install faster-whisper from GitHub
-try:
-    subprocess.run(["git", "clone", "https://github.com/SYSTRAN/faster-whisper.git"], check=True)
-    subprocess.run(["pip", "install", "-e", "./faster-whisper"], check=True)
-except subprocess.CalledProcessError as e:
-    logging.error(f"Error during faster-whisper installation: {e}")
-    sys.exit(1)
-sys.path.append("./faster-whisper")
-from faster_whisper import WhisperModel
-from faster_whisper.transcribe import BatchedInferencePipeline
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 def download_audio(url, method_choice):
     """
     Downloads audio from a given URL using the specified method.
     Args:
         url (str): The URL of the audio.
         method_choice (str): The method to use for downloading audio.
     Returns:
         tuple: (path to the downloaded audio file, is_temp_file), or (error message, False).
     """
@@ -54,50 +33,27 @@ def download_audio(url, method_choice):
     logging.info(f"Downloading audio from URL: {url} using method: {method_choice}")
     try:
         if 'youtube.com' in parsed_url.netloc or 'youtu.be' in parsed_url.netloc:
-            # Use YouTube download methods
             audio_file = download_youtube_audio(url, method_choice)
         elif parsed_url.scheme == 'rtsp':
-            # Use RTSP download methods
             audio_file = download_rtsp_audio(url)
         else:
-            # Use direct download methods
             audio_file = download_direct_audio(url, method_choice)
         if not audio_file or not os.path.exists(audio_file):
             raise Exception(f"Failed to download audio from {url}")
-        return audio_file, True  # The file is a temporary file
     except Exception as e:
         logging.error(f"Error downloading audio: {str(e)}")
         return f"Error: {str(e)}", False
-def download_rtsp_audio(url):
-    """
-    Downloads audio from an RTSP URL using FFmpeg.
-    Args:
-        url (str): The RTSP URL.
-    Returns:
-        str: Path to the downloaded audio file, or None if failed.
-    """
-    logging.info("Using FFmpeg to download RTSP stream")
-    output_file = tempfile.mktemp(suffix='.mp3')
-    command = ['ffmpeg', '-i', url, '-acodec', 'libmp3lame', '-ab', '192k', '-y', output_file]
-    try:
-        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        logging.info(f"Downloaded RTSP audio to: {output_file}")
-        return output_file
-    except Exception as e:
-        logging.error(f"Error downloading RTSP audio: {str(e)}")
-        return None
 def download_youtube_audio(url, method_choice):
     """
     Downloads audio from a YouTube URL using the specified method.
     Args:
         url (str): The YouTube URL.
-        method_choice (str): The method to use for downloading ('yt-dlp', 'pytube').
     Returns:
         str: Path to the downloaded audio file, or None if failed.
     """
@@ -105,10 +61,7 @@ def download_youtube_audio(url, method_choice):
         'yt-dlp': yt_dlp_method,
         'pytube': pytube_method,
     }
-    method = methods.get(method_choice)
-    if method is None:
-        logging.warning(f"Invalid download method for YouTube: {method_choice}. Defaulting to 'yt-dlp'.")
-        method = yt_dlp_method
     try:
         logging.info(f"Attempting to download YouTube audio using {method_choice}")
         return method(url)
@@ -116,143 +69,110 @@ def download_youtube_audio(url, method_choice):
         logging.error(f"Error downloading using {method_choice}: {str(e)}")
         return None
-def youtube_dl_method(url):
-    logging.info("Using yt-dlp method")
-    try:
-        ydl_opts = {
-            'format': 'bestaudio/best',
-            'postprocessors': [{
-                'key': 'FFmpegExtractAudio',
-                'preferredcodec': 'mp3',
-                'preferredquality': '192',
-            }],
-            'outtmpl': '%(id)s.%(ext)s',
-        }
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            info = ydl.extract_info(url, download=True)
-            output_file = f"{info['id']}.mp3"
-            logging.info(f"Downloaded YouTube audio: {output_file}")
-            return output_file
-    except Exception as e:
-        logging.error(f"Error in youtube_dl_method: {str(e)}")
-        return None
-def yt_dlp_direct_method(url):
     """
-    Downloads audio using yt-dlp (supports various protocols and sites).
     Args:
-        url (str): The URL of the audio or webpage containing audio.
     Returns:
-        str: Path to the downloaded audio file, or None if failed.
     """
-    logging.info("Using yt-dlp direct method")
-    output_file = tempfile.mktemp(suffix='.mp3')
     ydl_opts = {
         'format': 'bestaudio/best',
-        'outtmpl': output_file,
-        'quiet': True,
-        'no_warnings': True,
         'postprocessors': [{
             'key': 'FFmpegExtractAudio',
             'preferredcodec': 'mp3',
             'preferredquality': '192',
         }],
     }
-    try:
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            ydl.download([url])
-        logging.info(f"Downloaded audio to: {output_file}")
         return output_file
-    except Exception as e:
-        logging.error(f"Error in yt_dlp_direct_method: {str(e)}")
-        return None
 def pytube_method(url):
     """
     Downloads audio using pytube.
     Args:
         url (str): The YouTube URL.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
     """
-    logging.info("Using pytube method")
     try:
-        from pytube import YouTube
-        yt = YouTube(url)
-        audio_stream = yt.streams.filter(only_audio=True).first()
-        out_file = audio_stream.download()
-        base, ext = os.path.splitext(out_file)
-        new_file = base + '.mp3'
-        os.rename(out_file, new_file)
-        logging.info(f"Downloaded and converted audio to: {new_file}")
-        return new_file
     except Exception as e:
-        logging.error(f"Error in pytube_method: {str(e)}")
         return None
-def youtube_dl_classic_method(url):
-    logging.info("Using youtube-dl classic method")
-    ydl_opts = {
-        'format': 'bestaudio/best',
-        'postprocessors': [{
-            'key': 'FFmpegExtractAudio',
-            'preferredcodec': 'mp3',
-            'preferredquality': '192',
-        }],
-        'outtmpl': '%(id)s.%(ext)s',
-    }
-    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-        info = ydl.extract_info(url, download=True)
-        logging.info(f"Downloaded YouTube audio: {info['id']}.mp3")
-        return f"{info['id']}.mp3"
-def youtube_dl_alternative_method(url):
-    logging.info("Using yt-dlp alternative method")
-    ydl_opts = {
-        'format': 'bestaudio/best',
-        'postprocessors': [{
-            'key': 'FFmpegExtractAudio',
-            'preferredcodec': 'mp3',
-            'preferredquality': '192',
-        }],
-        'outtmpl': '%(id)s.%(ext)s',
-        'no_warnings': True,
-        'quiet': True,
-        'no_check_certificate': True,
-        'prefer_insecure': True,
     }
-    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-        info = ydl.extract_info(url, download=True)
-        logging.info(f"Downloaded YouTube audio: {info['id']}.mp3")
-        return f"{info['id']}.mp3"
-def ffmpeg_method(url):
-    logging.info("Using ffmpeg method")
-    output_file = tempfile.mktemp(suffix='.mp3')
-    command = ['ffmpeg', '-i', url, '-vn', '-acodec', 'libmp3lame', '-q:a', '2', output_file]
-    subprocess.run(command, check=True, capture_output=True)
-    logging.info(f"Downloaded and converted audio to: {output_file}")
-    return output_file
-def aria2_method(url):
-    logging.info("Using aria2 method")
-    output_file = tempfile.mktemp(suffix='.mp3')
-    command = ['aria2c', '--split=4', '--max-connection-per-server=4', '--out', output_file, url]
-    subprocess.run(command, check=True, capture_output=True)
-    logging.info(f"Downloaded audio to: {output_file}")
-    return output_file
 def requests_method(url):
     """
     Downloads audio using the requests library.
     Args:
         url (str): The URL of the audio file.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
     """
@@ -271,71 +191,125 @@ def requests_method(url):
     except Exception as e:
         logging.error(f"Error in requests_method: {str(e)}")
         return None
-def download_direct_audio(url, method_choice):
-    """
-    Downloads audio from a direct URL or podcast URL using the specified method.
     Args:
-        url (str): The direct URL of the audio file.
-        method_choice (str): The method to use for downloading ('wget', 'requests', 'yt-dlp', 'ffmpeg', 'aria2').
     Returns:
         str: Path to the downloaded audio file, or None if failed.
     """
-    logging.info(f"Downloading direct audio from: {url} using method: {method_choice}")
-    methods = {
-        'wget': wget_method,
-        'requests': requests_method,
-        'yt-dlp': yt_dlp_direct_method,
-        'ffmpeg': ffmpeg_method,
-        'aria2': aria2_method,
     }
-    method = methods.get(method_choice)
-    if method is None:
-        logging.warning(f"Invalid download method: {method_choice}. Defaulting to 'requests'.")
-        method = requests_method
     try:
-        return method(url)
     except Exception as e:
-        logging.error(f"Error downloading direct audio: {str(e)}")
         return None
-def wget_method(url):
     """
-    Downloads audio using the wget command-line tool.
     Args:
         url (str): The URL of the audio file.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
     """
-    logging.info("Using wget method")
     output_file = tempfile.mktemp(suffix='.mp3')
-    command = ['wget', '-O', output_file, url]
     try:
-        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         logging.info(f"Downloaded audio to: {output_file}")
         return output_file
     except Exception as e:
-        logging.error(f"Error in wget_method: {str(e)}")
         return None
 def trim_audio(audio_path, start_time, end_time):
     """
-    Trims an audio file to the specified start and end times using pydub.
     Args:
         audio_path (str): Path to the audio file.
         start_time (float): Start time in seconds.
         end_time (float): End time in seconds.
     Returns:
         str: Path to the trimmed audio file.
     Raises:
-        gr.Error: If invalid start or end times are provided or if FFmpeg is not found.
     """
     try:
         logging.info(f"Trimming audio from {start_time} to {end_time}")
@@ -343,27 +317,18 @@ def trim_audio(audio_path, start_time, end_time):
         audio_duration = len(audio) / 1000  # Duration in seconds
         # Default start and end times if None
-        if start_time is None:
-            start_time = 0
-        if end_time is None or end_time > audio_duration:
-            end_time = audio_duration
         # Validate times
-        if start_time < 0 or end_time <= 0:
-            raise gr.Error("Start time and end time must be positive.")
         if start_time >= end_time:
             raise gr.Error("End time must be greater than start time.")
-        if start_time > audio_duration:
-            raise gr.Error("Start time exceeds audio duration.")
-        trimmed_audio = audio[start_time * 1000:end_time * 1000]
         with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio_file:
             trimmed_audio.export(temp_audio_file.name, format="wav")
             logging.info(f"Trimmed audio saved to: {temp_audio_file.name}")
         return temp_audio_file.name
-    except FileNotFoundError as e:
-        logging.error(f"FFmpeg not found: {str(e)}")
-        raise gr.Error("FFmpeg not found. Please ensure that FFmpeg is installed and in your system PATH.")
     except Exception as e:
         logging.error(f"Error trimming audio: {str(e)}")
         raise gr.Error(f"Error trimming audio: {str(e)}")
@@ -371,10 +336,10 @@ def trim_audio(audio_path, start_time, end_time):
 def save_transcription(transcription):
     """
     Saves the transcription text to a temporary file.
     Args:
         transcription (str): The transcription text.
     Returns:
         str: The path to the transcription file.
     """
@@ -386,22 +351,19 @@ def save_transcription(transcription):
 def get_model_options(pipeline_type):
     """
     Returns a list of model IDs based on the selected pipeline type.
     Args:
-        pipeline_type (str): The type of pipeline ('faster-batched', 'faster-sequenced', 'transformers').
     Returns:
         list: A list of model IDs.
     """
-    if pipeline_type == "faster-batched":
-        return ["cstr/whisper-large-v3-turbo-int8_float32", "SYSTRAN/faster-whisper-large-v1", "GalaktischeGurke/primeline-whisper-large-v3-german-ct2"]
-    elif pipeline_type == "faster-sequenced":
-        return ["SYSTRAN/faster-whisper-large-v1", "GalaktischeGurke/primeline-whisper-large-v3-german-ct2"]
-    elif pipeline_type == "transformers":
-        return ["openai/whisper-large-v3", "openai/whisper-large-v2"]
     else:
         return []
 loaded_models = {}
 def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):

 import gradio as gr
 import os
 import time
 import tempfile
 import subprocess
 import requests
 from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
 import yt_dlp
+# Set up logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# Check for CUDA availability
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
+logging.info(f"Using device: {device}")
 def download_audio(url, method_choice):
     """
     Downloads audio from a given URL using the specified method.
     Args:
         url (str): The URL of the audio.
         method_choice (str): The method to use for downloading audio.
     Returns:
         tuple: (path to the downloaded audio file, is_temp_file), or (error message, False).
     """
     logging.info(f"Downloading audio from URL: {url} using method: {method_choice}")
     try:
         if 'youtube.com' in parsed_url.netloc or 'youtu.be' in parsed_url.netloc:
             audio_file = download_youtube_audio(url, method_choice)
         elif parsed_url.scheme == 'rtsp':
             audio_file = download_rtsp_audio(url)
         else:
             audio_file = download_direct_audio(url, method_choice)
         if not audio_file or not os.path.exists(audio_file):
             raise Exception(f"Failed to download audio from {url}")
+        return audio_file, True
     except Exception as e:
         logging.error(f"Error downloading audio: {str(e)}")
         return f"Error: {str(e)}", False
 def download_youtube_audio(url, method_choice):
     """
     Downloads audio from a YouTube URL using the specified method.
     Args:
         url (str): The YouTube URL.
+        method_choice (str): The method to use for downloading.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
     """
         'yt-dlp': yt_dlp_method,
         'pytube': pytube_method,
     }
+    method = methods.get(method_choice, yt_dlp_method)
     try:
         logging.info(f"Attempting to download YouTube audio using {method_choice}")
         return method(url)
         logging.error(f"Error downloading using {method_choice}: {str(e)}")
         return None
+def yt_dlp_method(url):
     """
+    Downloads YouTube audio using yt-dlp.
     Args:
+        url (str): The YouTube URL.
     Returns:
+        str: Path to the downloaded audio file.
     """
+    logging.info("Using yt-dlp method")
     ydl_opts = {
         'format': 'bestaudio/best',
         'postprocessors': [{
             'key': 'FFmpegExtractAudio',
             'preferredcodec': 'mp3',
             'preferredquality': '192',
         }],
+        'outtmpl': '%(id)s.%(ext)s',
     }
+    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        info = ydl.extract_info(url, download=True)
+        output_file = f"{info['id']}.mp3"
+        logging.info(f"Downloaded YouTube audio: {output_file}")
         return output_file
 def pytube_method(url):
     """
     Downloads audio using pytube.
     Args:
         url (str): The YouTube URL.
+    Returns:
+        str: Path to the downloaded audio file.
+    """
+    logging.info("Using pytube method")
+    from pytube import YouTube
+    yt = YouTube(url)
+    audio_stream = yt.streams.filter(only_audio=True).first()
+    out_file = audio_stream.download()
+    base, ext = os.path.splitext(out_file)
+    new_file = base + '.mp3'
+    os.rename(out_file, new_file)
+    logging.info(f"Downloaded and converted audio to: {new_file}")
+    return new_file
+def download_rtsp_audio(url):
+    """
+    Downloads audio from an RTSP URL using FFmpeg.
+    Args:
+        url (str): The RTSP URL.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
     """
+    logging.info("Using FFmpeg to download RTSP stream")
+    output_file = tempfile.mktemp(suffix='.mp3')
+    command = ['ffmpeg', '-i', url, '-acodec', 'libmp3lame', '-ab', '192k', '-y', output_file]
     try:
+        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        logging.info(f"Downloaded RTSP audio to: {output_file}")
+        return output_file
+    except subprocess.CalledProcessError as e:
+        logging.error(f"FFmpeg error: {e.stderr.decode()}")
+        return None
     except Exception as e:
+        logging.error(f"Error downloading RTSP audio: {str(e)}")
         return None
+def download_direct_audio(url, method_choice):
+    """
+    Downloads audio from a direct URL using the specified method.
+    Args:
+        url (str): The direct URL of the audio file.
+        method_choice (str): The method to use for downloading.
+    Returns:
+        str: Path to the downloaded audio file, or None if failed.
+    """
+    logging.info(f"Downloading direct audio from: {url} using method: {method_choice}")
+    methods = {
+        'wget': wget_method,
+        'requests': requests_method,
+        'yt-dlp': yt_dlp_direct_method,
+        'ffmpeg': ffmpeg_method,
+        'aria2': aria2_method,
     }
+    method = methods.get(method_choice, requests_method)
+    try:
+        return method(url)
+    except Exception as e:
+        logging.error(f"Error downloading direct audio: {str(e)}")
+        return None
 def requests_method(url):
     """
     Downloads audio using the requests library.
     Args:
         url (str): The URL of the audio file.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
     """
     except Exception as e:
         logging.error(f"Error in requests_method: {str(e)}")
         return None
+def wget_method(url):
+    """
+    Downloads audio using the wget command-line tool.
     Args:
+        url (str): The URL of the audio file.
+    Returns:
+        str: Path to the downloaded audio file, or None if failed.
+    """
+    logging.info("Using wget method")
+    output_file = tempfile.mktemp(suffix='.mp3')
+    command = ['wget', '-O', output_file, url]
+    try:
+        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        logging.info(f"Downloaded audio to: {output_file}")
+        return output_file
+    except subprocess.CalledProcessError as e:
+        logging.error(f"Wget error: {e.stderr.decode()}")
+        return None
+    except Exception as e:
+        logging.error(f"Error in wget_method: {str(e)}")
+        return None
+def yt_dlp_direct_method(url):
+    """
+    Downloads audio using yt-dlp (supports various protocols and sites).
+    Args:
+        url (str): The URL of the audio or webpage containing audio.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
     """
+    logging.info("Using yt-dlp direct method")
+    output_file = tempfile.mktemp(suffix='.mp3')
+    ydl_opts = {
+        'format': 'bestaudio/best',
+        'outtmpl': output_file,
+        'quiet': True,
+        'no_warnings': True,
+        'postprocessors': [{
+            'key': 'FFmpegExtractAudio',
+            'preferredcodec': 'mp3',
+            'preferredquality': '192',
+        }],
     }
     try:
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            ydl.download([url])
+        logging.info(f"Downloaded audio to: {output_file}")
+        return output_file
     except Exception as e:
+        logging.error(f"Error in yt_dlp_direct_method: {str(e)}")
         return None
+def ffmpeg_method(url):
     """
+    Downloads audio using FFmpeg.
     Args:
         url (str): The URL of the audio file.
+    Returns:
+        str: Path to the downloaded audio file, or None if failed.
+    """
+    logging.info("Using ffmpeg method")
+    output_file = tempfile.mktemp(suffix='.mp3')
+    command = ['ffmpeg', '-i', url, '-vn', '-acodec', 'libmp3lame', '-q:a', '2', output_file]
+    try:
+        subprocess.run(command, check=True, capture_output=True, text=True)
+        logging.info(f"Downloaded and converted audio to: {output_file}")
+        return output_file
+    except subprocess.CalledProcessError as e:
+        logging.error(f"FFmpeg error: {e.stderr}")
+        return None
+    except Exception as e:
+        logging.error(f"Error in ffmpeg_method: {str(e)}")
+        return None
+def aria2_method(url):
+    """
+    Downloads audio using aria2.
+    Args:
+        url (str): The URL of the audio file.
     Returns:
         str: Path to the downloaded audio file, or None if failed.
     """
+    logging.info("Using aria2 method")
     output_file = tempfile.mktemp(suffix='.mp3')
+    command = ['aria2c', '--split=4', '--max-connection-per-server=4', '--out', output_file, url]
     try:
+        subprocess.run(command, check=True, capture_output=True, text=True)
         logging.info(f"Downloaded audio to: {output_file}")
         return output_file
+    except subprocess.CalledProcessError as e:
+        logging.error(f"Aria2 error: {e.stderr}")
+        return None
     except Exception as e:
+        logging.error(f"Error in aria2_method: {str(e)}")
         return None
 def trim_audio(audio_path, start_time, end_time):
     """
+    Trims an audio file to the specified start and end times.
     Args:
         audio_path (str): Path to the audio file.
         start_time (float): Start time in seconds.
         end_time (float): End time in seconds.
     Returns:
         str: Path to the trimmed audio file.
     Raises:
+        gr.Error: If invalid start or end times are provided.
     """
     try:
         logging.info(f"Trimming audio from {start_time} to {end_time}")
         audio_duration = len(audio) / 1000  # Duration in seconds
         # Default start and end times if None
+        start_time = max(0, start_time) if start_time is not None else 0
+        end_time = min(audio_duration, end_time) if end_time is not None else audio_duration
         # Validate times
         if start_time >= end_time:
             raise gr.Error("End time must be greater than start time.")
+        trimmed_audio = audio[int(start_time * 1000):int(end_time * 1000)]
         with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio_file:
             trimmed_audio.export(temp_audio_file.name, format="wav")
             logging.info(f"Trimmed audio saved to: {temp_audio_file.name}")
         return temp_audio_file.name
     except Exception as e:
         logging.error(f"Error trimming audio: {str(e)}")
         raise gr.Error(f"Error trimming audio: {str(e)}")
 def save_transcription(transcription):
     """
     Saves the transcription text to a temporary file.
     Args:
         transcription (str): The transcription text.
     Returns:
         str: The path to the transcription file.
     """
 def get_model_options(pipeline_type):
     """
     Returns a list of model IDs based on the selected pipeline type.
     Args:
+        pipeline_type (str): The type of pipeline.
     Returns:
         list: A list of model IDs.
     """
+    if pipeline_type == "transformers":
+        return ["openai/whisper-large-v3", "openai/whisper-large-v2", "openai/whisper-medium", "openai/whisper-small"]
     else:
         return []
+# Dictionary to store loaded models
 loaded_models = {}
 def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):