Spaces:

abidlabs
/

transcription-delight

Runtime error

App Files Files Community

abidlabs HF staff committed on Jul 2, 2024

Commit

88322f7

1 Parent(s): 9421b23

changes

Browse files

Files changed (6) hide show

__pycache__/app.cpython-312.pyc +0 -0
__pycache__/transcribe.cpython-312.pyc +0 -0
__pycache__/utils.cpython-312.pyc +0 -0
app.py +47 -28
transcribe.py +27 -2
utils.py +18 -2

__pycache__/app.cpython-312.pyc ADDED Viewed

Binary file (4.55 kB). View file

__pycache__/transcribe.cpython-312.pyc ADDED Viewed

Binary file (1.61 kB). View file

__pycache__/utils.cpython-312.pyc ADDED Viewed

Binary file (3.8 kB). View file

app.py CHANGED Viewed

@@ -3,33 +3,52 @@ import utils
 import transcribe
 with gr.Blocks(theme="base") as demo:
-    gr.Markdown("<center><h1> 🔊 Transcribe Anything </h1></center>")
-    with gr.Tab("Input"):
-        with gr.Row():
-            with gr.Column():
-                source = gr.Radio(label="Source type", choices=[("Audio", "audio"), ("Video", "video"), ("YouTube URL", "youtube")], value="Audio")
-                @gr.render(inputs=source)
-                def show_source(s):
-                    if s == "Audio":
-                        gr.Audio()
-                    elif s == "Video":
-                        gr.Video()
-                    elif s == "YouTube URL":
-                        t = gr.Textbox(placeholder="https://www.youtube.com/watch?v=44vi31hehw4")
-                        h = gr.HTML(label="Video preview")
-                        t.change(utils.convert_to_embed_url, t, h)
-            with gr.Column():
-                gr.Dropdown(label="Languages", choices=["(Autodetect)", "English"], value="(Autodetect)")
-                gr.CheckboxGroup(label="Cleanup Transcript with LLM", choices=["Remove typos", "Separate into paragraphs"])
-                gr.Checkbox(label="Diarize Speakers (coming soon)", interactive=False)
-        transcribe_btn = gr.Button("Transcribe!")
-    with gr.Tab("Result"):
-        pass
-    with gr.Tab("Summarize"):
-        pass
-    with gr.Tab("Chat"):
-        pass
 demo.launch()

 import transcribe
 with gr.Blocks(theme="base") as demo:
+    # UI layout: an "Input" tab (source picker, options, transcribe button)
+    # and a "Result" tab (audio download button + preliminary transcript).
+    gr.Markdown("<center><h1> 🔊 Transcription Delight </h1></center>")
+    # NOTE(review): the Tabs container is created with the "result" tab
+    # pre-selected, so the Result tab is the initially selected one --
+    # confirm this is intended and not a debugging leftover.
+    with gr.Tabs(selected="result") as tabs:
+        with gr.Tab("Input"):
+            with gr.Row():
+                with gr.Column():
+                    source = gr.Radio(label="Source type", choices=[("Audio", "audio"), ("Video", "video"), ("YouTube URL", "youtube")], value="audio")
+                    # The function below takes the current value of `source`
+                    # and builds the matching input component.
+                    @gr.render(inputs=source)
+                    def show_source(s):
+                        if s == "audio":
+                            source_component = gr.Audio(type="filepath")
+                        elif s == "video":
+                            source_component = gr.Video()
+                        else:
+                            # Any other value (the "youtube" choice): a URL textbox
+                            # plus an HTML preview refreshed whenever the URL changes.
+                            source_component = gr.Textbox(placeholder="https://www.youtube.com/watch?v=44vi31hehw4")
+                            preview = gr.HTML(label="Video preview")
+                            source_component.change(utils.convert_to_embed_url, source_component, preview)
+                        # The click handler is attached here because it needs the
+                        # freshly created `source_component` as an input.
+                        # `transcribe_btn`, `download_audio` and `preliminary_transcript`
+                        # are assigned further down in this Blocks context.
+                        # Chain: select the "result" tab -> utils.generate_audio fills
+                        # the download button -> transcribe.transcribe writes into the
+                        # preliminary transcript box.
+                        transcribe_btn.click(
+                            lambda : gr.Tabs(selected="result"),
+                            None,
+                            tabs
+                        ).then(
+                            utils.generate_audio,
+                            [source, source_component],
+                            [download_audio],
+                            show_progress="minimal"
+                        ).then(
+                            transcribe.transcribe,
+                            [download_audio],
+                            [preliminary_transcript],
+                            show_progress="hidden"
+                        )
+                with gr.Column():
+                    # These option widgets are not assigned to names and are not
+                    # passed to any event handler in this block.
+                    gr.Dropdown(label="Languages", choices=["(Autodetect)", "English"], value="(Autodetect)")
+                    gr.CheckboxGroup(label="Cleanup Transcript with LLM", choices=["Remove typos", "Separate into paragraphs"])
+                    gr.Checkbox(label="Diarize Speakers (coming soon)", interactive=False)
+            transcribe_btn = gr.Button("Transcribe audio ✨", variant="primary")
+            # Update the button whenever the source type changes.
+            source.change(utils.transcribe_button, source, transcribe_btn)
+        with gr.Tab("Result", id="result"):
+            with gr.Row():
+                with gr.Column():
+                    # Starts non-interactive; utils.generate_audio returns the
+                    # update that attaches the audio file and enables it.
+                    download_audio = gr.DownloadButton("Downloading Audio File (please wait...)", variant="primary", interactive=False, size="sm")
+                    preliminary_transcript = gr.Textbox(info="Preliminary transcript", lines=10, show_copy_button=True, show_label=False, interactive=False)
+                with gr.Column():
+                    gr.Markdown("*Final transcript will appear here*")
 demo.launch()

transcribe.py CHANGED Viewed

@@ -1,2 +1,27 @@
-def transcribe():
-    pass

+from pydub import AudioSegment
+from pydub.utils import make_chunks
+import os
+import whisper
+model = whisper.load_model("base")
+def transcribe(audio_path):
+    transcripts = []
+    for transcript in transcribe_audio_in_chunks(audio_path, chunk_length_ms=30000):
+        transcripts.append(transcript)
+        yield " ".join(transcripts)
+def transcribe_segment(segment, segment_number):
+    temp_filename = f"temp_segment_{segment_number}.wav"
+    segment.export(temp_filename, format="wav")
+    result = model.transcribe(temp_filename)
+    os.remove(temp_filename)
+    return result["text"]
+def transcribe_audio_in_chunks(audio_path, chunk_length_ms):
+    audio = AudioSegment.from_file(audio_path)
+    chunks = make_chunks(audio, chunk_length_ms)
+    for i, chunk in enumerate(chunks):
+        transcription = transcribe_segment(chunk, i)
+        yield transcription

utils.py CHANGED Viewed

@@ -20,11 +20,11 @@ def download_audio_from_youtube(video_url):
         yt = YouTube(video_url)
         audio_stream = yt.streams.filter(only_audio=True).first()
         downloaded_file = audio_stream.download(".")
-        base, ext = os.path.splitext(downloaded_file)
         mp3_file = base + '.mp3'
         AudioSegment.from_file(downloaded_file).export(mp3_file, format='mp3')
         os.remove(downloaded_file)
-        return base
     except Exception as e:
         gr.Error(f"An error occurred: {e}")
@@ -41,3 +41,19 @@ def convert_video_to_audio(input_file):
     except ffmpeg.Error as e:
         gr.Error(f"An error occurred: {e}")

         yt = YouTube(video_url)
         audio_stream = yt.streams.filter(only_audio=True).first()
         downloaded_file = audio_stream.download(".")
+        base, _ = os.path.splitext(downloaded_file)
         mp3_file = base + '.mp3'
         AudioSegment.from_file(downloaded_file).export(mp3_file, format='mp3')
         os.remove(downloaded_file)
+        return mp3_file
     except Exception as e:
         gr.Error(f"An error occurred: {e}")
     except ffmpeg.Error as e:
         gr.Error(f"An error occurred: {e}")
+def transcribe_button(source):
+    if source == "audio":
+        return gr.Button("Transcribe audio ✨")
+    else:
+        return gr.Button("Transcribe video ✨")
+def generate_audio(source, source_file):
+    if source == "audio":
+        audio_file = source_file
+    elif source == "video":
+        gr.Info("Converting video to audio...")
+        audio_file = convert_video_to_audio(source_file)
+    else:
+        gr.Info("Downloading audio from YouTube...")
+        audio_file = download_audio_from_youtube(source_file)
+    return gr.DownloadButton("Downloading Audio File", value=audio_file, interactive=True)