datasciencedojo committed
Commit 60c7e9e · verified · 1 Parent(s): 40d7084

Upgraded code for Gradio 5.11.0
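In short: the Gradio 3-style `gr.Interface(...).launch(enable_queue=True)` call is replaced with a `gr.Blocks` layout where `transcribe` is wired to a button click, and `enable_queue` is dropped (queueing is built into `launch()` since Gradio 4). A minimal, self-contained sketch of that wiring pattern, with a placeholder `echo` handler standing in for this app's transcriber:

import gradio as gr

def echo(text):
    # Placeholder handler; the real app calls a Whisper pipeline here.
    return text

# Blocks-style wiring: declare components inside the context, then
# attach the handler with an explicit click event instead of gr.Interface.
with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Output")
    gr.Button("Run").click(echo, inputs=[inp], outputs=[out])

demo.launch()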

Files changed (1)
  1. app.py +31 -28
app.py CHANGED
@@ -1,11 +1,9 @@
 import torch
-
 import gradio as gr
 import pytube as pt
 from transformers import pipeline
-from huggingface_hub import model_info
 
-MODEL_NAME = "openai/whisper-small" #this always needs to stay in line 8 :D sorry for the hackiness
+MODEL_NAME = "openai/whisper-small" # this always needs to stay in line 8 :D sorry for the hackiness
 lang = "en"
 
 device = 0 if torch.cuda.is_available() else "cpu"
@@ -20,22 +18,31 @@ pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(lan
 
 def transcribe(microphone, file_upload):
     warn_output = ""
-    if (microphone is not None) and (file_upload is not None):
+    if microphone and file_upload:
         warn_output = (
             "WARNING: You've uploaded an audio file and used the microphone. "
-            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
+            "The recorded file from the microphone will be used, and the uploaded audio will be discarded.\n"
         )
 
-    elif (microphone is None) and (file_upload is None):
-        return "ERROR: You have to either use the microphone or upload an audio file"
-
-    file = microphone if microphone is not None else file_upload
+    elif not (microphone or file_upload):
+        return "ERROR: You have to either use the microphone or upload an audio file."
 
+    file = microphone if microphone else file_upload
     text = pipe(file)["text"]
-
     return warn_output + text
 
-demo = gr.Blocks()
+examples = [
+    ['Martin Luther king - FREE AT LAST.mp3'],
+    ['Winston Churchul - ARCH OF VICTOR.mp3'],
+    ['Voice of Neil Armstrong.mp3'],
+    ['Speeh by George Washington.mp3'],
+    ['Speech by John Kennedy.mp3'],
+    ['Al Gore on Inventing the Internet.mp3'],
+    ['Alan Greenspan.mp3'],
+    ['Neil Armstrong - ONE SMALL STEP.mp3'],
+    ['General Eisenhower announcing D-Day landing.mp3'],
+    ['Hey Siri.wav']
+]
 
 css = """
 footer {display:none !important}
@@ -72,22 +79,18 @@ button.gallery-item:hover {
 }
 """
 
-examples = [
-    ['Martin Luther king - FREE AT LAST.mp3'], ['Winston Churchul - ARCH OF VICTOR.mp3'], ['Voice of Neil Armstrong.mp3'], ['Speeh by George Washington.mp3'], ['Speech by John Kennedy.mp3'], ['Al Gore on Inventing the Internet.mp3'], ['Alan Greenspan.mp3'], ['Neil Armstrong - ONE SMALL STEP.mp3'], ['General Eisenhower announcing D-Day landing.mp3'], ['Hey Siri.wav']
-]
-
-mf_transcribe = gr.Interface(
-    fn=transcribe,
-    inputs=[
-        gr.inputs.Audio(source="microphone", type="filepath", optional=True),
-        gr.inputs.Audio(source="upload", type="filepath", optional=True)
-    ],
-    outputs="text",
-    layout="horizontal",
-    theme="huggingface",
-    allow_flagging="never",
-    examples = examples,
-    css = css
-).launch(enable_queue=True)
+with gr.Blocks(css=css) as demo:
+    with gr.Row():
+        gr.Markdown("## Speech Recognition Demo")
+    with gr.Row():
+        mic_input = gr.Audio(source="microphone", type="filepath", label="Microphone Input", interactive=True)
+        file_upload = gr.Audio(source="upload", type="filepath", label="File Upload", interactive=True)
+    with gr.Row():
+        output = gr.Textbox(label="Transcription Output")
+    with gr.Row():
+        gr.Examples(examples=examples, inputs=[file_upload], label="Examples")
 
-#used openai/whisper model
+    transcribe_button = gr.Button("Transcribe")
+    transcribe_button.click(transcribe, inputs=[mic_input, file_upload], outputs=[output])
 
+demo.launch()
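
One caveat worth flagging: the upgraded code still passes `source="microphone"` / `source="upload"` to `gr.Audio`, but Gradio 4 renamed that parameter to `sources` and made it a list, so these constructors should fail on 5.11.0 with an unexpected-keyword TypeError. A minimal sketch of how the input row would presumably need to look, keeping the labels from the diff:

with gr.Row():
    # `sources` (a list) replaced the singular `source` parameter in Gradio 4+.
    mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Microphone Input", interactive=True)
    file_upload = gr.Audio(sources=["upload"], type="filepath", label="File Upload", interactive=True)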