File size: 1,600 Bytes
59c41a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f1d7aff
59c41a7
 
 
 
 
 
 
 
f1d7aff
59c41a7
 
aa9af15
59c41a7
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from transformers import pipeline

asr = pipeline(task="automatic-speech-recognition",
               model= "distil-whisper/distil-small.en")

import gradio as gr
demo = gr.Blocks()

def transcribe_long_form(filepath):
    if filepath is None:
        gr.Warning("No audio found, please retry")
        return
    output = asr(filepath,
                 max_new_tokens=256,
                 chunk_length_s=30,
                 batch_size=4,)
    return output['text']

mic_transcribe = gr.Interface(
    fn=transcribe_long_form,
    inputs=gr.Audio(sources="microphone",
                    type="filepath"),
    outputs=gr.Textbox(label="Transcription",
                       lines=7),
    allow_flagging="never",
    description="Speak into the microphone or upload an audio file to transcribe it into text. This model uses a state-of-the-art speech recognition algorithm to recognize spoken words and phrases")

file_transcribe = gr.Interface(
    fn=transcribe_long_form,
    inputs=gr.Audio(sources="upload",
                    type="filepath"),
    outputs=gr.Textbox(label="Transcription",
                       lines=7),
    allow_flagging="never",
    description="Speak into the microphone or upload an audio file to transcribe it into text. This model uses a state-of-the-art speech recognition algorithm to recognize spoken words and phrases")



with demo:
    gr.TabbedInterface(
        [mic_transcribe,
         file_transcribe],
        ["Transcribe Microphone",
         "Transcribe Audio File"],
        title="Speak out Loud - Automatic Speech Recognition"
    )
demo.launch()