import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from datasets import load_dataset

# Use the GPU (and half precision) when available, otherwise fall back to CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

model_id = "openai/whisper-large-v3-turbo"

model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
)
model.to(device)

processor = AutoProcessor.from_pretrained(model_id)

# Wrap the model, tokenizer, and feature extractor in an ASR pipeline.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)
# Example usage on a dataset sample (kept for reference):
# dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
# sample = dataset[0]["audio"]
# result = pipe(sample)
# transcript = result["text"]
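# A sketch of long-form transcription options (the "sample.wav" path is
# hypothetical; chunk_length_s, batch_size, return_timestamps, and
# generate_kwargs are standard transformers ASR-pipeline arguments worth
# tuning for your hardware):
# result = pipe(
#     "sample.wav",
#     chunk_length_s=30,
#     batch_size=8,
#     return_timestamps=True,
#     generate_kwargs={"task": "transcribe"},
# )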
import os
import gradio as gr
def launch(audio_path):
    # The pipeline accepts the filepath produced by gr.Audio (type="filepath")
    # and returns a dict with the transcript under "text".
    result = pipe(audio_path)
    transcript = result["text"]
    # translate_str = translate(transcript, 'en', 'sq')
    # return translate_str
    return transcript
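# The commented translation step above calls a translate() helper that this
# file never defines. A minimal sketch, assuming a MarianMT checkpoint
# following the usual "Helsinki-NLP/opus-mt-{src}-{tgt}" naming pattern
# (hypothetical -- substitute whatever translation model you actually use):
def translate(text, src, tgt):
    # Hypothetical model name built from the opus-mt naming convention.
    translator = pipeline("translation", model=f"Helsinki-NLP/opus-mt-{src}-{tgt}")
    return translator(text)[0]["translation_text"]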
# Note: Gradio 4.x renamed gr.Audio(source=...) to sources=[...]; use the
# form that matches your installed Gradio version.
iface = gr.Interface(
    launch,
    inputs=gr.Audio(label="Audio", sources=["microphone"], type="filepath", elem_id="audio"),
    outputs="text",
)
iface.launch(share=True)
# iface.launch(share=True,
#              server_port=int(os.environ['PORT1']))
# iface.close()  # calling close() immediately after launch() shuts the demo down
# Alternative: a Blocks UI that drives the record button via custom JS.
# (The _js parameter below is from the Gradio 3.x API; it was renamed in 4.x.)
# def click_js():
#     return """function audioRecord() {
#         var xPathRes = document.evaluate('//*[@id="audio"]//button', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
#         xPathRes.singleNodeValue.click();}"""
#
# def action(btn):
#     """Changes button text on click"""
#     if btn == 'Speak': return 'Stop'
#     else: return 'Speak'
#
# def check_btn(btn):
#     """Checks for correct button text before invoking transcribe()"""
#     if btn != 'Speak': raise Exception('Recording...')
#
# def transcribe():
#     return 'Success'
#
# with gr.Blocks() as demo:
#     msg = gr.Textbox()
#     audio_box = gr.Audio(label="Audio", source="microphone", type="filepath", elem_id='audio')
#     with gr.Row():
#         audio_btn = gr.Button('Speak')
#         clear = gr.Button("Clear")
#     audio_btn.click(fn=action, inputs=audio_btn, outputs=audio_btn).\
#         then(fn=lambda: None, _js=click_js()).\
#         then(fn=check_btn, inputs=audio_btn).\
#         success(fn=transcribe, outputs=msg)
#     clear.click(lambda: None, None, msg, queue=False)
#
# demo.queue().launch(debug=True)
# print(result["text"])