import gradio as gr
import torch
from transformers import pipeline, WhisperForConditionalGeneration, WhisperProcessor
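
# Run on the first GPU when available; -1 is the pipeline's CPU device index.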
device = 0 if torch.cuda.is_available() else -1
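
# Load the quantized Whisper large-v3 checkpoint and its processor, which
# bundles the tokenizer and the feature extractor.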
model_id = "riteshkr/quantized-whisper-large-v3"
model = WhisperForConditionalGeneration.from_pretrained(model_id)
processor = WhisperProcessor.from_pretrained(model_id)
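
# Pin the decoder to English transcription so the model neither auto-detects
# the language nor switches task to translation.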
forced_decoder_ids = processor.get_decoder_prompt_ids(language="english", task="transcribe")
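
# Assemble the speech-recognition pipeline from the model and processor parts.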
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    device=device,
)


def transcribe_speech(filepath):
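    """Transcribe the audio file at `filepath` and return the recognized text."""
    # Larger batches pay off on GPU; keep them small on CPU.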
    batch_size = 16 if torch.cuda.is_available() else 4
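
    # Split long recordings into 30-second chunks so audio of any length fits
    # Whisper's fixed input window, then decode the chunks in batches.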
    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "forced_decoder_ids": forced_decoder_ids,
        },
        chunk_length_s=30,
        batch_size=batch_size,
    )
    return output["text"]
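

# Two thin Gradio UIs over the same transcription function: one records from the
# microphone, the other accepts an uploaded file. type="filepath" hands the
# function a path on disk rather than a raw audio array.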
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=gr.Textbox(),
)

file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs=gr.Textbox(),
)
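
# Present both interfaces as tabs inside a single Blocks app.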
demo = gr.Blocks()

with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )
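

# debug=True keeps the launcher in the foreground and prints errors as they
# occur; share=True serves the app through a temporary public gradio.live link.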
if __name__ == "__main__":
    demo.launch(debug=True, share=True)