Update app.py
app.py CHANGED
@@ -1,25 +1,29 @@
 import gradio as gr
 import torch
-from transformers import pipeline
+from transformers import pipeline, WhisperForConditionalGeneration, WhisperProcessor
 
 # Check if a GPU is available and set the device
 device = 0 if torch.cuda.is_available() else -1
 
 # Load the ASR model using the Hugging Face pipeline
 model_id = "riteshkr/quantized-whisper-large-v3"
-pipe = pipeline("automatic-speech-recognition", model=model_id, device=device)
+model = WhisperForConditionalGeneration.from_pretrained(model_id)
+processor = WhisperProcessor.from_pretrained(model_id)
 
+# Set the language to English using forced_decoder_ids
+forced_decoder_ids = processor.get_decoder_prompt_ids(language="english", task="transcribe")
+
+pipe = pipeline("automatic-speech-recognition", model=model, processor=processor, device=device)
+
 # Define the transcription function
 def transcribe_speech(filepath):
-    # Adjust batch size based on device (smaller batch for CPU)
     batch_size = 16 if torch.cuda.is_available() else 4
 
     output = pipe(
         filepath,
         max_new_tokens=256,
         generate_kwargs={
-            "task": "transcribe",
-            "language": "english",
+            "forced_decoder_ids": forced_decoder_ids,  # Set language through forced_decoder_ids
         },
         chunk_length_s=30,
         batch_size=batch_size,  # Dynamic batch size