Spaces:

riteshkr
/

transcribe-using-q-whi-L-v3

Sleeping

riteshkr committed on Sep 13

Commit

8c140fb

•

1 Parent(s): 3a35350

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,21 +1,28 @@
 import gradio as gr
 from transformers import pipeline
 # Load the ASR model using the Hugging Face pipeline
 model_id = "riteshkr/quantized-whisper-large-v3"  # Update with your model path or ID
-pipe = pipeline("automatic-speech-recognition", model=model_id)
-# Define the transcription function
 def transcribe_speech(filepath):
     output = pipe(
         filepath,
         max_new_tokens=256,
         generate_kwargs={
             "task": "transcribe",
             "language": "english",
-        },  # Update the language as per your model's fine-tuning
         chunk_length_s=30,
-        batch_size=8,
     )
     return output["text"]

 import gradio as gr
+import torch
 from transformers import pipeline
+# Check if a GPU is available and set the device
+device = 0 if torch.cuda.is_available() else -1
 # Load the ASR model using the Hugging Face pipeline
 model_id = "riteshkr/quantized-whisper-large-v3"  # Update with your model path or ID
+pipe = pipeline("automatic-speech-recognition", model=model_id, device=device)
+# Define the transcription function with batching support
 def transcribe_speech(filepath):
+    # Adjust batch size based on device (smaller batch for CPU)
+    batch_size = 16 if torch.cuda.is_available() else 4
     output = pipe(
         filepath,
         max_new_tokens=256,
         generate_kwargs={
             "task": "transcribe",
             "language": "english",
+        },
         chunk_length_s=30,
+        batch_size=batch_size,  # Dynamic batch size
     )
     return output["text"]