riteshkr committed
Commit 912008d
Parent: 8c140fb

Update app.py

Files changed (1)
  1. app.py +12 -8
app.py CHANGED
@@ -1,25 +1,29 @@
 import gradio as gr
 import torch
-from transformers import pipeline
+from transformers import pipeline, WhisperForConditionalGeneration, WhisperProcessor
 
 # Check if a GPU is available and set the device
 device = 0 if torch.cuda.is_available() else -1
 
 # Load the ASR model using the Hugging Face pipeline
-model_id = "riteshkr/quantized-whisper-large-v3"  # Update with your model path or ID
-pipe = pipeline("automatic-speech-recognition", model=model_id, device=device)
+model_id = "riteshkr/quantized-whisper-large-v3"
+model = WhisperForConditionalGeneration.from_pretrained(model_id)
+processor = WhisperProcessor.from_pretrained(model_id)
 
-# Define the transcription function with batching support
+# Set the language to English using forced_decoder_ids
+forced_decoder_ids = processor.get_decoder_prompt_ids(language="english", task="transcribe")
+
+pipe = pipeline("automatic-speech-recognition", model=model, processor=processor, device=device)
+
+# Define the transcription function
 def transcribe_speech(filepath):
-    # Adjust batch size based on device (smaller batch for CPU)
     batch_size = 16 if torch.cuda.is_available() else 4
-
+
     output = pipe(
         filepath,
         max_new_tokens=256,
         generate_kwargs={
-            "task": "transcribe",
-            "language": "english",
+            "forced_decoder_ids": forced_decoder_ids,  # Set language through forced_decoder_ids
         },
         chunk_length_s=30,
         batch_size=batch_size,  # Dynamic batch size
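
For context, here is a minimal, self-contained sketch of how the updated pipeline could be exercised end to end. The diff is truncated at the `batch_size` argument, so everything past that point (the `return` statement and the Gradio `Interface` wiring) is an assumption, not the actual remainder of app.py. The sketch also passes the processor's tokenizer and feature extractor to `pipeline` explicitly, the long-supported signature, whereas the commit relies on the newer `processor=` keyword.

```python
import gradio as gr
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor, pipeline

device = 0 if torch.cuda.is_available() else -1

model_id = "riteshkr/quantized-whisper-large-v3"
model = WhisperForConditionalGeneration.from_pretrained(model_id)
processor = WhisperProcessor.from_pretrained(model_id)

# Pin decoding to English transcription; returns (position, token_id) pairs
# that Whisper's generate() forces at the start of every output sequence.
forced_decoder_ids = processor.get_decoder_prompt_ids(language="english", task="transcribe")

pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    device=device,
)

def transcribe_speech(filepath):
    batch_size = 16 if torch.cuda.is_available() else 4  # smaller batch on CPU
    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={"forced_decoder_ids": forced_decoder_ids},
        chunk_length_s=30,  # long-form audio is split into 30 s chunks
        batch_size=batch_size,
    )
    return output["text"]

# Hypothetical UI wiring; the real app.py may define its interface differently.
demo = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Textbox(label="Transcription"),
)

if __name__ == "__main__":
    demo.launch()
```

The substantive change in this commit is the move from passing `"task"` and `"language"` strings in `generate_kwargs` to precomputing `forced_decoder_ids` with the processor, which fixes the language and task at the token level rather than relying on the pipeline to translate those strings for a quantized checkpoint.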