Spaces:

aiola
/

whisper-ner-v1

Running on Zero

aiola committed on 27 days ago

Commit

0d8c379

•

1 Parent(s): 8ec7a3f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,7 +6,9 @@ import re  # Import regex library
 # Load model and processor
 processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
-model = WhisperForConditionalGeneration.from_pretrained("aiola/whisper-ner-v1").to("cuda")
 def unify_ner_text(text, symbols_to_replace=("/", " ", ":", "_")):
     """Process and standardize entity text by replacing certain symbols and normalizing spaces."""
@@ -26,6 +28,8 @@ def transcribe_and_recognize_entities(audio_file, prompt):
     signal = signal.cpu()  # Ensure signal is on CPU for processing
     input_features = processor(signal, sampling_rate=target_sample_rate, return_tensors="pt").input_features
     # Split the prompt into individual NER types and process each one
     ner_types = prompt.split(',')
@@ -34,10 +38,10 @@ def transcribe_and_recognize_entities(audio_file, prompt):
     print(f"Prompt after unify_ner_text: {prompt}")
     prompt_ids = processor.get_prompt_ids(prompt, return_tensors="pt")
-    prompt_ids = prompt_ids.to("cuda")
     predicted_ids = model.generate(
-        input_features.to("cuda"),
         max_new_tokens=256,
         prompt_ids=prompt_ids,
         language='en',  # Ensure transcription is translated to English

 # Load model and processor
 processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
+model = WhisperForConditionalGeneration.from_pretrained("aiola/whisper-ner-v1")
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = model.to(device)
 def unify_ner_text(text, symbols_to_replace=("/", " ", ":", "_")):
     """Process and standardize entity text by replacing certain symbols and normalizing spaces."""
     signal = signal.cpu()  # Ensure signal is on CPU for processing
     input_features = processor(signal, sampling_rate=target_sample_rate, return_tensors="pt").input_features
+    input_features = input_features.to(device)
     # Split the prompt into individual NER types and process each one
     ner_types = prompt.split(',')
     print(f"Prompt after unify_ner_text: {prompt}")
     prompt_ids = processor.get_prompt_ids(prompt, return_tensors="pt")
+    prompt_ids = prompt_ids.to(device)
     predicted_ids = model.generate(
+        input_features,
         max_new_tokens=256,
         prompt_ids=prompt_ids,
         language='en',  # Ensure transcription is translated to English