Spaces:

pratikshahp
/

audio-to-text-conversion

Runtime error

pratikshahp committed on Mar 27, 2024

Commit

4d2986c

verified ·

1 Parent(s): b521892

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -20,17 +20,16 @@ def transcribe_audio(audio_bytes):
     audio_tensor = torch.tensor(audio_array, dtype=torch.float64) / 32768.0
     # Provide inputs to the processor
-    inputs = processor(audio=audio_tensor, sampling_rate=16000, return_tensors="pt")
-    # Generate logits from the model
-    logits = model(**inputs).logits
-    # Decode the predicted IDs to get the transcription
-    predicted_ids = torch.argmax(logits, dim=-1)
-    transcription = processor.decode(predicted_ids[0])
-    return transcription
 # Streamlit app
 st.title("Audio to Text Transcription..")

     audio_tensor = torch.tensor(audio_array, dtype=torch.float64) / 32768.0
     # Provide inputs to the processor
+    #inputs = processor(audio=audio_tensor, sampling_rate=16000, return_tensors="pt")
+    input_features = processor(audio_tensor, sampling_rate=16000, return_tensors="pt").input_features
+   # generate token ids
+    predicted_ids = model.generate(input_features)
+    # decode token ids to text
+    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=False)
+    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
+    return transcription
 # Streamlit app
 st.title("Audio to Text Transcription..")