File size: 1,372 Bytes
62e68d5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import os
import tempfile

import streamlit as st
import whisper
from pydub import AudioSegment
# Whisper checkpoints offered in the model dropdown.
available_models = ["tiny", "base", "small", "medium", "large"]


@st.cache_resource(max_entries=1, show_spinner=False)
def _load_model(name):
    """Load the Whisper checkpoint ``name`` and reuse it across script reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, each rerun would call ``whisper.load_model`` again.
    ``max_entries=1`` keeps only the most recently requested model cached.
    """
    return whisper.load_model(name)


def _transcribe(model, audio_bytes):
    """Convert ``audio_bytes`` to WAV and return ``model.transcribe``'s result.

    All intermediate files live in a temporary directory that is removed when
    this function returns, so nothing is left behind on disk.
    """
    with tempfile.TemporaryDirectory() as work_dir:
        source_path = os.path.join(work_dir, "upload")
        wav_path = os.path.join(work_dir, "converted.wav")

        with open(source_path, "wb") as source_file:
            source_file.write(audio_bytes)

        # Convert audio file to a format supported by Whisper (if necessary).
        # NOTE(review): pydub's export() appears to return the open output
        # file; close it explicitly so the directory cleanup cannot be blocked
        # by a lingering handle -- confirm against the pydub version in use.
        exported = AudioSegment.from_file(source_path).export(wav_path, format="wav")
        exported.close()

        return model.transcribe(wav_path)


st.title("Voice Recognition App")
st.write("Upload an audio file and choose a Whisper model to transcribe it to text.")

# Model selection dropdown
model_choice = st.selectbox("Choose a Whisper model", available_models)

# Load the selected Whisper model (served from the cache after the first load)
st.write(f"Loading {model_choice} model...")
model = _load_model(model_choice)
st.write(f"{model_choice} model loaded successfully.")

# File uploader for audio file
uploaded_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "m4a"])

if uploaded_file is not None:
    # getvalue() returns the whole upload regardless of the current read
    # position, so the same bytes can feed both the player and the transcriber.
    audio_bytes = uploaded_file.getvalue()

    # Use the MIME type reported for the upload rather than always claiming
    # WAV; fall back to the previous value if none was reported.
    st.audio(audio_bytes, format=uploaded_file.type or "audio/wav")

    st.write("Transcribing audio...")

    # Transcribe audio using Whisper model
    result = _transcribe(model, audio_bytes)

    st.write("Transcription:")
    st.write(result["text"])
|