whisper-lit / app.py
import os
import whisper
import streamlit as st
from pydub import AudioSegment
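
# Both pydub and whisper expect ffmpeg on the host: pydub shells out to it for the
# non-WAV conversions below, and whisper uses it to decode audio before transcription.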
st.set_page_config(
    page_title="Whisper based ASR",
    page_icon="musical_note",
    layout="wide",
    initial_sidebar_state="auto",
)
audio_tags = {'comments': 'Converted using pydub!'}
upload_path = "uploads/"
download_path = "downloads/"
transcript_path = "transcripts/"
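
# Safeguard, assuming these folders may be missing on a fresh container:
# create them up front so the file writes and exports below don't fail.
for folder in (upload_path, download_path, transcript_path):
    os.makedirs(folder, exist_ok=True)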

@st.cache(persist=True, allow_output_mutation=False, show_spinner=True, suppress_st_warning=True)
def to_mp3(audio_file, output_audio_file, upload_path, download_path):
    ## Converting Different Audio Formats To MP3 ##
    extension = audio_file.name.split('.')[-1].lower()
    if extension == "wav":
        audio_data = AudioSegment.from_wav(os.path.join(upload_path, audio_file.name))
    elif extension == "mp3":
        audio_data = AudioSegment.from_mp3(os.path.join(upload_path, audio_file.name))
    elif extension == "ogg":
        audio_data = AudioSegment.from_ogg(os.path.join(upload_path, audio_file.name))
    elif extension == "wma":
        audio_data = AudioSegment.from_file(os.path.join(upload_path, audio_file.name), "wma")
    elif extension == "aac":
        audio_data = AudioSegment.from_file(os.path.join(upload_path, audio_file.name), "aac")
    elif extension == "flac":
        audio_data = AudioSegment.from_file(os.path.join(upload_path, audio_file.name), "flac")
    elif extension == "flv":
        audio_data = AudioSegment.from_flv(os.path.join(upload_path, audio_file.name))
    elif extension == "mp4":
        audio_data = AudioSegment.from_file(os.path.join(upload_path, audio_file.name), "mp4")
    audio_data.export(os.path.join(download_path, output_audio_file), format="mp3", tags=audio_tags)
    return output_audio_file
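
# Transcription helper: whisper.load_model accepts the checkpoint names
# "tiny", "base", "small", "medium" and "large", matching the lower-cased
# radio options offered in the UI below.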
@st.cache(persist=True, allow_output_mutation=False, show_spinner=True, suppress_st_warning=True)
def process_audio(filename, model_type):
    model = whisper.load_model(model_type)
    result = model.transcribe(filename)
    return result["text"]
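
# Persist the transcript to disk so it can be re-read and served via the download button.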
@st.cache(persist=True, allow_output_mutation=False, show_spinner=True, suppress_st_warning=True)
def save_transcript(transcript_data, txt_file):
    with open(os.path.join(transcript_path, txt_file), "w") as f:
        f.write(transcript_data)
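
## Streamlit UI: upload, convert to MP3, play back, transcribe, download ##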
st.title("🗣 Automatic Speech Recognition using Whisper by OpenAI ✨")
st.info('✨ Supports all popular audio formats - WAV, MP3, MP4, OGG, WMA, AAC, FLAC, FLV 😉')
uploaded_file = st.file_uploader("Upload audio file", type=["wav", "mp3", "ogg", "wma", "aac", "flac", "mp4", "flv"])

audio_file = None

if uploaded_file is not None:
    audio_bytes = uploaded_file.read()
    # Save the raw upload, then convert it to MP3 for playback and transcription.
    with open(os.path.join(upload_path, uploaded_file.name), "wb") as f:
        f.write(uploaded_file.getbuffer())
    with st.spinner(f"Processing Audio ... 💫"):
        output_audio_file = uploaded_file.name.split('.')[0] + '.mp3'
        output_audio_file = to_mp3(uploaded_file, output_audio_file, upload_path, download_path)
        with open(os.path.join(download_path, output_audio_file), 'rb') as audio_file:
            audio_bytes = audio_file.read()
        print("Opening ", audio_file)
    st.markdown("---")
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("Feel free to play your uploaded audio file 🎼")
        st.audio(audio_bytes)
    with col2:
        whisper_model_type = st.radio("Please choose your model type", ('Tiny', 'Base', 'Small', 'Medium', 'Large'))
        if st.button("Generate Transcript"):
            with st.spinner(f"Generating Transcript... 💫"):
                # Run Whisper on the converted MP3 and write the transcript to disk.
                transcript = process_audio(str(os.path.abspath(os.path.join(download_path, output_audio_file))), whisper_model_type.lower())
                output_txt_file = str(output_audio_file.split('.')[0] + ".txt")
                save_transcript(transcript, output_txt_file)
                with open(os.path.join(transcript_path, output_txt_file), "r") as output_file:
                    output_file_data = output_file.read()
            if st.download_button(
                label="Download Transcript 📝",
                data=output_file_data,
                file_name=output_txt_file,
                mime='text/plain'
            ):
                st.balloons()
                st.success('✅ Download Successful !!')
else:
    st.warning('⚠ Please upload your audio file 😯')
st.markdown("<br><hr><center>Made with ❀️ by <a href='mailto:[email protected]?subject=ASR Whisper WebApp!&body=Please specify the issue you are facing with the app.'><strong>Prateek Ralhan</strong></a> with the help of [whisper](https://github.com/openai/whisper) built by [OpenAI](https://github.com/openai) ✨</center><hr>", unsafe_allow_html=True)