ayaanzaveri committed on
Commit
b144145
•
1 Parent(s): e9c4b9d

Add application file

Browse files
Files changed (1) hide show
  1. app.py +114 -0
app.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import whisper
3
+ import streamlit as st
4
+ from pydub import AudioSegment
5
+
6
# Page-level configuration for the Streamlit app.
# page_icon uses the ":shortcode:" form; without the surrounding colons the
# value is not recognised as an emoji shortcode.
st.set_page_config(
    page_title="Whisper based ASR",
    page_icon=":musical_note:",
    layout="wide",
    initial_sidebar_state="auto",
)

# Metadata tags passed to pydub when exporting the converted MP3.
audio_tags = {'comments': 'Converted using pydub!'}

# Working directories: raw uploads, converted MP3s, and generated transcripts.
upload_path = "uploads/"
download_path = "downloads/"
transcript_path = "transcripts/"

# The app writes into these directories, so make sure they exist up front
# (a fresh checkout or container will not have them).
for _directory in (upload_path, download_path, transcript_path):
    os.makedirs(_directory, exist_ok=True)
18
+
19
# Extensions to_mp3 knows how to decode; these are the same formats the
# file uploader in this app accepts.
_CONVERTIBLE_EXTENSIONS = frozenset(
    {"wav", "mp3", "ogg", "wma", "aac", "flac", "flv", "mp4"}
)


# NOTE(review): with this cache decorator the body (and therefore the export
# side effect) is presumably skipped on a cache hit -- confirm that the
# previously exported file is always still present and current in that case.
@st.cache(persist=True,allow_output_mutation=False,show_spinner=True,suppress_st_warning=True)
def to_mp3(audio_file, output_audio_file, upload_path, download_path):
    """Convert a saved upload to MP3 and return the output file name.

    The source audio is read from ``upload_path/<audio_file.name>`` and the
    MP3 is written to ``download_path/<output_audio_file>`` with the
    module-level ``audio_tags`` attached.

    Args:
        audio_file: Object exposing a ``name`` attribute (the uploaded file);
            its extension selects the decoder.
        output_audio_file: File name (not path) for the MP3 to create.
        upload_path: Directory holding the saved upload.
        download_path: Directory to write the MP3 into.

    Returns:
        ``output_audio_file``, unchanged.

    Raises:
        ValueError: If the extension of ``audio_file.name`` is not one of the
            supported formats. Previously this case silently skipped the
            export and still returned a file name.
    """
    extension = audio_file.name.split('.')[-1].lower()
    if extension not in _CONVERTIBLE_EXTENSIONS:
        raise ValueError(f"Unsupported audio format: {extension!r}")

    source = os.path.join(upload_path, audio_file.name)
    destination = os.path.join(download_path, output_audio_file)

    # from_file with an explicit format covers every supported type; the
    # format-specific constructors (from_wav, from_mp3, ...) are thin
    # wrappers around this same call.
    audio_data = AudioSegment.from_file(source, extension)
    audio_data.export(destination, format="mp3", tags=audio_tags)
    return output_audio_file
54
+
55
@st.cache(persist=True,allow_output_mutation=False,show_spinner=True,suppress_st_warning=True)
def _transcribe_cached(filename, model_type, fingerprint):
    """Transcribe ``filename`` with the given Whisper model (cached).

    ``fingerprint`` is not used in the body; it exists only so that it is
    part of the call arguments and a changed file at the same path is not
    served a transcript cached for the old content.
    """
    model = whisper.load_model(model_type)
    result = model.transcribe(filename)
    return result["text"]


def process_audio(filename, model_type):
    """Return the Whisper transcript text for an audio file.

    Args:
        filename: Path to the audio file to transcribe.
        model_type: Whisper model name passed to ``whisper.load_model``
            (e.g. ``"tiny"``, ``"base"``).

    Returns:
        The ``"text"`` field of the transcription result.

    Raises:
        OSError: If ``filename`` cannot be stat'ed (e.g. it does not exist).
    """
    # Caching on the path alone returns a stale transcript when a different
    # recording is saved under the same name, so include size and mtime.
    file_stat = os.stat(filename)
    fingerprint = (file_stat.st_size, file_stat.st_mtime_ns)
    return _transcribe_cached(filename, model_type, fingerprint)
60
+
61
def save_transcript(transcript_data, txt_file):
    """Write the transcript text to a file in the transcripts directory.

    Deliberately not cached: this function exists only for its side effect,
    and a cached call would skip the write, leaving the file missing or
    outdated for the reader that opens it afterwards.

    Args:
        transcript_data: Transcript text to write.
        txt_file: File name (not path) to create under ``transcript_path``.
    """
    with open(os.path.join(transcript_path, txt_file), "w") as f:
        f.write(transcript_data)
65
+
66
# ---- Page body: upload -> convert to MP3 -> preview -> transcribe -> download ----
st.title("🗣 Automatic Speech Recognition using whisper by OpenAI ✨")
st.info('✨ Supports all popular audio formats - WAV, MP3, MP4, OGG, WMA, AAC, FLAC, FLV 😉')
uploaded_file = st.file_uploader("Upload audio file", type=["wav","mp3","ogg","wma","aac","flac","mp4","flv"])

if uploaded_file is not None:
    # Persist the upload to disk so the converter can read it by path.
    with open(os.path.join(upload_path, uploaded_file.name), "wb") as f:
        f.write(uploaded_file.getbuffer())

    with st.spinner("Processing Audio ... 💫"):
        # splitext only strips the final extension, so dotted names such as
        # "my.recording.wav" keep their full base name.
        output_audio_file = os.path.splitext(uploaded_file.name)[0] + '.mp3'
        output_audio_file = to_mp3(uploaded_file, output_audio_file, upload_path, download_path)
        converted_path = os.path.join(download_path, output_audio_file)
        # Read the converted audio for the in-page player; the handle is
        # closed as soon as the bytes are in memory.
        with open(converted_path, 'rb') as audio_file:
            audio_bytes = audio_file.read()
    print("Opening ", converted_path)

    st.markdown("---")
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("Feel free to play your uploaded audio file 🎼")
        st.audio(audio_bytes)
    with col2:
        whisper_model_type = st.radio("Please choose your model type", ('Tiny', 'Base', 'Small', 'Medium', 'Large'))

    if st.button("Generate Transcript"):
        with st.spinner("Generating Transcript... 💫"):
            transcript = process_audio(os.path.abspath(converted_path), whisper_model_type.lower())

            output_txt_file = os.path.splitext(output_audio_file)[0] + ".txt"

            save_transcript(transcript, output_txt_file)
            with open(os.path.join(transcript_path, output_txt_file), "r") as output_file:
                output_file_data = output_file.read()

        # NOTE(review): this download button lives inside the st.button
        # branch; clicking it presumably triggers a rerun in which the outer
        # button is no longer "pressed", so the success feedback below may
        # never be shown -- confirm, and consider st.session_state.
        if st.download_button(
            label="Download Transcript 📝",
            data=output_file_data,
            file_name=output_txt_file,
            mime='text/plain'
        ):
            st.balloons()
            st.success('✅ Download Successful !!')

else:
    st.warning('⚠ Please upload your audio file 😯')

st.markdown("<br><hr><center>Made with ❤️ by <a href='mailto:[email protected]?subject=ASR Whisper WebApp!&body=Please specify the issue you are facing with the app.'><strong>Prateek Ralhan</strong></a> with the help of [whisper](https://github.com/openai/whisper) built by [OpenAI](https://github.com/openai) ✨</center><hr>", unsafe_allow_html=True)
113
+
114
+