alex buz committed on
Commit 13dd234 · 1 Parent(s): 245f97c
__pycache__/whisper_stt.cpython-311.pyc CHANGED
Binary files a/__pycache__/whisper_stt.cpython-311.pyc and b/__pycache__/whisper_stt.cpython-311.pyc differ
 
app.py CHANGED
@@ -1,7 +1,68 @@
import streamlit as st
- from whisper_stt import whisper_stt
from st_pages import Page, show_pages
from openai import OpenAI
+ from streamlit_mic_recorder import mic_recorder
+ import io
+
+
+
+
+ def whisper_stt(openai_api_key, start_prompt="Start recording", stop_prompt="Stop recording", just_once=False,
+                 use_container_width=False, language=None, callback=None, args=(), kwargs=None, key=None):
+
+
+
+     if not '_last_speech_to_text_transcript_id' in st.session_state:
+         st.session_state._last_speech_to_text_transcript_id = 0
+     if not '_last_speech_to_text_transcript' in st.session_state:
+         st.session_state._last_speech_to_text_transcript = None
+     if key and not key + '_output' in st.session_state:
+         st.session_state[key + '_output'] = None
+     audio = mic_recorder(start_prompt=start_prompt, stop_prompt=stop_prompt, just_once=just_once,
+                          use_container_width=use_container_width, format="webm", key=key)
+     new_output = False
+     if audio is None:
+         output = None
+     else:
+         if openai_api_key:
+             if not 'openai_client' in st.session_state:
+                 #assert openai_api_key, openai_api_key
+                 st.session_state.openai_client = OpenAI(api_key=openai_api_key)
+
+             id = audio['id']
+             new_output = (id > st.session_state._last_speech_to_text_transcript_id)
+             if new_output:
+                 output = None
+                 st.session_state._last_speech_to_text_transcript_id = id
+                 audio_bio = io.BytesIO(audio['bytes'])
+                 audio_bio.name = 'audio.webm'
+                 success = False
+                 err = 0
+                 while not success and err < 3:  # Retry up to 3 times in case of OpenAI server error.
+                     try:
+                         transcript = st.session_state.openai_client.audio.transcriptions.create(
+                             model="whisper-1",
+                             file=audio_bio,
+                             language=language
+                         )
+                     except Exception as e:
+                         print(str(e))  # log the exception in the terminal
+                         err += 1
+                     else:
+                         success = True
+                         output = transcript.text
+                         st.session_state._last_speech_to_text_transcript = output
+             elif not just_once:
+                 output = st.session_state._last_speech_to_text_transcript
+             else:
+                 output = None
+         else:
+             output = None
+     if key:
+         st.session_state[key + '_output'] = output
+     if new_output and callback:
+         callback(*args, **(kwargs or {}))
+     return output

# Set page configuration
st.set_page_config(layout="wide")
@@ -44,7 +105,7 @@ def handle_enter(key):
print(f"session state: {st.session_state}")
with st.sidebar:
    api_key = st.text_input("API Key", key="chatbot_api_key", type="password")
-
+
col1, col2 = st.columns(2)

with col1:
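
The hunks above inline whisper_stt into app.py but do not show where it is called. A minimal usage sketch, assuming the recorder is rendered inside the col1 block from the second hunk and is fed the api_key entered in the sidebar; names other than whisper_stt and api_key are illustrative, not part of this commit:

    # Hypothetical call site for the inlined whisper_stt (not shown in the visible hunks).
    with col1:
        transcript = whisper_stt(
            openai_api_key=api_key,  # required: the inlined signature has no default value
            language="en",           # optional; None lets Whisper auto-detect the language
            key="mic",               # transcript is mirrored to st.session_state["mic_output"]
        )
    if transcript:
        st.write(transcript)         # e.g. hand the text to the chat handler
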
whisper_stt.py CHANGED
@@ -5,9 +5,10 @@ from openai import OpenAI
import os


- def whisper_stt(openai_api_key=None, start_prompt="Start recording", stop_prompt="Stop recording", just_once=False,
+ def whisper_stt(openai_api_key, start_prompt="Start recording", stop_prompt="Stop recording", just_once=False,
                  use_container_width=False, language=None, callback=None, args=(), kwargs=None, key=None):
    if not 'openai_client' in st.session_state:
+         assert openai_api_key, openai_api_key
        st.session_state.openai_client = OpenAI(api_key=openai_api_key)
    if not '_last_speech_to_text_transcript_id' in st.session_state:
        st.session_state._last_speech_to_text_transcript_id = 0
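
In whisper_stt.py the API key loses its None default and is asserted before the OpenAI client is first constructed, so a missing key now fails fast instead of silently creating a client without credentials. A minimal sketch of the changed behavior, assuming a fresh session with no cached 'openai_client':

    # Illustrative only: effect of the updated whisper_stt.py signature.
    whisper_stt(openai_api_key=None)     # AssertionError on first use, before OpenAI(...) is called
    whisper_stt(openai_api_key=api_key)  # builds the client once and caches it in st.session_state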