Spaces:
Running
Running
alex buz
committed on
Commit
·
13dd234
1
Parent(s):
245f97c
fix
Browse files- __pycache__/whisper_stt.cpython-311.pyc +0 -0
- app.py +63 -2
- whisper_stt.py +2 -1
__pycache__/whisper_stt.cpython-311.pyc
CHANGED
Binary files a/__pycache__/whisper_stt.cpython-311.pyc and b/__pycache__/whisper_stt.cpython-311.pyc differ
|
|
app.py
CHANGED
@@ -1,7 +1,68 @@
|
|
1 |
import streamlit as st
|
2 |
-
from whisper_stt import whisper_stt
|
3 |
from st_pages import Page, show_pages
|
4 |
from openai import OpenAI
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
# Set page configuration
|
7 |
st.set_page_config(layout="wide")
|
@@ -44,7 +105,7 @@ def handle_enter(key):
|
|
44 |
print(f"session state: {st.session_state}")
|
45 |
with st.sidebar:
|
46 |
api_key = st.text_input("API Key", key="chatbot_api_key", type="password")
|
47 |
-
|
48 |
col1, col2 = st.columns(2)
|
49 |
|
50 |
with col1:
|
|
|
1 |
import streamlit as st
|
|
|
2 |
from st_pages import Page, show_pages
|
3 |
from openai import OpenAI
|
4 |
+
from streamlit_mic_recorder import mic_recorder
|
5 |
+
import io
|
6 |
+
|
7 |
+
|
8 |
+
|
9 |
+
|
10 |
+
def whisper_stt(openai_api_key, start_prompt="Start recording", stop_prompt="Stop recording", just_once=False,
                use_container_width=False, language=None, callback=None, args=(), kwargs=None, key=None):
    """Record microphone audio in Streamlit and transcribe it with OpenAI Whisper.

    Renders a start/stop mic-recorder widget; when a new recording is
    available, uploads the webm audio to the OpenAI ``whisper-1``
    transcription endpoint, retrying up to 3 times on errors.

    Parameters
    ----------
    openai_api_key : str
        Used to lazily create one OpenAI client stored in session state.
        If falsy, any recorded audio is ignored and ``None`` is returned.
    start_prompt, stop_prompt : str
        Button labels forwarded to the recorder widget.
    just_once : bool
        If True, a transcript is only returned on the rerun that produced
        it; otherwise the last transcript is re-returned on later reruns.
    use_container_width : bool
        Forwarded to the recorder widget.
    language : str or None
        Optional language hint forwarded to Whisper.
    callback : callable or None
        Invoked with ``*args, **(kwargs or {})`` whenever a new recording
        was processed on this rerun (even if transcription failed and the
        output is None — NOTE(review): confirm this is intended).
    key : str or None
        Widget key; when given, the result is mirrored into
        ``st.session_state[key + '_output']``.

    Returns
    -------
    str or None
        The transcript text, or None when there is no (new) transcript.
    """
    # One-time session-state initialisation.
    if '_last_speech_to_text_transcript_id' not in st.session_state:
        st.session_state._last_speech_to_text_transcript_id = 0
    if '_last_speech_to_text_transcript' not in st.session_state:
        st.session_state._last_speech_to_text_transcript = None
    if key and key + '_output' not in st.session_state:
        st.session_state[key + '_output'] = None

    audio = mic_recorder(start_prompt=start_prompt, stop_prompt=stop_prompt, just_once=just_once,
                         use_container_width=use_container_width, format="webm", key=key)
    new_output = False
    output = None
    if audio is not None and openai_api_key:
        # Lazily create a single OpenAI client shared across reruns.
        if 'openai_client' not in st.session_state:
            st.session_state.openai_client = OpenAI(api_key=openai_api_key)

        # Recorder ids increase with each recording; renamed from `id`,
        # which shadowed the builtin.
        audio_id = audio['id']
        new_output = audio_id > st.session_state._last_speech_to_text_transcript_id
        if new_output:
            st.session_state._last_speech_to_text_transcript_id = audio_id
            audio_bio = io.BytesIO(audio['bytes'])
            # The SDK infers the upload format from the file name.
            audio_bio.name = 'audio.webm'
            success = False
            err = 0
            while not success and err < 3:  # Retry up to 3 times in case of OpenAI server error.
                try:
                    transcript = st.session_state.openai_client.audio.transcriptions.create(
                        model="whisper-1",
                        file=audio_bio,
                        language=language
                    )
                except Exception as e:
                    print(str(e))  # log the exception in the terminal
                    err += 1
                    # The failed attempt may have consumed the stream;
                    # rewind so the retry re-uploads the full audio.
                    audio_bio.seek(0)
                else:
                    success = True
                    output = transcript.text
                    st.session_state._last_speech_to_text_transcript = output
        elif not just_once:
            output = st.session_state._last_speech_to_text_transcript

    if key:
        st.session_state[key + '_output'] = output
    if new_output and callback:
        callback(*args, **(kwargs or {}))
    return output
|
66 |
|
67 |
# Set page configuration
|
68 |
st.set_page_config(layout="wide")
|
|
|
105 |
print(f"session state: {st.session_state}")
|
106 |
with st.sidebar:
|
107 |
api_key = st.text_input("API Key", key="chatbot_api_key", type="password")
|
108 |
+
|
109 |
col1, col2 = st.columns(2)
|
110 |
|
111 |
with col1:
|
whisper_stt.py
CHANGED
@@ -5,9 +5,10 @@ from openai import OpenAI
|
|
5 |
import os
|
6 |
|
7 |
|
8 |
-
def whisper_stt(openai_api_key
|
9 |
use_container_width=False, language=None, callback=None, args=(), kwargs=None, key=None):
|
10 |
if not 'openai_client' in st.session_state:
|
|
|
11 |
st.session_state.openai_client = OpenAI(api_key=openai_api_key)
|
12 |
if not '_last_speech_to_text_transcript_id' in st.session_state:
|
13 |
st.session_state._last_speech_to_text_transcript_id = 0
|
|
|
5 |
import os
|
6 |
|
7 |
|
8 |
+
def whisper_stt(openai_api_key, start_prompt="Start recording", stop_prompt="Stop recording", just_once=False,
|
9 |
use_container_width=False, language=None, callback=None, args=(), kwargs=None, key=None):
|
10 |
if not 'openai_client' in st.session_state:
|
11 |
+
assert openai_api_key, openai_api_key
|
12 |
st.session_state.openai_client = OpenAI(api_key=openai_api_key)
|
13 |
if not '_last_speech_to_text_transcript_id' in st.session_state:
|
14 |
st.session_state._last_speech_to_text_transcript_id = 0
|