alex buz committed on
Commit 13dd234 · 1 Parent(s): 245f97c
__pycache__/whisper_stt.cpython-311.pyc CHANGED
Binary files a/__pycache__/whisper_stt.cpython-311.pyc and b/__pycache__/whisper_stt.cpython-311.pyc differ
 
app.py CHANGED
@@ -1,7 +1,68 @@
import streamlit as st
- from whisper_stt import whisper_stt
from st_pages import Page, show_pages
from openai import OpenAI
+ from streamlit_mic_recorder import mic_recorder
+ import io
+
+
+
+
+ def whisper_stt(openai_api_key, start_prompt="Start recording", stop_prompt="Stop recording", just_once=False,
+                 use_container_width=False, language=None, callback=None, args=(), kwargs=None, key=None):
+
+
+
+     if not '_last_speech_to_text_transcript_id' in st.session_state:
+         st.session_state._last_speech_to_text_transcript_id = 0
+     if not '_last_speech_to_text_transcript' in st.session_state:
+         st.session_state._last_speech_to_text_transcript = None
+     if key and not key + '_output' in st.session_state:
+         st.session_state[key + '_output'] = None
+     audio = mic_recorder(start_prompt=start_prompt, stop_prompt=stop_prompt, just_once=just_once,
+                          use_container_width=use_container_width, format="webm", key=key)
+     new_output = False
+     if audio is None:
+         output = None
+     else:
+         if openai_api_key:
+             if not 'openai_client' in st.session_state:
+                 #assert openai_api_key, openai_api_key
+                 st.session_state.openai_client = OpenAI(api_key=openai_api_key)
+
+             id = audio['id']
+             new_output = (id > st.session_state._last_speech_to_text_transcript_id)
+             if new_output:
+                 output = None
+                 st.session_state._last_speech_to_text_transcript_id = id
+                 audio_bio = io.BytesIO(audio['bytes'])
+                 audio_bio.name = 'audio.webm'
+                 success = False
+                 err = 0
+                 while not success and err < 3:  # Retry up to 3 times in case of OpenAI server error.
+                     try:
+                         transcript = st.session_state.openai_client.audio.transcriptions.create(
+                             model="whisper-1",
+                             file=audio_bio,
+                             language=language
+                         )
+                     except Exception as e:
+                         print(str(e))  # log the exception in the terminal
+                         err += 1
+                     else:
+                         success = True
+                         output = transcript.text
+                         st.session_state._last_speech_to_text_transcript = output
+             elif not just_once:
+                 output = st.session_state._last_speech_to_text_transcript
+             else:
+                 output = None
+         else:
+             output = None
+     if key:
+         st.session_state[key + '_output'] = output
+     if new_output and callback:
+         callback(*args, **(kwargs or {}))
+     return output

# Set page configuration
st.set_page_config(layout="wide")
@@ -44,7 +105,7 @@ def handle_enter(key):
print(f"session state: {st.session_state}")
with st.sidebar:
    api_key = st.text_input("API Key", key="chatbot_api_key", type="password")
-
+
col1, col2 = st.columns(2)

with col1:
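
The hunks above inline whisper_stt into app.py but do not show where it is called. A minimal usage sketch, assuming the recorder is rendered inside the col1 block from the second hunk and is fed the api_key entered in the sidebar; names other than whisper_stt and api_key are illustrative, not part of this commit:

    # Hypothetical call site for the inlined whisper_stt (not shown in the visible hunks).
    with col1:
        transcript = whisper_stt(
            openai_api_key=api_key,  # required: the inlined signature has no default value
            language="en",           # optional; None lets Whisper auto-detect the language
            key="mic",               # transcript is mirrored to st.session_state["mic_output"]
        )
    if transcript:
        st.write(transcript)         # e.g. hand the text to the chat handler
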
whisper_stt.py CHANGED
@@ -5,9 +5,10 @@ from openai import OpenAI
import os


- def whisper_stt(openai_api_key=None, start_prompt="Start recording", stop_prompt="Stop recording", just_once=False,
+ def whisper_stt(openai_api_key, start_prompt="Start recording", stop_prompt="Stop recording", just_once=False,
                  use_container_width=False, language=None, callback=None, args=(), kwargs=None, key=None):
    if not 'openai_client' in st.session_state:
+         assert openai_api_key, openai_api_key
        st.session_state.openai_client = OpenAI(api_key=openai_api_key)
    if not '_last_speech_to_text_transcript_id' in st.session_state:
        st.session_state._last_speech_to_text_transcript_id = 0
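
In whisper_stt.py the API key loses its None default and is asserted before the OpenAI client is first constructed, so a missing key now fails fast instead of silently creating a client without credentials. A minimal sketch of the changed behavior, assuming a fresh session with no cached 'openai_client':

    # Illustrative only: effect of the updated whisper_stt.py signature.
    whisper_stt(openai_api_key=None)     # AssertionError on first use, before OpenAI(...) is called
    whisper_stt(openai_api_key=api_key)  # builds the client once and caches it in st.session_state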