Spaces:
Runtime error
Runtime error
from helper import * | |
import streamlit as st | |
import uuid | |
import copy | |
import pandas as pd | |
import openai | |
from requests.models import ChunkedEncodingError | |
from streamlit.components import v1 | |
from custom import css_code, js_code, set_context_all | |
from st_audiorec import st_audiorec | |
import numpy as np | |
import wave | |
device = "cpu" | |
# STT | |
import whisper | |
WHISPER_LANG = "en" # detecting language if None | |
#warnings.filterwarnings("ignore") | |
WHISPER_MODEL = whisper.load_model("base") | |
WHISPER_MODEL.to(device) | |
def transcribe(aud_inp): | |
if aud_inp is None: | |
return "" | |
aud = whisper.load_audio(aud_inp) | |
aud = whisper.pad_or_trim(aud) | |
mel = whisper.log_mel_spectrogram(aud).to(device) | |
_, probs = WHISPER_MODEL.detect_language(mel) | |
print(f"spectrogram.shape = {mel}") | |
if device == "cpu": | |
options = whisper.DecodingOptions(fp16 = False, language=WHISPER_LANG) | |
else: | |
options = whisper.DecodingOptions(language=WHISPER_LANG) | |
result = whisper.decode(WHISPER_MODEL, mel, options) | |
print("result.text", result.text) | |
result_text = "" | |
if result and result.text: | |
result_text = result.text | |
return result_text | |
def bytes_to_wav(wav_bytes, output_wav_file = "output.wav", | |
sample_width = 1, # 1 byte per sample | |
sample_rate = 44100, # Sample rate in Hz | |
num_channels = 1, # Mono audio | |
): | |
# Create a WAV file in pcm_s16le format using the wave module | |
with wave.open(output_wav_file, 'wb') as wav_file: | |
wav_file.setnchannels(num_channels) | |
wav_file.setsampwidth(sample_width) | |
wav_file.setframerate(sample_rate) | |
wav_file.setcomptype('NONE', 'not compressed') | |
# Write the audio data from wav_bytes to the WAV file | |
wav_file.writeframes(wav_bytes) | |
###################################################################################### | |
st.set_page_config(page_title='ChatGPT Assistant', layout='wide', page_icon='🤖') | |
# 自定义元素样式 | |
st.markdown(css_code, unsafe_allow_html=True) | |
if "initial_settings" not in st.session_state: | |
# 历史聊天窗口 | |
st.session_state["path"] = set_chats_path() | |
st.session_state['history_chats'] = get_history_chats(st.session_state["path"]) | |
# ss参数初始化 | |
st.session_state['pre_chat'] = None | |
st.session_state['if_chat_change'] = False | |
st.session_state['error_info'] = '' | |
st.session_state["current_chat_index"] = 0 | |
st.session_state['user_input_content'] = '' | |
# 设置完成 | |
st.session_state["initial_settings"] = True | |
with st.sidebar: | |
st.markdown("# 🤖 聊天窗口") | |
current_chat = st.radio( | |
label='历史聊天窗口', | |
format_func=lambda x: x.split('_')[0] if '_' in x else x, | |
options=st.session_state['history_chats'], | |
label_visibility='collapsed', | |
index=st.session_state["current_chat_index"], | |
key='current_chat' + st.session_state['history_chats'][st.session_state["current_chat_index"]], | |
# on_change=current_chat_callback # 此处不适合用回调,无法识别到窗口增减的变动 | |
) | |
if st.session_state['pre_chat'] != current_chat: | |
st.session_state['pre_chat'] = current_chat | |
st.session_state['if_chat_change'] = True | |
st.write("---") | |
c1, c2 = st.columns(2) | |
create_chat_button = c1.button('新建', use_container_width=True, key='create_chat_button') | |
if create_chat_button: | |
st.session_state['history_chats'] = ['New Chat_' + str(uuid.uuid4())] + st.session_state['history_chats'] | |
st.session_state["current_chat_index"] = 0 | |
st.experimental_rerun() | |
delete_chat_button = c2.button('删除', use_container_width=True, key='delete_chat_button') | |
if delete_chat_button: | |
if len(st.session_state['history_chats']) == 1: | |
chat_init = 'New Chat_' + str(uuid.uuid4()) | |
st.session_state['history_chats'].append(chat_init) | |
st.session_state['current_chat'] = chat_init | |
pre_chat_index = st.session_state['history_chats'].index(current_chat) | |
if pre_chat_index > 0: | |
st.session_state["current_chat_index"] = st.session_state['history_chats'].index(current_chat) - 1 | |
else: | |
st.session_state["current_chat_index"] = 0 | |
st.session_state['history_chats'].remove(current_chat) | |
remove_data(st.session_state["path"], current_chat) | |
st.experimental_rerun() | |
for i in range(5): | |
st.write("\n") | |
st.markdown("OpenAI API Key") | |
st.text_input("OpenAI API Key", type='password', key='apikey_input', label_visibility='collapsed') | |
st.caption( | |
"此Key仅在当前网页有效,且优先级高于Secrets中的配置,仅自己可用,他人无法共享。[官网获取](https://platform.openai.com/account/api-keys)") | |
st.caption(""" | |
- 双击页面可直接定位输入栏 | |
- Ctrl + Enter 可快捷提交问题 | |
""") | |
# 加载数据 | |
if ("history" + current_chat not in st.session_state) or (st.session_state['if_chat_change']): | |
for key, value in load_data(st.session_state["path"], current_chat).items(): | |
if key == 'history': | |
st.session_state[key + current_chat] = value | |
else: | |
for k, v in value.items(): | |
st.session_state[k + current_chat + 'default'] = v | |
st.session_state['if_chat_change'] = False | |
# 对话展示 | |
show_messages(st.session_state["history" + current_chat]) | |
# 数据写入文件 | |
def write_data(new_chat_name=current_chat): | |
# 防止高频创建时组件尚未渲染完成,不影响正常写入 | |
if "frequency_penalty" + current_chat in st.session_state: | |
st.session_state["paras"] = { | |
"model": st.session_state["model" + current_chat], | |
"temperature": st.session_state["temperature" + current_chat], | |
"top_p": st.session_state["top_p" + current_chat], | |
"presence_penalty": st.session_state["presence_penalty" + current_chat], | |
"frequency_penalty": st.session_state["frequency_penalty" + current_chat], | |
} | |
st.session_state["contexts"] = { | |
"context_select": st.session_state["context_select" + current_chat], | |
"context_input": st.session_state["context_input" + current_chat], | |
"context_level": st.session_state["context_level" + current_chat], | |
} | |
save_data(st.session_state["path"], new_chat_name, st.session_state["history" + current_chat], | |
st.session_state["paras"], st.session_state["contexts"]) | |
# 输入内容展示 | |
area_user_svg = st.empty() | |
area_user_content = st.empty() | |
# 回复展示 | |
area_gpt_svg = st.empty() | |
area_gpt_content = st.empty() | |
# 报错展示 | |
area_error = st.empty() | |
st.header('ChatGPT Assistant') | |
tap_input, tap_context, tap_set = st.tabs(['💬 聊天', '🗒️ 预设', '⚙️ 设置']) | |
with tap_context: | |
set_context_list = list(set_context_all.keys()) | |
context_select_index = set_context_list.index(st.session_state['context_select' + current_chat + "default"]) | |
st.selectbox(label='选择上下文', options=set_context_list, key='context_select' + current_chat, | |
index=context_select_index, on_change=write_data) | |
st.caption(set_context_all[st.session_state['context_select' + current_chat]]) | |
context_input = st.text_area(label='补充或自定义上下文:', key="context_input" + current_chat, | |
value=st.session_state['context_input' + current_chat + "default"], | |
on_change=write_data) | |
st.caption(context_input) | |
with tap_set: | |
def clear_button_callback(): | |
st.session_state['history' + current_chat] = copy.deepcopy(initial_content_history) | |
write_data() | |
st.button("清空聊天记录", use_container_width=True, on_click=clear_button_callback) | |
st.markdown("包含对话次数:") | |
st.slider("Context Level", 0, 10, st.session_state['context_level' + current_chat + "default"], 1, | |
on_change=write_data, | |
key='context_level' + current_chat, help="表示每次会话中包含的历史对话次数,预设内容不计算在内。") | |
st.markdown("模型参数:") | |
st.selectbox("Model", ["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "gpt-4-32k"], index=0, | |
help="[模型选择参考](https://platform.openai.com/docs/models)", | |
on_change=write_data, key='model' + current_chat) | |
st.slider("Temperature", 0.0, 2.0, st.session_state["temperature" + current_chat + "default"], 0.1, | |
help="""在0和2之间,应该使用什么样的采样温度?较高的值(如0.8)会使输出更随机,而较低的值(如0.2)则会使其更加集中和确定性。 | |
我们一般建议只更改这个参数或top_p参数中的一个,而不要同时更改两个。""", | |
on_change=write_data, key='temperature' + current_chat) | |
st.slider("Top P", 0.1, 1.0, st.session_state["top_p" + current_chat + "default"], 0.1, | |
help="""一种替代采用温度进行采样的方法,称为“基于核心概率”的采样。在该方法中,模型会考虑概率最高的top_p个标记的预测结果。 | |
因此,当该参数为0.1时,只有包括前10%概率质量的标记将被考虑。我们一般建议只更改这个参数或采样温度参数中的一个,而不要同时更改两个。""", | |
on_change=write_data, key='top_p' + current_chat) | |
st.slider("Presence Penalty", -2.0, 2.0, | |
st.session_state["presence_penalty" + current_chat + "default"], 0.1, | |
help="""该参数的取值范围为-2.0到2.0。正值会根据新标记是否出现在当前生成的文本中对其进行惩罚,从而增加模型谈论新话题的可能性。""", | |
on_change=write_data, key='presence_penalty' + current_chat) | |
st.slider("Frequency Penalty", -2.0, 2.0, | |
st.session_state["frequency_penalty" + current_chat + "default"], 0.1, | |
help="""该参数的取值范围为-2.0到2.0。正值会根据新标记在当前生成的文本中的已有频率对其进行惩罚,从而减少模型直接重复相同语句的可能性。""", | |
on_change=write_data, key='frequency_penalty' + current_chat) | |
st.caption("[官网参数说明](https://platform.openai.com/docs/api-reference/completions/create)") | |
with tap_input: | |
def input_callback(): | |
if st.session_state['user_input_area'] != "": | |
# 修改窗口名称 | |
user_input_content = st.session_state['user_input_area'] | |
elif st.session_state['audio_input'] != "": | |
user_input_content = st.session_state['audio_input'] | |
else: | |
return | |
df_history = pd.DataFrame(st.session_state["history" + current_chat]) | |
if len(df_history.query('role!="system"')) == 0: | |
remove_data(st.session_state["path"], current_chat) | |
current_chat_index = st.session_state['history_chats'].index(current_chat) | |
new_name = extract_chars(user_input_content, 18) + '_' + str(uuid.uuid4()) | |
st.session_state['history_chats'][current_chat_index] = new_name | |
st.session_state["current_chat_index"] = current_chat_index | |
# 写入新文件 | |
write_data(new_name) | |
with st.form("input_form", clear_on_submit=True): | |
user_input = st.text_area("**输入:**", key="user_input_area") | |
submitted = st.form_submit_button("确认提交", use_container_width=True, on_click=input_callback) | |
if submitted: | |
st.session_state['user_input_content'] = user_input | |
wav_audio_data = st_audiorec() | |
if wav_audio_data is not None: | |
st.audio(wav_audio_data, format='audio/wav') | |
bytes_to_wav(wav_audio_data, output_wav_file='output.wav', sample_rate=16000) | |
user_input = WHISPER_MODEL.transcribe('output.wav', language=WHISPER_LANG)['text'] | |
st.session_state['audio_input'] = user_input | |
st.session_state['user_input_content'] = user_input | |
input_callback() | |
if submitted or wav_audio_data is not None: | |
st.write("User input: ", user_input) | |
if st.session_state['user_input_content'] != '': | |
if 'r' in st.session_state: | |
st.session_state.pop("r") | |
st.session_state[current_chat + 'report'] = "" | |
st.session_state['pre_user_input_content'] = (remove_hashtag_right__space(st.session_state['user_input_content'] | |
.replace('\n', '\n\n'))) | |
st.session_state['user_input_content'] = '' | |
show_each_message(st.session_state['pre_user_input_content'], 'user', | |
[area_user_svg.markdown, area_user_content.markdown]) | |
context_level_tem = st.session_state['context_level' + current_chat] | |
history_tem = get_history_input(st.session_state["history" + current_chat], context_level_tem) + \ | |
[{"role": "user", "content": st.session_state['pre_user_input_content']}] | |
history_need_input = ([{"role": "system", | |
"content": set_context_all[st.session_state['context_select' + current_chat]]}] | |
+ [{"role": "system", | |
"content": st.session_state['context_input' + current_chat]}] | |
+ history_tem) | |
paras_need_input = { | |
"model": st.session_state["model" + current_chat], | |
"temperature": st.session_state["temperature" + current_chat], | |
"top_p": st.session_state["top_p" + current_chat], | |
"presence_penalty": st.session_state["presence_penalty" + current_chat], | |
"frequency_penalty": st.session_state["frequency_penalty" + current_chat], | |
} | |
with st.spinner("🤔"): | |
try: | |
if apikey := st.session_state['apikey_input']: | |
openai.api_key = apikey | |
else: | |
openai.api_key = st.secrets["apikey"] | |
r = openai.ChatCompletion.create(messages=history_need_input, stream=True, | |
**paras_need_input) | |
except (FileNotFoundError, KeyError): | |
area_error.error("缺失 OpenAI API Key,请在复制项目后配置Secrets,或者在设置中进行临时配置。" | |
"详情见[项目仓库](https://github.com/PierXuY/ChatGPT-Assistant)。") | |
except openai.error.AuthenticationError: | |
area_error.error("无效的 OpenAI API Key。") | |
except openai.error.APIConnectionError as e: | |
area_error.error("连接超时,请重试。报错: \n" + str(e.args[0])) | |
except openai.error.InvalidRequestError as e: | |
area_error.error("无效的请求,请重试。报错: \n" + str(e.args[0])) | |
except openai.error.RateLimitError as e: | |
area_error.error("请求速率过快,请重试。报错: \n" + str(e.args[0])) | |
else: | |
st.session_state["chat_of_r"] = current_chat | |
st.session_state["r"] = r | |
st.experimental_rerun() | |
if ("r" in st.session_state) and (current_chat == st.session_state["chat_of_r"]): | |
if current_chat + 'report' not in st.session_state: | |
st.session_state[current_chat + 'report'] = "" | |
try: | |
for e in st.session_state["r"]: | |
if "content" in e["choices"][0]["delta"]: | |
st.session_state[current_chat + 'report'] += e["choices"][0]["delta"]["content"] | |
show_each_message(st.session_state['pre_user_input_content'], 'user', | |
[area_user_svg.markdown, area_user_content.markdown]) | |
show_each_message(st.session_state[current_chat + 'report'], 'assistant', | |
[area_gpt_svg.markdown, area_gpt_content.markdown]) | |
except ChunkedEncodingError: | |
area_error.error("网络状况不佳,请刷新页面重试。") | |
# 应对stop情形 | |
except Exception: | |
pass | |
else: | |
# 保存内容 | |
st.session_state["history" + current_chat].append( | |
{"role": "user", "content": st.session_state['pre_user_input_content']}) | |
st.session_state["history" + current_chat].append( | |
{"role": "assistant", "content": st.session_state[current_chat + 'report']}) | |
write_data() | |
# 用户在网页点击stop时,ss某些情形下会暂时为空 | |
if current_chat + 'report' in st.session_state: | |
st.session_state.pop(current_chat + 'report') | |
if 'r' in st.session_state: | |
st.session_state.pop("r") | |
# 添加事件监听 | |
v1.html(js_code, height=0) | |