Update app.py
Browse files
app.py
CHANGED
@@ -153,7 +153,9 @@ def load_models_and_documents():
|
|
153 |
tensor_parallel_size=1,
|
154 |
max_model_len=131072,
|
155 |
trust_remote_code=True,
|
156 |
-
enforce_eager=True
|
|
|
|
|
157 |
)
|
158 |
model = models.VLLM(llm)
|
159 |
|
@@ -387,20 +389,14 @@ if __name__ == '__main__':
|
|
387 |
if 'chat_id' not in st.session_state:
|
388 |
st.session_state.chat_id = st.selectbox(
|
389 |
label='Choose a conversation',
|
390 |
-
options=[
|
391 |
format_func=lambda x: conversations.get(x, 'New Chat'),
|
392 |
placeholder='_',
|
393 |
)
|
394 |
else:
|
395 |
-
stop_token_ids = [151329, 151336, 151338]
|
396 |
-
sampling_params = SamplingParams(temperature=0.75, max_tokens=35, stop_token_ids=stop_token_ids)
|
397 |
-
prompt = [{"role": "user", "content": f"{}\nExplain the above in one sentence:"}]
|
398 |
-
inputs = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
|
399 |
-
outputs = llm.generate(prompts=inputs, sampling_params=sampling_params)
|
400 |
-
|
401 |
st.session_state.chat_id = st.selectbox(
|
402 |
label='Choose a conversation',
|
403 |
-
options=[
|
404 |
index=1,
|
405 |
format_func=lambda x: conversations.get(x, 'New Chat' if x != st.session_state.chat_id else st.session_state.chat_title),
|
406 |
placeholder='_',
|
@@ -420,6 +416,15 @@ if __name__ == '__main__':
|
|
420 |
|
421 |
if prompt := st.chat_input("Message Video Game Assistant"):
|
422 |
if st.session_state.chat_id not in conversations.keys():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
423 |
conversations[st.session_state.chat_id] = st.session_state.chat_title
|
424 |
with lz4.frame.open(conversations_path, mode='wb') as fp:
|
425 |
packed_bytes = msgpack.packb(conversations, use_bin_type=True)
|
|
|
153 |
tensor_parallel_size=1,
|
154 |
max_model_len=131072,
|
155 |
trust_remote_code=True,
|
156 |
+
enforce_eager=True,
|
157 |
+
quantization='gptq',
|
158 |
+
gpu_memory_utilization=0.1
|
159 |
)
|
160 |
model = models.VLLM(llm)
|
161 |
|
|
|
389 |
if 'chat_id' not in st.session_state:
|
390 |
st.session_state.chat_id = st.selectbox(
|
391 |
label='Choose a conversation',
|
392 |
+
options=[None] + list(conversations.keys()),
|
393 |
format_func=lambda x: conversations.get(x, 'New Chat'),
|
394 |
placeholder='_',
|
395 |
)
|
396 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
397 |
st.session_state.chat_id = st.selectbox(
|
398 |
label='Choose a conversation',
|
399 |
+
options=[None, st.session_state.chat_id] + list(conversations.keys()),
|
400 |
index=1,
|
401 |
format_func=lambda x: conversations.get(x, 'New Chat' if x != st.session_state.chat_id else st.session_state.chat_title),
|
402 |
placeholder='_',
|
|
|
416 |
|
417 |
if prompt := st.chat_input("Message Video Game Assistant"):
|
418 |
if st.session_state.chat_id not in conversations.keys():
|
419 |
+
stop_token_ids = [151329, 151336, 151338]
|
420 |
+
sampling_params = SamplingParams(temperature=0.75, max_tokens=35, stop_token_ids=stop_token_ids)
|
421 |
+
prompt = [{"role": "user", "content": f"{prompt}\nExplain the above in one sentence:"}]
|
422 |
+
inputs = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
|
423 |
+
outputs = llm.generate(prompts=inputs, sampling_params=sampling_params)
|
424 |
+
|
425 |
+
st.session_state.chat_id = outputs[0].outputs[0].text
|
426 |
+
st.session_state.chat_title = f'ChatSession-{st.session_state.chat_id}'
|
427 |
+
|
428 |
conversations[st.session_state.chat_id] = st.session_state.chat_title
|
429 |
with lz4.frame.open(conversations_path, mode='wb') as fp:
|
430 |
packed_bytes = msgpack.packb(conversations, use_bin_type=True)
|