Asankhaya Sharma committed
Commit 41fa981 · 1 Parent(s): 1a47c70

initial file
Files changed:
- app.py +80 -0
- requirements.txt +68 -0

app.py
ADDED
@@ -0,0 +1,80 @@
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
import torch
import streamlit as st
from streamlit_chat import message

checkpoint = "/Users/user/Documents/data/model/27-3-2023"
#checkpoint = "microsoft/DialoGPT-large"

@st.cache_resource
def get_model():
    model = AutoModelForCausalLM.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    return model, tokenizer

st.title("Chat with myGPT 🦄")
st.write("This is an LLM that was fine-tuned on a dataset of daily conversations.")

if 'count' not in st.session_state or st.session_state.count >= 3:
    st.session_state.count = 0
    st.session_state.chat_history_ids = None
    st.session_state.old_response = ''
else:
    st.session_state.count += 1

if 'message_history' not in st.session_state:
    st.session_state.message_history = []

if 'response_history' not in st.session_state:
    st.session_state.response_history = []

if 'input' not in st.session_state:
    st.session_state.input = ''

def submit():
    st.session_state.input = st.session_state.user_input
    st.session_state.user_input = ''

# prompt = "How long will it take for the poc to finish?"
# inputs = tokenizer(prompt, return_tensors="pt")
model, tokenizer = get_model()
generation_config = GenerationConfig(max_new_tokens=32,
                                     num_beams=4,
                                     early_stopping=True,
                                     no_repeat_ngram_size=2,
                                     do_sample=True,
                                     penalty_alpha=0.6,
                                     top_k=4,
                                     #top_p=0.95,
                                     #temperature=0.8,
                                     pad_token_id=tokenizer.eos_token_id)


for i in range(0, len(st.session_state.message_history)):
    message(st.session_state.message_history[i], is_user=True, key=str(i)+'_user', avatar_style="identicon", seed='You')  # display all the previous messages
    if i in range(0, len(st.session_state.response_history)):
        message(st.session_state.response_history[i], key=str(i), avatar_style="bottts", seed='mera GPT')

placeholder = st.empty()  # placeholder for the latest message
st.text_input('You:', key='user_input', on_change=submit)

if st.session_state.input:
    st.session_state.message_history.append(st.session_state.input)
    new_user_input_ids = tokenizer.encode(tokenizer.eos_token + st.session_state.input, return_tensors="pt")
    bot_input_ids = torch.cat([st.session_state.chat_history_ids, new_user_input_ids], dim=-1) if st.session_state.count > 1 else new_user_input_ids

    st.session_state.chat_history_ids = model.generate(bot_input_ids, generation_config)
    response = tokenizer.decode(st.session_state.chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)

    if st.session_state.old_response == response:
        bot_input_ids = new_user_input_ids
        st.session_state.chat_history_ids = model.generate(bot_input_ids, generation_config)
        response = tokenizer.decode(st.session_state.chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)

    #st.write(f"meraGPT: {response}")
    st.session_state.old_response = response
    st.session_state.response_history.append(response)

    with placeholder.container():
        message(st.session_state.message_history[-1], is_user=True, key=str(-1)+'_user', avatar_style="identicon", seed='You')  # display the latest message
        message(st.session_state.response_history[-1], key=str(-1), avatar_style="bottts", seed='mera GPT')  # display the latest response
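
For reference, the core DialoGPT-style loop in app.py (concatenate the EOS-separated user turn onto the running history, generate, then decode only the newly appended tokens) can be exercised outside Streamlit. A minimal sketch, assuming the pinned transformers build and substituting the public microsoft/DialoGPT-large checkpoint for the private local path:

# Minimal sketch of app.py's chat loop without Streamlit.
# Assumption: microsoft/DialoGPT-large stands in for the private checkpoint.
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
import torch

checkpoint = "microsoft/DialoGPT-large"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)
generation_config = GenerationConfig(max_new_tokens=32,
                                     pad_token_id=tokenizer.eos_token_id)

chat_history_ids = None
for turn in ["Hello there!", "What are you up to today?"]:
    # app.py prepends the EOS token to each user turn as a separator.
    new_ids = tokenizer.encode(tokenizer.eos_token + turn, return_tensors="pt")
    bot_input_ids = (torch.cat([chat_history_ids, new_ids], dim=-1)
                     if chat_history_ids is not None else new_ids)
    chat_history_ids = model.generate(bot_input_ids, generation_config)
    # Decode only the tokens generate() appended after the prompt.
    reply = tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0],
                             skip_special_tokens=True)
    print(f"Bot: {reply}")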
requirements.txt
ADDED
@@ -0,0 +1,68 @@
aioice==0.7.6
aiortc==1.4.0
altair==4.2.2
attrs==22.2.0
av==10.0.0
blinker==1.5
cachetools==5.3.0
certifi==2022.12.7
cffi==1.15.1
charset-normalizer==3.0.1
click==8.1.3
cryptography==39.0.1
decorator==5.1.1
dnspython==2.3.0
entrypoints==0.4
filelock==3.9.0
gitdb==4.0.10
GitPython==3.1.31
google-crc32c==1.5.0
huggingface-hub==0.12.0
idna==3.4
importlib-metadata==6.0.0
Jinja2==3.1.2
jsonschema==4.17.3
markdown-it-py==2.1.0
MarkupSafe==2.1.2
mdurl==0.1.2
netifaces==0.11.0
numpy==1.24.2
packaging==23.0
pandas==1.5.3
Pillow==9.4.0
protobuf==3.20.3
pyarrow==11.0.0
pycparser==2.21
pydeck==0.8.0
pyee==9.0.4
Pygments==2.14.0
pylibsrtp==0.8.0
Pympler==1.0.1
pyOpenSSL==23.0.0
pyrsistent==0.19.3
python-dateutil==2.8.2
pytz==2022.7.1
pytz-deprecation-shim==0.1.0.post0
PyYAML==6.0
regex==2022.10.31
requests==2.28.2
rich==13.3.1
semver==2.13.0
six==1.16.0
smmap==5.0.0
streamlit==1.18.1
streamlit-chat==0.0.2.1
streamlit-webrtc==0.44.6
tokenizers==0.13.2
toml==0.10.2
toolz==0.12.0
torch==1.13.1
tornado==6.2
tqdm==4.64.1
transformers @ git+https://github.com/huggingface/transformers.git@e3d832ff87c6ec997125deaa4f1b239db8f9e613
typing_extensions==4.5.0
tzdata==2022.7
tzlocal==4.2
urllib3==1.26.14
validators==0.20.0
zipp==3.13.0
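
To reproduce the Space locally, a typical workflow (an assumption, not part of the commit) would be to install the pinned dependencies and launch Streamlit, after pointing checkpoint in app.py at a model that actually exists on the machine (for example, by enabling the commented-out microsoft/DialoGPT-large line):

    pip install -r requirements.txt
    streamlit run app.py

Both are standard pip and Streamlit commands; the hard-coded /Users/... path in this commit will fail anywhere except the author's machine.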