Asankhaya Sharma committed
Commit 41fa981 · 1 Parent(s): 1a47c70

initial file

Files changed (2):
  1. app.py +80 -0
  2. requirements.txt +68 -0
app.py ADDED
@@ -0,0 +1,80 @@
+ from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
+ import torch
+ import streamlit as st
+ from streamlit_chat import message
+
+ checkpoint = "/Users/user/Documents/data/model/27-3-2023"
+ #checkpoint = "microsoft/DialoGPT-large"
+
+ # Cache the model and tokenizer so they are loaded only once per session.
+ @st.cache_resource
+ def get_model():
+     model = AutoModelForCausalLM.from_pretrained(checkpoint)
+     tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+     return model, tokenizer
+
+ st.title("Chat with myGPT 🦄")
+ st.write("This is an LLM that was fine-tuned on a dataset of daily conversations.")
+
+ # Reset the chat history every three turns to keep the context short.
+ if 'count' not in st.session_state or st.session_state.count >= 3:
+     st.session_state.count = 0
+     st.session_state.chat_history_ids = None
+     st.session_state.old_response = ''
+ else:
+     st.session_state.count += 1
+
+ if 'message_history' not in st.session_state:
+     st.session_state.message_history = []
+
+ if 'response_history' not in st.session_state:
+     st.session_state.response_history = []
+
+ if 'input' not in st.session_state:
+     st.session_state.input = ''
+
+ def submit():
+     # Move the submitted text into session state and clear the input box.
+     st.session_state.input = st.session_state.user_input
+     st.session_state.user_input = ''
+
+ # prompt = "How long will it take for the poc to finish?"
+ # inputs = tokenizer(prompt, return_tensors="pt")
+ model, tokenizer = get_model()
+ generation_config = GenerationConfig(max_new_tokens=32,
+                                      num_beams=4,
+                                      early_stopping=True,
+                                      no_repeat_ngram_size=2,
+                                      do_sample=True,
+                                      penalty_alpha=0.6,
+                                      top_k=4,
+                                      #top_p=0.95,
+                                      #temperature=0.8,
+                                      pad_token_id=tokenizer.eos_token_id)
+
+ # Replay the conversation so far.
+ for i in range(0, len(st.session_state.message_history)):
+     message(st.session_state.message_history[i], is_user=True, key=str(i)+'_user', avatar_style="identicon", seed='You')  # display all the previous messages
+     if i < len(st.session_state.response_history):
+         message(st.session_state.response_history[i], key=str(i), avatar_style="bottts", seed='mera GPT')
+
+ placeholder = st.empty()  # placeholder for the latest message
+ st.text_input('You:', key='user_input', on_change=submit)
+
+ if st.session_state.input:
+     st.session_state.message_history.append(st.session_state.input)
+     new_user_input_ids = tokenizer.encode(tokenizer.eos_token + st.session_state.input, return_tensors="pt")
+     # After the first turn, prepend the accumulated chat history to the new input.
+     bot_input_ids = torch.cat([st.session_state.chat_history_ids, new_user_input_ids], dim=-1) if st.session_state.count > 1 else new_user_input_ids
+
+     st.session_state.chat_history_ids = model.generate(bot_input_ids, generation_config=generation_config)
+     response = tokenizer.decode(st.session_state.chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)
+
+     # If the model repeats its previous answer, retry without the chat history.
+     if st.session_state.old_response == response:
+         bot_input_ids = new_user_input_ids
+         st.session_state.chat_history_ids = model.generate(bot_input_ids, generation_config=generation_config)
+         response = tokenizer.decode(st.session_state.chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)
+
+     #st.write(f"meraGPT: {response}")
+     st.session_state.old_response = response
+     st.session_state.response_history.append(response)
+
+     with placeholder.container():
+         message(st.session_state.message_history[-1], is_user=True, key=str(-1)+'_user', avatar_style="identicon", seed='You')  # display the latest user message
+         message(st.session_state.response_history[-1], key=str(-1), avatar_style="bottts", seed='mera GPT')  # display the latest bot response
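For quick testing without the Streamlit UI, the same multi-turn loop can be run as a plain script. This is a minimal sketch, assuming the public microsoft/DialoGPT-large checkpoint as a stand-in, since the local fine-tuned path above is not distributed; it mirrors the app's convention of prepending the EOS token to each user turn and decoding only the newly generated tokens.

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

    checkpoint = "microsoft/DialoGPT-large"  # assumption: stand-in for the local checkpoint
    model = AutoModelForCausalLM.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    config = GenerationConfig(max_new_tokens=32, pad_token_id=tokenizer.eos_token_id)

    chat_history_ids = None
    for _ in range(3):  # three turns, matching the app's reset interval
        text = input("You: ")
        # Prepend EOS to separate turns, as the app does.
        new_ids = tokenizer.encode(tokenizer.eos_token + text, return_tensors="pt")
        input_ids = new_ids if chat_history_ids is None else torch.cat([chat_history_ids, new_ids], dim=-1)
        chat_history_ids = model.generate(input_ids, generation_config=config)
        # Decode only the tokens generated after the prompt.
        reply = tokenizer.decode(chat_history_ids[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
        print("Bot:", reply)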
requirements.txt ADDED
@@ -0,0 +1,68 @@
+ aioice==0.7.6
+ aiortc==1.4.0
+ altair==4.2.2
+ attrs==22.2.0
+ av==10.0.0
+ blinker==1.5
+ cachetools==5.3.0
+ certifi==2022.12.7
+ cffi==1.15.1
+ charset-normalizer==3.0.1
+ click==8.1.3
+ cryptography==39.0.1
+ decorator==5.1.1
+ dnspython==2.3.0
+ entrypoints==0.4
+ filelock==3.9.0
+ gitdb==4.0.10
+ GitPython==3.1.31
+ google-crc32c==1.5.0
+ huggingface-hub==0.12.0
+ idna==3.4
+ importlib-metadata==6.0.0
+ Jinja2==3.1.2
+ jsonschema==4.17.3
+ markdown-it-py==2.1.0
+ MarkupSafe==2.1.2
+ mdurl==0.1.2
+ netifaces==0.11.0
+ numpy==1.24.2
+ packaging==23.0
+ pandas==1.5.3
+ Pillow==9.4.0
+ protobuf==3.20.3
+ pyarrow==11.0.0
+ pycparser==2.21
+ pydeck==0.8.0
+ pyee==9.0.4
+ Pygments==2.14.0
+ pylibsrtp==0.8.0
+ Pympler==1.0.1
+ pyOpenSSL==23.0.0
+ pyrsistent==0.19.3
+ python-dateutil==2.8.2
+ pytz==2022.7.1
+ pytz-deprecation-shim==0.1.0.post0
+ PyYAML==6.0
+ regex==2022.10.31
+ requests==2.28.2
+ rich==13.3.1
+ semver==2.13.0
+ six==1.16.0
+ smmap==5.0.0
+ streamlit==1.18.1
+ streamlit-chat==0.0.2.1
+ streamlit-webrtc==0.44.6
+ tokenizers==0.13.2
+ toml==0.10.2
+ toolz==0.12.0
+ torch==1.13.1
+ tornado==6.2
+ tqdm==4.64.1
+ transformers @ git+https://github.com/huggingface/transformers.git@e3d832ff87c6ec997125deaa4f1b239db8f9e613
+ typing_extensions==4.5.0
+ tzdata==2022.7
+ tzlocal==4.2
+ urllib3==1.26.14
+ validators==0.20.0
+ zipp==3.13.0
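To reproduce the environment, install the pinned dependencies with pip install -r requirements.txt (note that transformers is pinned to a specific git commit rather than a PyPI release) and then launch the app with streamlit run app.py.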