Dagfinn1962 and dfurman committed on
Commit 681ffab (0 parents)

Duplicate from dfurman/chat-gpt-3.5-turbo


Co-authored-by: Daniel Furman <[email protected]>

Files changed (6)
  1. .gitattributes +35 -0
  2. README.md +15 -0
  3. app.py +224 -0
  4. ideas.md +3 -0
  5. requirements.txt +3 -0
  6. src/llm_boilers.py +118 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,15 @@
+ ---
+ title: Chat with gpt-3.5-turbo
+ emoji: 💡
+ colorFrom: gray
+ colorTo: gray
+ sdk: gradio
+ sdk_version: 3.36.1
+ app_file: app.py
+ pinned: false
+ duplicated_from: dfurman/chat-gpt-3.5-turbo
+ ---
+
+ Check out the configuration reference at <https://huggingface.co/docs/hub/spaces-config-reference>
+
+ To use this app, duplicate this Space and add your OpenAI API key as the secret OPENAI_API_KEY.
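
For reference, here is a minimal sketch of reading that secret at startup. This is an illustrative assumption, not part of the app: the app below actually takes the key from a password textbox in the UI.

```python
import os

import openai

# OPENAI_API_KEY is the Space secret named above; os.getenv is the
# standard way to read a secret inside a running Space.
api_key = os.getenv("OPENAI_API_KEY")
if api_key is None:
    raise RuntimeError("Add the OPENAI_API_KEY secret to your duplicated Space.")
openai.api_key = api_key
```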
app.py ADDED
@@ -0,0 +1,224 @@
+ import time
+ import logging
+
+ import gradio as gr
+
+ from src.llm_boilers import llm_boiler
+
+
+ logging.basicConfig(format="%(asctime)s - %(message)s", level=logging.INFO)
+ logging.warning("READY. App started...")
+
+
+ class Chat:
+     default_system_prompt = "A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers."
+     system_format = "<|im_start|>system\n{}<|im_end|>\n"
+
+     def __init__(
+         self, system: str = None, user: str = None, assistant: str = None
+     ) -> None:
+         if system is not None:
+             self.set_system_prompt(system)
+         else:
+             self.reset_system_prompt()
+         self.user = user if user else "<|im_start|>user\n{}<|im_end|>\n"
+         self.assistant = (
+             assistant if assistant else "<|im_start|>assistant\n{}<|im_end|>\n"
+         )
+         self.response_prefix = self.assistant.split("{}")[0]
+
+     def set_system_prompt(self, system_prompt):
+         # the returned value feeds back into the system prompt textbox
+         # via the Gradio wiring below
+         return system_prompt
+
+     def reset_system_prompt(self):
+         return self.set_system_prompt(self.default_system_prompt)
+
+     def history_as_formatted_str(self, system, history) -> str:
+         # serialize the chat history into the ChatML-style format defined above
+         system = self.system_format.format(system)
+         text = system + "".join(
+             [
+                 "\n".join(
+                     [
+                         self.user.format(item[0]),
+                         self.assistant.format(item[1]),
+                     ]
+                 )
+                 for item in history[:-1]
+             ]
+         )
+         text += self.user.format(history[-1][0])
+         text += self.response_prefix
+         # stopgap solution for overly long sequences
+         if len(text) > 4500:
+             # delete turns from the middle, between <|im_start|> and <|im_end|>:
+             # find the earliest markers after the system prompt, then expand out
+             start = text.find("<|im_start|>", 139)
+             end = text.find("<|im_end|>", 139)
+             while end < len(text) and len(text) > 4500:
+                 end = text.find("<|im_end|>", end + 1)
+                 text = text[:start] + text[end + 1 :]
+             if len(text) > 4500:
+                 # the nice way didn't work; just truncate from the beginning
+                 text = text[-4500:]
+
+         return text
+
+     def clear_history(self, history):
+         return []
+
+     def turn(self, system, history, openai_key, user_input: str):
+         # convenience wrapper over the two callbacks wired up separately below
+         _, history = self.user_turn(user_input, history)
+         return self.bot_turn(system, history, openai_key)
+
+     def user_turn(self, user_input: str, history):
+         history.append([user_input, ""])
+         return user_input, history
+
+     def bot_turn(self, system, history, openai_key):
+         conversation = self.history_as_formatted_str(system, history)
+         assistant_response = call_inf_server(conversation, openai_key)
+         # stream the response into the last history entry as chunks arrive
+         history[-1][1] = ""
+         for chunk in assistant_response:
+             try:
+                 decoded_output = chunk["choices"][0]["delta"]["content"]
+                 history[-1][1] += decoded_output
+                 yield history
+             except KeyError:
+                 # some chunks (e.g. the final one) carry no content delta
+                 pass
+
+
+ def call_inf_server(prompt, openai_key):
+     model_id = "gpt-3.5-turbo"  # or "gpt-3.5-turbo-16k"
+     model = llm_boiler(model_id, openai_key)
+     logging.warning(f'Inf via "{model_id}" for prompt "{prompt}"')
+
+     try:
+         # run text generation
+         response = model.run(prompt, temperature=1.0)
+         logging.warning(f"Result of text generation: {response}")
+         return response
+
+     except Exception as e:
+         # assume the error is transient: wait and retry once
+         print(e)
+         time.sleep(2)
+         response = model.run(prompt, temperature=1.0)
+         logging.warning(f"Result of text generation: {response}")
+         return response
+
+
+ with gr.Blocks(
+     theme=gr.themes.Soft(),
+     css=".disclaimer {font-variant-caps: all-small-caps;}",
+ ) as demo:
+     gr.Markdown(
+         """<h1><center>Chat with gpt-3.5-turbo</center></h1>
+
+ This is a lightweight demo of gpt-3.5-turbo conversation completion. It is designed as a template on which to build in-context learning applications.
+ """
+     )
+     conversation = Chat()
+     with gr.Row():
+         with gr.Column():
+             openai_key = gr.Textbox(
+                 label="OpenAI Key",
+                 value="",
+                 type="password",
+                 placeholder="sk-...",
+                 info="You have to provide your own OpenAI API key.",
+             )
+     chatbot = gr.Chatbot().style(height=400)
+     with gr.Row():
+         with gr.Column():
+             msg = gr.Textbox(
+                 label="Chat Message Box",
+                 placeholder="Chat Message Box",
+                 show_label=False,
+             ).style(container=False)
+         with gr.Column():
+             with gr.Row():
+                 submit = gr.Button("Submit")
+                 stop = gr.Button("Stop")
+                 clear = gr.Button("Clear")
+     with gr.Row():
+         with gr.Accordion("Advanced Options:", open=False):
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     system = gr.Textbox(
+                         label="System Prompt",
+                         value=Chat.default_system_prompt,
+                         show_label=False,
+                     ).style(container=False)
+                 with gr.Column():
+                     with gr.Row():
+                         change = gr.Button("Change System Prompt")
+                         reset = gr.Button("Reset System Prompt")
+     with gr.Row():
+         gr.Markdown(
+             "Disclaimer: The gpt-3.5-turbo model can produce factually incorrect output and should not be relied on to produce "
+             "factually accurate information. The gpt-3.5-turbo model was trained on various public datasets; while great efforts "
+             "have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
+             "biased, or otherwise offensive outputs.",
+             elem_classes=["disclaimer"],
+         )
+     with gr.Row():
+         gr.Markdown(
+             "[Privacy policy](https://gist.github.com/samhavens/c29c68cdcd420a9aa0202d0839876dac)",
+             elem_classes=["disclaimer"],
+         )
+
+     submit_event = msg.submit(
+         fn=conversation.user_turn,
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=False,
+     ).then(
+         fn=conversation.bot_turn,
+         inputs=[system, chatbot, openai_key],
+         outputs=[chatbot],
+         queue=True,
+     )
+     submit_click_event = submit.click(
+         fn=conversation.user_turn,
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=False,
+     ).then(
+         fn=conversation.bot_turn,
+         inputs=[system, chatbot, openai_key],
+         outputs=[chatbot],
+         queue=True,
+     )
+     stop.click(
+         fn=None,
+         inputs=None,
+         outputs=None,
+         cancels=[submit_event, submit_click_event],
+         queue=False,
+     )
+     clear.click(lambda: None, None, chatbot, queue=False).then(
+         fn=conversation.clear_history,
+         inputs=[chatbot],
+         outputs=[chatbot],
+         queue=False,
+     )
+     change.click(
+         fn=conversation.set_system_prompt,
+         inputs=[system],
+         outputs=[system],
+         queue=False,
+     )
+     reset.click(
+         fn=conversation.reset_system_prompt,
+         inputs=[],
+         outputs=[system],
+         queue=False,
+     )
+
+
+ demo.queue(max_size=36, concurrency_count=14).launch(debug=True)
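
For context, here is a minimal sketch of the ChatML-style prompt that `history_as_formatted_str` assembles before each API call. The two-turn history is hypothetical, and the snippet assumes the `Chat` class above is in scope:

```python
chat = Chat()

# hypothetical two-turn history in the [[user, assistant], ...] shape
# kept by gr.Chatbot; the final assistant slot is still empty
history = [["Hi!", "Hello! How can I help?"], ["Tell me a joke.", ""]]

print(chat.history_as_formatted_str(Chat.default_system_prompt, history))
# <|im_start|>system
# A conversation between a user and an LLM-based AI assistant. ...<|im_end|>
# <|im_start|>user
# Hi!<|im_end|>
#
# <|im_start|>assistant
# Hello! How can I help?<|im_end|>
# <|im_start|>user
# Tell me a joke.<|im_end|>
# <|im_start|>assistant
```

The trailing `<|im_start|>assistant\n` is the `response_prefix` cue the model completes; the blank line mid-transcript comes from the `"\n".join` over each past turn.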
ideas.md ADDED
@@ -0,0 +1,3 @@
+ - add examples
+ - "The bakers at the Beverly Hills Bakery baked 200 loaves of bread on Monday morning. They sold 93 loaves in the morning and 39 loaves in the afternoon. A grocery store returned 6 unsold loaves. How many loaves of bread did they have left? Let's think this through in a step-by-step fashion to make sure we have the right answer."
+ - "Write me a long list of things to do in New York City."
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ gradio>=3.36.1
+ openai
+
src/llm_boilers.py ADDED
@@ -0,0 +1,118 @@
+ # custom text generation llm classes
+
+ import warnings
+ import logging
+ import os
+
+ import openai
+
+ # suppress warnings
+ warnings.filterwarnings("ignore")
+
+
+ class llm_boiler:
+     def __init__(self, model_id, openai_key):
+         self.model_id = model_id
+         self.openai_key = openai_key
+         # LOAD_MODEL_FUNCTIONS and MODEL_FUNCTIONS are parallel lists, so the
+         # index of a matching run function also selects its load function
+         for f_idx, run_function in enumerate(MODEL_FUNCTIONS):
+             if run_function.__name__.lower() in self.model_id:
+                 print(
+                     f"Load function recognized for {self.model_id}: {LOAD_MODEL_FUNCTIONS[f_idx].__name__}"
+                 )
+                 self.load_fn = LOAD_MODEL_FUNCTIONS[f_idx]
+         for run_function in MODEL_FUNCTIONS:
+             if run_function.__name__.lower() in self.model_id:
+                 print(
+                     f"Run function recognized for {self.model_id}: {run_function.__name__.lower()}"
+                 )
+                 self.run_fn = run_function
+         self.model = self.load_fn(self.model_id, self.openai_key)
+         self.name = self.run_fn.__name__.lower()
+
+     def run(
+         self,
+         prompt,
+         temperature,
+     ):
+         return self.run_fn(
+             model=self.model,
+             prompt=prompt,
+             temperature=temperature,
+         )
+
+
+ LOAD_MODEL_FUNCTIONS = []
+ MODEL_FUNCTIONS = []
+
+
+ # gpt models
+ def gpt_loader(model_id: str, openai_key: str):
+     # load the API key supplied through the UI; a Space secret could be
+     # used instead, e.g. os.getenv("OPENAI_API_KEY")
+     openai.api_key = openai_key
+     logging.warning(f"model id: {model_id}")
+
+     return model_id
+
+
+ LOAD_MODEL_FUNCTIONS.append(gpt_loader)
+
+
+ def gpt(
+     model: str,
+     prompt: str,
+     temperature: float,
+ ):
+     """
+     Run a streaming chat completion against the OpenAI API.
+
+     Args:
+         model (str): OpenAI model key, e.g. "gpt-3.5-turbo"
+         prompt (str): ChatML-style prompt string, split back into messages
+         temperature (float): The value used to modulate the next token
+             probabilities. The call site defaults to 1.0.
+
+     Returns:
+         An iterable of streamed chat completion chunks.
+     """
+     # split the serialized prompt back into role-tagged turns
+     conversation = prompt.split("<|im_start|>")
+
+     messages = []
+     for turn in conversation:
+         first_word = turn.split("\n")[0]
+
+         if first_word == "system":
+             messages.append(
+                 {
+                     "role": "system",
+                     "content": turn.replace("system\n", "").replace("<|im_end|>\n", ""),
+                 }
+             )
+         elif first_word == "user":
+             messages.append(
+                 {
+                     "role": "user",
+                     "content": turn.replace("user\n", "").replace("<|im_end|>\n", ""),
+                 }
+             )
+         elif first_word == "assistant":
+             messages.append(
+                 {
+                     "role": "assistant",
+                     "content": turn.replace("assistant\n", "").replace(
+                         "<|im_end|>\n", ""
+                     ),
+                 }
+             )
+
+     logging.warning(f"Input to openai api call: {messages}")
+
+     chat_completion = openai.ChatCompletion.create(
+         model=model,
+         messages=messages,
+         temperature=temperature,
+         stream=True,
+     )
+     return chat_completion
+
+
+ MODEL_FUNCTIONS.append(gpt)
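
A minimal usage sketch of `llm_boiler`, consuming the stream the same way `bot_turn` does in app.py. Reading the key from the environment is an assumption for illustration; in the app the key comes from the UI textbox:

```python
import os

from src.llm_boilers import llm_boiler

# assumes OPENAI_API_KEY is set in the environment for this sketch
model = llm_boiler("gpt-3.5-turbo", os.environ["OPENAI_API_KEY"])

prompt = (
    "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
    "<|im_start|>user\nSay hello.<|im_end|>\n"
    "<|im_start|>assistant\n"
)
for chunk in model.run(prompt, temperature=1.0):
    # streamed chunks carry incremental text under choices[0].delta.content
    delta = chunk["choices"][0]["delta"]
    if "content" in delta:
        print(delta["content"], end="", flush=True)
```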