cmagganas committed on
Commit
f16502d
1 Parent(s): b78e021

Delete app/output_format.ipynb

Browse files
Files changed (1) hide show
  1. app/output_format.ipynb +0 -393
app/output_format.ipynb DELETED
@@ -1,393 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 3,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "# import OpenAIChatCompletions class from openai_chat_completion.py file\n",
10
- "from openai_chat_completion import OpenAIChatCompletions"
11
- ]
12
- },
13
- {
14
- "cell_type": "code",
15
- "execution_count": 4,
16
- "metadata": {},
17
- "outputs": [],
18
- "source": [
19
- "import os\n",
20
- "from dotenv import load_dotenv\n",
21
- "load_dotenv()\n",
22
- "\n",
23
- "import openai\n",
24
- "\n",
25
- "# set OPENAI_API_KEY environment variable from .env file\n",
26
- "openai.api_key = os.getenv(\"OPENAI_API_KEY\")"
27
- ]
28
- },
29
- {
30
- "cell_type": "code",
31
- "execution_count": 5,
32
- "metadata": {},
33
- "outputs": [
34
- {
35
- "data": {
36
- "text/plain": [
37
- "'I am going to provide marijuana product information. Using the information I provide, I want you to provide me with the following information about the product.\\n\\n - Brand (brand)\\n - product category (product_category)\\n - sub product category (sub_product_category)\\n - strain name (strain_name)\\n\\nAdditional requirements:\\n\\n- DO NOT EXPLAIN YOUR SELF \\n\\nProduct data below '"
38
- ]
39
- },
40
- "execution_count": 5,
41
- "metadata": {},
42
- "output_type": "execute_result"
43
- }
44
- ],
45
- "source": [
46
- "system_message = open('../prompts/gpt4-system-message.txt', 'r').read()\n",
47
- "system_message"
48
- ]
49
- },
50
- {
51
- "cell_type": "code",
52
- "execution_count": 6,
53
- "metadata": {},
54
- "outputs": [
55
- {
56
- "name": "stdout",
57
- "output_type": "stream",
58
- "text": [
59
- "I am going to provide marijuana product information. Using the information I provide, I want you to provide me with the following information about the product.\n",
60
- "\n",
61
- " - Brand (brand)\n",
62
- " - product category (product_category)\n",
63
- " - sub product category (sub_product_category)\n",
64
- " - strain name (strain_name)\n",
65
- "\n",
66
- "Additional requirements:\n",
67
- "\n",
68
- "- DO NOT EXPLAIN YOUR SELF \n",
69
- "\n",
70
- "Product data below \n"
71
- ]
72
- }
73
- ],
74
- "source": [
75
- "print(system_message)"
76
- ]
77
- },
78
- {
79
- "cell_type": "code",
80
- "execution_count": 7,
81
- "metadata": {},
82
- "outputs": [],
83
- "source": [
84
- "chatInstance = OpenAIChatCompletions(system_message=system_message)\n",
85
- "chat_response = chatInstance.openai_chat_completion(prompt=\"Cookies - London Pound Cake 75 - Gummy - 10ct - 100mg\")"
86
- ]
87
- },
88
- {
89
- "cell_type": "code",
90
- "execution_count": 8,
91
- "metadata": {},
92
- "outputs": [
93
- {
94
- "name": "stdout",
95
- "output_type": "stream",
96
- "text": [
97
- "- Brand: Cookies\n",
98
- "- Product Category: Edibles\n",
99
- "- Sub Product Category: Gummy\n",
100
- "- Strain Name: London Pound Cake 75\n"
101
- ]
102
- }
103
- ],
104
- "source": [
105
- "print(chat_response['choices'][0]['message']['content'])"
106
- ]
107
- },
108
- {
109
- "cell_type": "code",
110
- "execution_count": 9,
111
- "metadata": {},
112
- "outputs": [],
113
- "source": [
114
- "system_message2 = \"\"\"\n",
115
- "I am going to provide marijuana product information. Using the information I provide, I want you to provide me with the following information about the product.\n",
116
- "\n",
117
- " - Brand (brand)\n",
118
- " - product category (product_category)\n",
119
- " - sub product category (sub_product_category)\n",
120
- " - strain name (strain_name)\n",
121
- "\n",
122
- "Additional requirements:\n",
123
- "\n",
124
- "DO NOT EXPLAIN YOUR SELF \n",
125
- "Format output in JSON format\n",
126
- "\n",
127
- "example output:\n",
128
- "{\"col1\": \"value1\", \"col2\": \"value2\", \"col3\": \"value3\"}\n",
129
- "\n",
130
- "---\n",
131
- "\n",
132
- "Product data below \n",
133
- "\"\"\""
134
- ]
135
- },
136
- {
137
- "cell_type": "code",
138
- "execution_count": 10,
139
- "metadata": {},
140
- "outputs": [
141
- {
142
- "name": "stdout",
143
- "output_type": "stream",
144
- "text": [
145
- "{\"brand\": \"Cookies\", \"product_category\": \"Edibles\", \"sub_product_category\": \"Gummy\", \"strain_name\": \"London Pound Cake 75\"}\n"
146
- ]
147
- }
148
- ],
149
- "source": [
150
- "chatInstance2 = OpenAIChatCompletions(system_message=system_message2)\n",
151
- "chat_response2 = chatInstance2.openai_chat_completion(prompt=\"Cookies - London Pound Cake 75 - Gummy - 10ct - 100mg\")\n",
152
- "print(chat_response2['choices'][0]['message']['content'])"
153
- ]
154
- },
155
- {
156
- "cell_type": "code",
157
- "execution_count": 11,
158
- "metadata": {},
159
- "outputs": [],
160
- "source": [
161
- "chat_response2_content = chat_response2['choices'][0]['message']['content']"
162
- ]
163
- },
164
- {
165
- "cell_type": "code",
166
- "execution_count": 12,
167
- "metadata": {},
168
- "outputs": [
169
- {
170
- "data": {
171
- "text/plain": [
172
- "{'brand': 'Cookies',\n",
173
- " 'product_category': 'Edibles',\n",
174
- " 'sub_product_category': 'Gummy',\n",
175
- " 'strain_name': 'LondonPoundCake75'}"
176
- ]
177
- },
178
- "execution_count": 12,
179
- "metadata": {},
180
- "output_type": "execute_result"
181
- }
182
- ],
183
- "source": [
184
- "# write function that takes string in the form of json and returns a dictionary\n",
185
- "\n",
186
- "def json_to_dict(json_string):\n",
187
- " json_string = json_string.replace('\\n', '')\n",
188
- " json_string = json_string.replace('\\t', '')\n",
189
- " json_string = json_string.replace(' ', '')\n",
190
- " json_string = json_string.replace('\"', '')\n",
191
- " json_string = json_string.replace('{', '')\n",
192
- " json_string = json_string.replace('}', '')\n",
193
- " json_string = json_string.replace(':', ',')\n",
194
- " json_string = json_string.split(',')\n",
195
- " return {\n",
196
- " json_string[i]: json_string[i + 1]\n",
197
- " for i in range(0, len(json_string), 2)\n",
198
- " }\n",
199
- "\n",
200
- "output_as_json = json_to_dict(chat_response2_content)\n",
201
- "assert type(output_as_json) == dict\n",
202
- "output_as_json"
203
- ]
204
- },
205
- {
206
- "cell_type": "code",
207
- "execution_count": 13,
208
- "metadata": {},
209
- "outputs": [
210
- {
211
- "data": {
212
- "text/html": [
213
- "<div>\n",
214
- "<style scoped>\n",
215
- " .dataframe tbody tr th:only-of-type {\n",
216
- " vertical-align: middle;\n",
217
- " }\n",
218
- "\n",
219
- " .dataframe tbody tr th {\n",
220
- " vertical-align: top;\n",
221
- " }\n",
222
- "\n",
223
- " .dataframe thead th {\n",
224
- " text-align: right;\n",
225
- " }\n",
226
- "</style>\n",
227
- "<table border=\"1\" class=\"dataframe\">\n",
228
- " <thead>\n",
229
- " <tr style=\"text-align: right;\">\n",
230
- " <th></th>\n",
231
- " <th>brand</th>\n",
232
- " <th>product_category</th>\n",
233
- " <th>sub_product_category</th>\n",
234
- " <th>strain_name</th>\n",
235
- " </tr>\n",
236
- " </thead>\n",
237
- " <tbody>\n",
238
- " <tr>\n",
239
- " <th>0</th>\n",
240
- " <td>Cookies</td>\n",
241
- " <td>Edibles</td>\n",
242
- " <td>Gummy</td>\n",
243
- " <td>LondonPoundCake75</td>\n",
244
- " </tr>\n",
245
- " </tbody>\n",
246
- "</table>\n",
247
- "</div>"
248
- ],
249
- "text/plain": [
250
- " brand product_category sub_product_category strain_name\n",
251
- "0 Cookies Edibles Gummy LondonPoundCake75"
252
- ]
253
- },
254
- "execution_count": 13,
255
- "metadata": {},
256
- "output_type": "execute_result"
257
- }
258
- ],
259
- "source": [
260
- "# write a function that takes a dictionary and returns a dataframe\n",
261
- "import pandas as pd\n",
262
- "\n",
263
- "def dict_to_df(dictionary):\n",
264
- " return pd.DataFrame(dictionary, index=[0])\n",
265
- "\n",
266
- "dict_to_df(output_as_json)"
267
- ]
268
- },
269
- {
270
- "cell_type": "code",
271
- "execution_count": 14,
272
- "metadata": {},
273
- "outputs": [
274
- {
275
- "name": "stdout",
276
- "output_type": "stream",
277
- "text": [
278
- "{\"brand\": \"Cookies\", \"product_category\": \"Edibles\", \"sub_product_category\": \"Gummy\", \"strain_name\": \"London Pound Cake 75\"}\n",
279
- "{\"brand\": \"Berlin\", \"product_category\": \"Edibles\", \"sub_product_category\": \"Brownies\", \"strain_name\": \"Chocolate Hazelnut 69\"}\n"
280
- ]
281
- }
282
- ],
283
- "source": [
284
- "chat_response2a = chatInstance2.openai_chat_completion(prompt=\"Cookies - London Pound Cake 75 - Gummy - 10ct - 100mg\")\n",
285
- "chat_response2b = chatInstance2.openai_chat_completion(prompt=\"Brownies - Berlin Chocolate Hazelnut 69 - Flower - 1ct - 69mg\")\n",
286
- "print(chat_response2a['choices'][0]['message']['content'])\n",
287
- "print(chat_response2b['choices'][0]['message']['content'])"
288
- ]
289
- },
290
- {
291
- "cell_type": "code",
292
- "execution_count": 15,
293
- "metadata": {},
294
- "outputs": [],
295
- "source": [
296
- "def join_dicts(dict1, dict2):\n",
297
- " return {key:[dict1[key], dict2[key]] for key in dict1}"
298
- ]
299
- },
300
- {
301
- "cell_type": "code",
302
- "execution_count": 16,
303
- "metadata": {},
304
- "outputs": [
305
- {
306
- "data": {
307
- "text/plain": [
308
- "{'brand': ['Cookies', 'Berlin'],\n",
309
- " 'product_category': ['Edibles', 'Edibles'],\n",
310
- " 'sub_product_category': ['Gummy', 'Brownies'],\n",
311
- " 'strain_name': ['LondonPoundCake75', 'ChocolateHazelnut69']}"
312
- ]
313
- },
314
- "execution_count": 16,
315
- "metadata": {},
316
- "output_type": "execute_result"
317
- }
318
- ],
319
- "source": [
320
- "out2a_as_json = json_to_dict(chat_response2a['choices'][0]['message']['content'])\n",
321
- "out2b_as_json = json_to_dict(chat_response2b['choices'][0]['message']['content'])\n",
322
- "\n",
323
- "out3_as_json = join_dicts(out2a_as_json, out2b_as_json)\n",
324
- "out3_as_json"
325
- ]
326
- },
327
- {
328
- "cell_type": "markdown",
329
- "metadata": {},
330
- "source": [
331
- "Try via util.py File"
332
- ]
333
- },
334
- {
335
- "cell_type": "code",
336
- "execution_count": 18,
337
- "metadata": {},
338
- "outputs": [],
339
- "source": [
340
- "from util import json_to_dict, join_dicts"
341
- ]
342
- },
343
- {
344
- "cell_type": "code",
345
- "execution_count": 19,
346
- "metadata": {},
347
- "outputs": [
348
- {
349
- "data": {
350
- "text/plain": [
351
- "{'brand': ['Cookies', 'Berlin'],\n",
352
- " 'product_category': ['Edibles', 'Edibles'],\n",
353
- " 'sub_product_category': ['Gummy', 'Brownies'],\n",
354
- " 'strain_name': ['LondonPoundCake75', 'ChocolateHazelnut69']}"
355
- ]
356
- },
357
- "execution_count": 19,
358
- "metadata": {},
359
- "output_type": "execute_result"
360
- }
361
- ],
362
- "source": [
363
- "out2a_as_json = json_to_dict(chat_response2a['choices'][0]['message']['content'])\n",
364
- "out2b_as_json = json_to_dict(chat_response2b['choices'][0]['message']['content'])\n",
365
- "\n",
366
- "out3_as_json = join_dicts(out2a_as_json, out2b_as_json)\n",
367
- "out3_as_json"
368
- ]
369
- }
370
- ],
371
- "metadata": {
372
- "kernelspec": {
373
- "display_name": "kd-llm-dc",
374
- "language": "python",
375
- "name": "python3"
376
- },
377
- "language_info": {
378
- "codemirror_mode": {
379
- "name": "ipython",
380
- "version": 3
381
- },
382
- "file_extension": ".py",
383
- "mimetype": "text/x-python",
384
- "name": "python",
385
- "nbconvert_exporter": "python",
386
- "pygments_lexer": "ipython3",
387
- "version": "3.10.11"
388
- },
389
- "orig_nbformat": 4
390
- },
391
- "nbformat": 4,
392
- "nbformat_minor": 2
393
- }