cmagganas committed on
Commit
9de5882
1 Parent(s): 8a826ee

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .env +1 -0
  2. .gitattributes +1 -0
  3. .gitignore +160 -0
  4. README.md +2 -12
  5. app/check_if_csv_loads.ipynb +53 -0
  6. app/cookies_llm_experimental_eval.ipynb +412 -0
  7. app/cookies_openai_model_eval.ipynb +797 -0
  8. app/data_prep.ipynb +283 -0
  9. app/openai_chat_completion.py +76 -0
  10. app/test.py +18 -0
  11. app/util.py +20 -0
  12. app/wandb/run-20230626_114056-rbtf91s6/files/conda-environment.yaml +114 -0
  13. app/wandb/run-20230626_114056-rbtf91s6/files/config.yaml +49 -0
  14. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_0_75f83ac87fa164c83b8e.table.json +1 -0
  15. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_100_f673fdf8da564f8db616.table.json +1 -0
  16. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_101_df8f84304e946465232c.table.json +1 -0
  17. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_102_bf3b4ae7fed0ee456ca9.table.json +1 -0
  18. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_103_9784775529cd9b0c478d.table.json +1 -0
  19. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_104_b69d429713afe585607d.table.json +1 -0
  20. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_105_8f9030467825d6eb9154.table.json +1 -0
  21. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_106_975584bb2f8ac7976684.table.json +1 -0
  22. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_107_353750bd44b04ac7162b.table.json +1 -0
  23. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_108_d4754b6aea82a8c16ed9.table.json +1 -0
  24. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_109_bcb8cb29447c99155339.table.json +1 -0
  25. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_10_d888c45dceed81f58d7a.table.json +1 -0
  26. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_110_6b738aa2166ace2f9d56.table.json +1 -0
  27. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_111_5aed3974ba4952f876eb.table.json +1 -0
  28. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_112_3a7258bfdca8fcd1719b.table.json +1 -0
  29. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_113_71838fad78e890a3e4cd.table.json +1 -0
  30. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_114_200ab5992b87e0e3b2d4.table.json +1 -0
  31. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_115_3e8ce0d79ca9a77c577e.table.json +1 -0
  32. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_116_27261d37b58e97119be1.table.json +1 -0
  33. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_117_8afbd8d9b801892f328e.table.json +1 -0
  34. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_118_f5e4033d82ffd310d340.table.json +1 -0
  35. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_119_6d43ea3093b2b3ecadd6.table.json +1 -0
  36. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_11_81ba18437deae19f6d25.table.json +1 -0
  37. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_120_628c0792a073cd9c405c.table.json +1 -0
  38. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_121_b556a799b6d3b562770d.table.json +1 -0
  39. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_122_b10f20986ab3c17d78d0.table.json +1 -0
  40. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_123_ec51ba8e67e34ec07ce2.table.json +1 -0
  41. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_124_c2c63679dc07c9c48315.table.json +1 -0
  42. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_125_4d67abd74b85c3978623.table.json +1 -0
  43. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_126_8815035ed0a50d11f9e3.table.json +1 -0
  44. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_127_98fa95995325ab388230.table.json +1 -0
  45. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_128_a77f4efc17f0b058d06b.table.json +1 -0
  46. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_129_4ed9178a8330c3b08b77.table.json +1 -0
  47. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_12_418719b4fcd204ed66d8.table.json +1 -0
  48. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_130_691215590d7f6b91716f.table.json +1 -0
  49. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_131_abafdaca35ad3cbbc1f4.table.json +1 -0
  50. app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_132_1b1d305dd76406f7892d.table.json +1 -0
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ OPENAI_API_KEY="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"
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ app/wandb/run-20230626_114056-rbtf91s6/run-rbtf91s6.wandb filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
README.md CHANGED
@@ -1,12 +1,2 @@
1
- ---
2
- title: Data Cleaning Llm
3
- emoji: 🌍
4
- colorFrom: indigo
5
- colorTo: indigo
6
- sdk: streamlit
7
- sdk_version: 1.21.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # llm-data-cleaner
2
+ Kaleidoscope Data - LLM Data Cleaner
 
 
 
 
 
 
 
 
 
 
app/check_if_csv_loads.ipynb ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "ename": "FileNotFoundError",
10
+ "evalue": "[Errno 2] No such file or directory: '../data/cookies_llm_eval_predictions.csv'",
11
+ "output_type": "error",
12
+ "traceback": [
13
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
14
+ "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
15
+ "Cell \u001b[0;32mIn[1], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mpandas\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mpd\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m pd\u001b[39m.\u001b[39;49mread_csv(\u001b[39m'\u001b[39;49m\u001b[39m../data/cookies_llm_eval_predictions.csv\u001b[39;49m\u001b[39m'\u001b[39;49m)\n",
16
+ "File \u001b[0;32m~/anaconda3/envs/kd-llm-dc/lib/python3.10/site-packages/pandas/io/parsers/readers.py:912\u001b[0m, in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 899\u001b[0m kwds_defaults \u001b[39m=\u001b[39m _refine_defaults_read(\n\u001b[1;32m 900\u001b[0m dialect,\n\u001b[1;32m 901\u001b[0m delimiter,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 908\u001b[0m dtype_backend\u001b[39m=\u001b[39mdtype_backend,\n\u001b[1;32m 909\u001b[0m )\n\u001b[1;32m 910\u001b[0m kwds\u001b[39m.\u001b[39mupdate(kwds_defaults)\n\u001b[0;32m--> 912\u001b[0m \u001b[39mreturn\u001b[39;00m _read(filepath_or_buffer, kwds)\n",
17
+ "File \u001b[0;32m~/anaconda3/envs/kd-llm-dc/lib/python3.10/site-packages/pandas/io/parsers/readers.py:577\u001b[0m, in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 574\u001b[0m _validate_names(kwds\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mnames\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m))\n\u001b[1;32m 576\u001b[0m \u001b[39m# Create the parser.\u001b[39;00m\n\u001b[0;32m--> 577\u001b[0m parser \u001b[39m=\u001b[39m TextFileReader(filepath_or_buffer, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwds)\n\u001b[1;32m 579\u001b[0m \u001b[39mif\u001b[39;00m chunksize \u001b[39mor\u001b[39;00m iterator:\n\u001b[1;32m 580\u001b[0m \u001b[39mreturn\u001b[39;00m parser\n",
18
+ "File \u001b[0;32m~/anaconda3/envs/kd-llm-dc/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1407\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 1404\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39moptions[\u001b[39m\"\u001b[39m\u001b[39mhas_index_names\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m kwds[\u001b[39m\"\u001b[39m\u001b[39mhas_index_names\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[1;32m 1406\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles: IOHandles \u001b[39m|\u001b[39m \u001b[39mNone\u001b[39;00m \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m-> 1407\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_engine \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_make_engine(f, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mengine)\n",
19
+ "File \u001b[0;32m~/anaconda3/envs/kd-llm-dc/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1661\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m 1659\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m mode:\n\u001b[1;32m 1660\u001b[0m mode \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m-> 1661\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles \u001b[39m=\u001b[39m get_handle(\n\u001b[1;32m 1662\u001b[0m f,\n\u001b[1;32m 1663\u001b[0m mode,\n\u001b[1;32m 1664\u001b[0m encoding\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mencoding\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[1;32m 1665\u001b[0m compression\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mcompression\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[1;32m 1666\u001b[0m memory_map\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mmemory_map\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mFalse\u001b[39;49;00m),\n\u001b[1;32m 1667\u001b[0m is_text\u001b[39m=\u001b[39;49mis_text,\n\u001b[1;32m 1668\u001b[0m errors\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mencoding_errors\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mstrict\u001b[39;49m\u001b[39m\"\u001b[39;49m),\n\u001b[1;32m 1669\u001b[0m 
storage_options\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mstorage_options\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[1;32m 1670\u001b[0m )\n\u001b[1;32m 1671\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 1672\u001b[0m f \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles\u001b[39m.\u001b[39mhandle\n",
20
+ "File \u001b[0;32m~/anaconda3/envs/kd-llm-dc/lib/python3.10/site-packages/pandas/io/common.py:859\u001b[0m, in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m 854\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(handle, \u001b[39mstr\u001b[39m):\n\u001b[1;32m 855\u001b[0m \u001b[39m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[1;32m 856\u001b[0m \u001b[39m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[1;32m 857\u001b[0m \u001b[39mif\u001b[39;00m ioargs\u001b[39m.\u001b[39mencoding \u001b[39mand\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m ioargs\u001b[39m.\u001b[39mmode:\n\u001b[1;32m 858\u001b[0m \u001b[39m# Encoding\u001b[39;00m\n\u001b[0;32m--> 859\u001b[0m handle \u001b[39m=\u001b[39m \u001b[39mopen\u001b[39;49m(\n\u001b[1;32m 860\u001b[0m handle,\n\u001b[1;32m 861\u001b[0m ioargs\u001b[39m.\u001b[39;49mmode,\n\u001b[1;32m 862\u001b[0m encoding\u001b[39m=\u001b[39;49mioargs\u001b[39m.\u001b[39;49mencoding,\n\u001b[1;32m 863\u001b[0m errors\u001b[39m=\u001b[39;49merrors,\n\u001b[1;32m 864\u001b[0m newline\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[1;32m 865\u001b[0m )\n\u001b[1;32m 866\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 867\u001b[0m \u001b[39m# Binary mode\u001b[39;00m\n\u001b[1;32m 868\u001b[0m handle \u001b[39m=\u001b[39m \u001b[39mopen\u001b[39m(handle, ioargs\u001b[39m.\u001b[39mmode)\n",
21
+ "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../data/cookies_llm_eval_predictions.csv'"
22
+ ]
23
+ }
24
+ ],
25
+ "source": [
26
+ "import pandas as pd\n",
27
+ "pd.read_csv('../data/cookies_llm_eval_predictions.csv')"
28
+ ]
29
+ }
30
+ ],
31
+ "metadata": {
32
+ "kernelspec": {
33
+ "display_name": "kd-llm-dc",
34
+ "language": "python",
35
+ "name": "python3"
36
+ },
37
+ "language_info": {
38
+ "codemirror_mode": {
39
+ "name": "ipython",
40
+ "version": 3
41
+ },
42
+ "file_extension": ".py",
43
+ "mimetype": "text/x-python",
44
+ "name": "python",
45
+ "nbconvert_exporter": "python",
46
+ "pygments_lexer": "ipython3",
47
+ "version": "3.10.11"
48
+ },
49
+ "orig_nbformat": 4
50
+ },
51
+ "nbformat": 4,
52
+ "nbformat_minor": 2
53
+ }
app/cookies_llm_experimental_eval.ipynb ADDED
@@ -0,0 +1,412 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "attachments": {},
5
+ "cell_type": "markdown",
6
+ "metadata": {},
7
+ "source": [
8
+ "## This notebook evaluates how well the LLM cleans the cookies dataset."
9
+ ]
10
+ },
11
+ {
12
+ "attachments": {},
13
+ "cell_type": "markdown",
14
+ "metadata": {},
15
+ "source": [
16
+ "#### Using OpenAI API"
17
+ ]
18
+ },
19
+ {
20
+ "attachments": {},
21
+ "cell_type": "markdown",
22
+ "metadata": {},
23
+ "source": [
24
+ "Try calling a function from the newly created module `openai_chat_completion.py`."
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": 3,
30
+ "metadata": {},
31
+ "outputs": [
32
+ {
33
+ "name": "stdout",
34
+ "output_type": "stream",
35
+ "text": [
36
+ "{\n",
37
+ " \"id\": \"chatcmpl-7UecmbvjkbwamEpYXyIvGO4w4Ae8i\",\n",
38
+ " \"object\": \"chat.completion\",\n",
39
+ " \"created\": 1687540356,\n",
40
+ " \"model\": \"gpt-4-0314\",\n",
41
+ " \"choices\": [\n",
42
+ " {\n",
43
+ " \"index\": 0,\n",
44
+ " \"message\": {\n",
45
+ " \"role\": \"assistant\",\n",
46
+ " \"content\": \"Chill Medicated,Edible,Beverage,Watermelon,250\"\n",
47
+ " },\n",
48
+ " \"finish_reason\": \"stop\"\n",
49
+ " }\n",
50
+ " ],\n",
51
+ " \"usage\": {\n",
52
+ " \"prompt_tokens\": 636,\n",
53
+ " \"completion_tokens\": 15,\n",
54
+ " \"total_tokens\": 651\n",
55
+ " }\n",
56
+ "}\n"
57
+ ]
58
+ }
59
+ ],
60
+ "source": [
61
+ "import os\n",
62
+ "from dotenv import load_dotenv\n",
63
+ "load_dotenv()\n",
64
+ "\n",
65
+ "import openai\n",
66
+ "\n",
67
+ "# set OPENAI_API_KEY environment variable from .env file\n",
68
+ "openai.api_key = os.getenv(\"OPENAI_API_KEY\")\n",
69
+ "\n",
70
+ "# import OpenAIChatCompletions class from openai_chat_completion.py file located in llm_data_cleaner/scripts folder (this notebook is located in llm_data_cleaner/notebooks folder)\n",
71
+ "from openai_chat_completion import OpenAIChatCompletions, compare_completion_and_prediction\n",
72
+ "\n",
73
+ "# read in llm-data-cleaner/prompts/gpt4-system-message.txt file into variable system_message\n",
74
+ "system_message = open('../prompts/gpt4-system-message.txt', 'r').read()\n",
75
+ "\n",
76
+ "# create an instance of the OpenAIChatCompletions class and use the openai_chat_completion method to get chat completions\n",
77
+ "chat_completions = OpenAIChatCompletions(model=\"gpt-4\", system_message=system_message)\n",
78
+ "\n",
79
+ "prompt = \"co-2MFE5QVF,Chill Medicated - Watermelon - Syrup - 250mg,Chill Medicated,nan,nan,nan\"\n",
80
+ "completion = chat_completions.openai_chat_completion(prompt, n_shot=1)\n",
81
+ "print(completion)"
82
+ ]
83
+ },
84
+ {
85
+ "cell_type": "code",
86
+ "execution_count": 7,
87
+ "metadata": {},
88
+ "outputs": [],
89
+ "source": [
90
+ "from openai_chat_completion import OpenAIChatCompletions\n",
91
+ "\n",
92
+ "test_prompts, test_completions, test_predictions = chat_completions.predict_jsonl(n_shot=1)"
93
+ ]
94
+ },
95
+ {
96
+ "cell_type": "code",
97
+ "execution_count": 44,
98
+ "metadata": {},
99
+ "outputs": [
100
+ {
101
+ "data": {
102
+ "text/html": [
103
+ "<div>\n",
104
+ "<style scoped>\n",
105
+ " .dataframe tbody tr th:only-of-type {\n",
106
+ " vertical-align: middle;\n",
107
+ " }\n",
108
+ "\n",
109
+ " .dataframe tbody tr th {\n",
110
+ " vertical-align: top;\n",
111
+ " }\n",
112
+ "\n",
113
+ " .dataframe thead th {\n",
114
+ " text-align: right;\n",
115
+ " }\n",
116
+ "</style>\n",
117
+ "<table border=\"1\" class=\"dataframe\">\n",
118
+ " <thead>\n",
119
+ " <tr style=\"text-align: right;\">\n",
120
+ " <th></th>\n",
121
+ " <th>prompt</th>\n",
122
+ " <th>completion</th>\n",
123
+ " <th>prediction</th>\n",
124
+ " </tr>\n",
125
+ " </thead>\n",
126
+ " <tbody>\n",
127
+ " <tr>\n",
128
+ " <th>0</th>\n",
129
+ " <td>co-2MFE5QVF,Chill Medicated - Watermelon - Syr...</td>\n",
130
+ " <td>Chill Medicated,Edible,Beverage,nan,nan</td>\n",
131
+ " <td>Chill Medicated,Edible,Beverage,Watermelon,250</td>\n",
132
+ " </tr>\n",
133
+ " <tr>\n",
134
+ " <th>1</th>\n",
135
+ " <td>bl-111630024545,Feelz - Space Cowboy 3.5g,nan,...</td>\n",
136
+ " <td>Feelz,Flower,Bud,Space Cowboy,3.5</td>\n",
137
+ " <td>Feelz,Flower,Bud,Space Cowboy,3.5</td>\n",
138
+ " </tr>\n",
139
+ " <tr>\n",
140
+ " <th>2</th>\n",
141
+ " <td>fl-8voAjt83sD,Champelli | Xclusivo 3.5g | Eigh...</td>\n",
142
+ " <td>Champelli,Flower,Bud,Xclusivo,3.5</td>\n",
143
+ " <td>Champelli,Flower,Bud,Xclusivo,3.5</td>\n",
144
+ " </tr>\n",
145
+ " <tr>\n",
146
+ " <th>3</th>\n",
147
+ " <td>bl-073133213364,CAM - Mellowz #7 7g,nan,FLOWER...</td>\n",
148
+ " <td>CAM,Flower,Bud,Mellowz #7,7</td>\n",
149
+ " <td>CAM - Mellowz #7 7g,CAM,Flower,Bud,Mellowz,7</td>\n",
150
+ " </tr>\n",
151
+ " <tr>\n",
152
+ " <th>4</th>\n",
153
+ " <td>fl-fwJQL2AWnS,Backpack Boyz | Bubblegum Gelato...</td>\n",
154
+ " <td>Backpack Boyz,Edible,CBD Tincture/Caps/etc,nan...</td>\n",
155
+ " <td>Backpack Boyz,Edible,Syrup,Bubblegum Gelato,1</td>\n",
156
+ " </tr>\n",
157
+ " <tr>\n",
158
+ " <th>5</th>\n",
159
+ " <td>fl-dXE5cH45AQ,Raw Garden | Pink Lemonade Crush...</td>\n",
160
+ " <td>Raw Garden,Concentrate,Diamonds,Pink Lemonade,1</td>\n",
161
+ " <td>Raw Garden,Concentrate,Diamonds,Pink Lemonade,1.0</td>\n",
162
+ " </tr>\n",
163
+ " <tr>\n",
164
+ " <th>6</th>\n",
165
+ " <td>md-1159983,Baby Jeeter Peaches | Infused Prero...</td>\n",
166
+ " <td>Jeeter,Preroll,Infused Joint,Peaches,12.5</td>\n",
167
+ " <td>Jeeter,Preroll,Infused Joint,Peaches,12.5</td>\n",
168
+ " </tr>\n",
169
+ " <tr>\n",
170
+ " <th>7</th>\n",
171
+ " <td>co-6WGV1Z0H,Ocean Breeze Cultivators - Truffle...</td>\n",
172
+ " <td>Ocean Breeze Cultivators,Preroll,Blunt,Truffle...</td>\n",
173
+ " <td>Ocean Breeze Cultivators,Blunt,Truffle Pupil,1.0</td>\n",
174
+ " </tr>\n",
175
+ " <tr>\n",
176
+ " <th>8</th>\n",
177
+ " <td>fl-doWkMzvFq2,Cookies | Triple Scoop Preroll 1...</td>\n",
178
+ " <td>Cookies,Preroll,Joint,Triple Scoop,1</td>\n",
179
+ " <td>Cookies,Preroll,Joint,Triple Scoop,1.0</td>\n",
180
+ " </tr>\n",
181
+ " <tr>\n",
182
+ " <th>9</th>\n",
183
+ " <td>bl-842922110296,STIIIZY - Birthday Cake Pod 1g...</td>\n",
184
+ " <td>STIIIZY,Vape,Vape,Birthday Cake,1</td>\n",
185
+ " <td>Birthday Cake Pod,STIIIZY,Vape,Birthday Cake,1.0</td>\n",
186
+ " </tr>\n",
187
+ " <tr>\n",
188
+ " <th>10</th>\n",
189
+ " <td>co-6GGKA0GV,Cookies x The Grower Circle - Tang...</td>\n",
190
+ " <td>Cookies,Preroll,Infused Joint,TangEray,1</td>\n",
191
+ " <td>Here is the missing information:\\n\\nproduct_na...</td>\n",
192
+ " </tr>\n",
193
+ " <tr>\n",
194
+ " <th>11</th>\n",
195
+ " <td>co-7YGGNHBR,Kanha - Nano Blood Orange Bliss In...</td>\n",
196
+ " <td>Kanha,Edible,Gummies,nan,nan</td>\n",
197
+ " <td>Kanha,Edible,Gummies,nan,nan</td>\n",
198
+ " </tr>\n",
199
+ " <tr>\n",
200
+ " <th>12</th>\n",
201
+ " <td>md-1242517,BOBBI HYLL | Indoor - 3.5g,Fresca,F...</td>\n",
202
+ " <td>Fresca,Flower,Bud,BOBBI HYLL,3.5</td>\n",
203
+ " <td>Fresca,Flower,Bud,BOBBI HYLL,3.5</td>\n",
204
+ " </tr>\n",
205
+ " <tr>\n",
206
+ " <th>13</th>\n",
207
+ " <td>fl-9nEPGsnYtY,Flav | Blueberry Belts 100mg,Fla...</td>\n",
208
+ " <td>Flav,Edible,Gummies,nan,nan</td>\n",
209
+ " <td>Flav,Edible,Gummies,Blueberry,100</td>\n",
210
+ " </tr>\n",
211
+ " <tr>\n",
212
+ " <th>14</th>\n",
213
+ " <td>md-1347497,Cherry Pie - 1g - Pod,Left Coast Ex...</td>\n",
214
+ " <td>Left Coast Extracts,Vape,510 cart,Cherry Pie,1</td>\n",
215
+ " <td>Cherry Pie - 1g - Pod,Left Coast Extracts,Vape...</td>\n",
216
+ " </tr>\n",
217
+ " <tr>\n",
218
+ " <th>15</th>\n",
219
+ " <td>co-76GP441T,Minntz - Emerald Cut - Indoor - Jo...</td>\n",
220
+ " <td>Minntz,Preroll,Joint,Emerald Cut,1</td>\n",
221
+ " <td>Minntz,Preroll,Joint,Emerald Cut,1</td>\n",
222
+ " </tr>\n",
223
+ " <tr>\n",
224
+ " <th>16</th>\n",
225
+ " <td>co-5RAWYHYQ,The Growers Circle - Double Down -...</td>\n",
226
+ " <td>The Growers Circle,Flower,Bud,Double Down,3.5</td>\n",
227
+ " <td>The Growers Circle,Flower,Bud,Double Down,3.5</td>\n",
228
+ " </tr>\n",
229
+ " <tr>\n",
230
+ " <th>17</th>\n",
231
+ " <td>md-1195389,Blue Dream Roll Your Own Sugar Shak...</td>\n",
232
+ " <td>Pacific Stone,Flower,Bud,nan,14</td>\n",
233
+ " <td>Pacific Stone,Flower,Bud,Blue Dream,14</td>\n",
234
+ " </tr>\n",
235
+ " <tr>\n",
236
+ " <th>18</th>\n",
237
+ " <td>co-847ZXF37,The Grower Circle - Zoo Dawg x Cos...</td>\n",
238
+ " <td>The Growers Circle,Preroll,Joint,Zoo Dawg x Co...</td>\n",
239
+ " <td>The Grower Circle,Preroll,Joint,Zoo Dawg x Cos...</td>\n",
240
+ " </tr>\n",
241
+ " <tr>\n",
242
+ " <th>19</th>\n",
243
+ " <td>co-8EMW15ZM,Flight Bites - S'mores - Gummy - 1...</td>\n",
244
+ " <td>Flight Bites,Edible,Gummies,nan,nan</td>\n",
245
+ " <td>Flight Bites,Edible,Gummies,S'mores,10</td>\n",
246
+ " </tr>\n",
247
+ " </tbody>\n",
248
+ "</table>\n",
249
+ "</div>"
250
+ ],
251
+ "text/plain": [
252
+ " prompt \\\n",
253
+ "0 co-2MFE5QVF,Chill Medicated - Watermelon - Syr... \n",
254
+ "1 bl-111630024545,Feelz - Space Cowboy 3.5g,nan,... \n",
255
+ "2 fl-8voAjt83sD,Champelli | Xclusivo 3.5g | Eigh... \n",
256
+ "3 bl-073133213364,CAM - Mellowz #7 7g,nan,FLOWER... \n",
257
+ "4 fl-fwJQL2AWnS,Backpack Boyz | Bubblegum Gelato... \n",
258
+ "5 fl-dXE5cH45AQ,Raw Garden | Pink Lemonade Crush... \n",
259
+ "6 md-1159983,Baby Jeeter Peaches | Infused Prero... \n",
260
+ "7 co-6WGV1Z0H,Ocean Breeze Cultivators - Truffle... \n",
261
+ "8 fl-doWkMzvFq2,Cookies | Triple Scoop Preroll 1... \n",
262
+ "9 bl-842922110296,STIIIZY - Birthday Cake Pod 1g... \n",
263
+ "10 co-6GGKA0GV,Cookies x The Grower Circle - Tang... \n",
264
+ "11 co-7YGGNHBR,Kanha - Nano Blood Orange Bliss In... \n",
265
+ "12 md-1242517,BOBBI HYLL | Indoor - 3.5g,Fresca,F... \n",
266
+ "13 fl-9nEPGsnYtY,Flav | Blueberry Belts 100mg,Fla... \n",
267
+ "14 md-1347497,Cherry Pie - 1g - Pod,Left Coast Ex... \n",
268
+ "15 co-76GP441T,Minntz - Emerald Cut - Indoor - Jo... \n",
269
+ "16 co-5RAWYHYQ,The Growers Circle - Double Down -... \n",
270
+ "17 md-1195389,Blue Dream Roll Your Own Sugar Shak... \n",
271
+ "18 co-847ZXF37,The Grower Circle - Zoo Dawg x Cos... \n",
272
+ "19 co-8EMW15ZM,Flight Bites - S'mores - Gummy - 1... \n",
273
+ "\n",
274
+ " completion \\\n",
275
+ "0 Chill Medicated,Edible,Beverage,nan,nan \n",
276
+ "1 Feelz,Flower,Bud,Space Cowboy,3.5 \n",
277
+ "2 Champelli,Flower,Bud,Xclusivo,3.5 \n",
278
+ "3 CAM,Flower,Bud,Mellowz #7,7 \n",
279
+ "4 Backpack Boyz,Edible,CBD Tincture/Caps/etc,nan... \n",
280
+ "5 Raw Garden,Concentrate,Diamonds,Pink Lemonade,1 \n",
281
+ "6 Jeeter,Preroll,Infused Joint,Peaches,12.5 \n",
282
+ "7 Ocean Breeze Cultivators,Preroll,Blunt,Truffle... \n",
283
+ "8 Cookies,Preroll,Joint,Triple Scoop,1 \n",
284
+ "9 STIIIZY,Vape,Vape,Birthday Cake,1 \n",
285
+ "10 Cookies,Preroll,Infused Joint,TangEray,1 \n",
286
+ "11 Kanha,Edible,Gummies,nan,nan \n",
287
+ "12 Fresca,Flower,Bud,BOBBI HYLL,3.5 \n",
288
+ "13 Flav,Edible,Gummies,nan,nan \n",
289
+ "14 Left Coast Extracts,Vape,510 cart,Cherry Pie,1 \n",
290
+ "15 Minntz,Preroll,Joint,Emerald Cut,1 \n",
291
+ "16 The Growers Circle,Flower,Bud,Double Down,3.5 \n",
292
+ "17 Pacific Stone,Flower,Bud,nan,14 \n",
293
+ "18 The Growers Circle,Preroll,Joint,Zoo Dawg x Co... \n",
294
+ "19 Flight Bites,Edible,Gummies,nan,nan \n",
295
+ "\n",
296
+ " prediction \n",
297
+ "0 Chill Medicated,Edible,Beverage,Watermelon,250 \n",
298
+ "1 Feelz,Flower,Bud,Space Cowboy,3.5 \n",
299
+ "2 Champelli,Flower,Bud,Xclusivo,3.5 \n",
300
+ "3 CAM - Mellowz #7 7g,CAM,Flower,Bud,Mellowz,7 \n",
301
+ "4 Backpack Boyz,Edible,Syrup,Bubblegum Gelato,1 \n",
302
+ "5 Raw Garden,Concentrate,Diamonds,Pink Lemonade,1.0 \n",
303
+ "6 Jeeter,Preroll,Infused Joint,Peaches,12.5 \n",
304
+ "7 Ocean Breeze Cultivators,Blunt,Truffle Pupil,1.0 \n",
305
+ "8 Cookies,Preroll,Joint,Triple Scoop,1.0 \n",
306
+ "9 Birthday Cake Pod,STIIIZY,Vape,Birthday Cake,1.0 \n",
307
+ "10 Here is the missing information:\\n\\nproduct_na... \n",
308
+ "11 Kanha,Edible,Gummies,nan,nan \n",
309
+ "12 Fresca,Flower,Bud,BOBBI HYLL,3.5 \n",
310
+ "13 Flav,Edible,Gummies,Blueberry,100 \n",
311
+ "14 Cherry Pie - 1g - Pod,Left Coast Extracts,Vape... \n",
312
+ "15 Minntz,Preroll,Joint,Emerald Cut,1 \n",
313
+ "16 The Growers Circle,Flower,Bud,Double Down,3.5 \n",
314
+ "17 Pacific Stone,Flower,Bud,Blue Dream,14 \n",
315
+ "18 The Grower Circle,Preroll,Joint,Zoo Dawg x Cos... \n",
316
+ "19 Flight Bites,Edible,Gummies,S'mores,10 "
317
+ ]
318
+ },
319
+ "execution_count": 44,
320
+ "metadata": {},
321
+ "output_type": "execute_result"
322
+ }
323
+ ],
324
+ "source": [
325
+ "import pandas as pd\n",
326
+ "\n",
327
+ "test_results = pd.DataFrame(\n",
328
+ " zip(test_prompts,\n",
329
+ " test_completions,\n",
330
+ " [ele['choices'][0]['message']['content'] for ele in test_predictions])\n",
331
+ " , columns = ['prompt','completion','prediction'])\n",
332
+ "test_results"
333
+ ]
334
+ },
335
+ {
336
+ "cell_type": "code",
337
+ "execution_count": 27,
338
+ "metadata": {},
339
+ "outputs": [
340
+ {
341
+ "data": {
342
+ "text/plain": [
343
+ "{'completion': ['Chill Medicated', 'Edible', 'Beverage', 'nan', 'nan'],\n",
344
+ " 'prediction': ['Chill Medicated', 'Edible', 'Beverage', 'Watermelon', '250'],\n",
345
+ " 'matches': [True, True, True, False, False],\n",
346
+ " 'num_correct': 3}"
347
+ ]
348
+ },
349
+ "execution_count": 27,
350
+ "metadata": {},
351
+ "output_type": "execute_result"
352
+ }
353
+ ],
354
+ "source": [
355
+ "from util import compare_completion_and_prediction\n",
356
+ "\n",
357
+ "compare_completion_and_prediction(test_completions[0], test_predictions[0][\"choices\"][0][\"message\"][\"content\"])"
358
+ ]
359
+ },
360
+ {
361
+ "cell_type": "code",
362
+ "execution_count": 58,
363
+ "metadata": {},
364
+ "outputs": [
365
+ {
366
+ "data": {
367
+ "text/plain": [
368
+ "66"
369
+ ]
370
+ },
371
+ "execution_count": 58,
372
+ "metadata": {},
373
+ "output_type": "execute_result"
374
+ }
375
+ ],
376
+ "source": [
377
+ "# Write function that uses compare_completion_and_prediction to return num_correct and return zero if there is an error\n",
378
+ "def get_num_correct(completion, prediction):\n",
379
+ " try:\n",
380
+ " return compare_completion_and_prediction(completion, prediction)['num_correct']\n",
381
+ " except:\n",
382
+ " return 0 # this will be the case when format is incorrect\n",
383
+ "\n",
384
+ "# Apply get_num_correct function to test_results dataframe\n",
385
+ "test_results['num_correct'] = test_results.apply(lambda row: get_num_correct(row['completion'], row['prediction']), axis=1)\n",
386
+ "test_results['num_correct'].sum() # out of 100 possible correct predictions (20 samples * 5 cols per sample)"
387
+ ]
388
+ }
389
+ ],
390
+ "metadata": {
391
+ "kernelspec": {
392
+ "display_name": "kd-llm-dc",
393
+ "language": "python",
394
+ "name": "python3"
395
+ },
396
+ "language_info": {
397
+ "codemirror_mode": {
398
+ "name": "ipython",
399
+ "version": 3
400
+ },
401
+ "file_extension": ".py",
402
+ "mimetype": "text/x-python",
403
+ "name": "python",
404
+ "nbconvert_exporter": "python",
405
+ "pygments_lexer": "ipython3",
406
+ "version": "3.10.11"
407
+ },
408
+ "orig_nbformat": 4
409
+ },
410
+ "nbformat": 4,
411
+ "nbformat_minor": 2
412
+ }
app/cookies_openai_model_eval.ipynb ADDED
@@ -0,0 +1,797 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "attachments": {},
5
+ "cell_type": "markdown",
6
+ "metadata": {},
7
+ "source": [
8
+ "This notebook tests various OpenAI models, prompts, and numbers of few-shot examples to compare how they perform on the same task."
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": 1,
14
+ "metadata": {},
15
+ "outputs": [],
16
+ "source": [
17
+ "!pip install wandb --upgrade openai datasets -qU"
18
+ ]
19
+ },
20
+ {
21
+ "cell_type": "code",
22
+ "execution_count": 2,
23
+ "metadata": {},
24
+ "outputs": [],
25
+ "source": [
26
+ "import os\n",
27
+ "from dotenv import load_dotenv\n",
28
+ "load_dotenv()\n",
29
+ "\n",
30
+ "import openai\n",
31
+ "\n",
32
+ "# set OPENAI_API_KEY environment variable from .env file\n",
33
+ "openai.api_key = os.getenv(\"OPENAI_API_KEY\")\n",
34
+ "\n",
35
+ "# import OpenAIChatCompletions class from openai_chat_completion.py file and compare_completion_and_prediction function from util.py file\n",
36
+ "from openai_chat_completion import OpenAIChatCompletions\n",
37
+ "from util import compare_completion_and_prediction"
38
+ ]
39
+ },
40
+ {
41
+ "attachments": {},
42
+ "cell_type": "markdown",
43
+ "metadata": {},
44
+ "source": [
45
+ "Models:\n",
46
+ "- gpt-3.5-turbo\n",
47
+ "- gpt-4\n",
48
+ "\n",
49
+ "Prompts:\n",
50
+ "- gpt4-system-message.txt\n",
51
+ "\n",
52
+ "Few-shot examples:\n",
53
+ "> 0 ... 10"
54
+ ]
55
+ },
56
+ {
57
+ "attachments": {},
58
+ "cell_type": "markdown",
59
+ "metadata": {},
60
+ "source": [
61
+ "wandb setup:\n",
62
+ "- entity: kaleidoscope-data\n",
63
+ "- project: cookies_llm_experimental_eval\n",
64
+ "- tags: gpt-3.5-turbo, gpt-4, gpt4-system-message, few-shot"
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": 3,
70
+ "metadata": {},
71
+ "outputs": [
72
+ {
73
+ "name": "stderr",
74
+ "output_type": "stream",
75
+ "text": [
76
+ "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
77
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n",
78
+ "\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n",
79
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[32m\u001b[41mERROR\u001b[0m API key must be 40 characters long, yours was 48\n",
80
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n",
81
+ "\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n",
82
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /home/cmagganas/.netrc\n"
83
+ ]
84
+ },
85
+ {
86
+ "data": {
87
+ "text/html": [
88
+ "Tracking run with wandb version 0.15.4"
89
+ ],
90
+ "text/plain": [
91
+ "<IPython.core.display.HTML object>"
92
+ ]
93
+ },
94
+ "metadata": {},
95
+ "output_type": "display_data"
96
+ },
97
+ {
98
+ "data": {
99
+ "text/html": [
100
+ "Run data is saved locally in <code>/home/cmagganas/kaleidoscope/llm_data_cleaner/app/wandb/run-20230626_114056-rbtf91s6</code>"
101
+ ],
102
+ "text/plain": [
103
+ "<IPython.core.display.HTML object>"
104
+ ]
105
+ },
106
+ "metadata": {},
107
+ "output_type": "display_data"
108
+ },
109
+ {
110
+ "data": {
111
+ "text/html": [
112
+ "Syncing run <strong><a href='https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval/runs/rbtf91s6' target=\"_blank\">rose-puddle-7</a></strong> to <a href='https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
113
+ ],
114
+ "text/plain": [
115
+ "<IPython.core.display.HTML object>"
116
+ ]
117
+ },
118
+ "metadata": {},
119
+ "output_type": "display_data"
120
+ },
121
+ {
122
+ "data": {
123
+ "text/html": [
124
+ " View project at <a href='https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval' target=\"_blank\">https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval</a>"
125
+ ],
126
+ "text/plain": [
127
+ "<IPython.core.display.HTML object>"
128
+ ]
129
+ },
130
+ "metadata": {},
131
+ "output_type": "display_data"
132
+ },
133
+ {
134
+ "data": {
135
+ "text/html": [
136
+ " View run at <a href='https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval/runs/rbtf91s6' target=\"_blank\">https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval/runs/rbtf91s6</a>"
137
+ ],
138
+ "text/plain": [
139
+ "<IPython.core.display.HTML object>"
140
+ ]
141
+ },
142
+ "metadata": {},
143
+ "output_type": "display_data"
144
+ }
145
+ ],
146
+ "source": [
147
+ "from wandb.integration.openai import autolog\n",
148
+ "\n",
149
+ "autolog({\"project\":\"cookies_llm_experimental_eval\",\n",
150
+ " \"entity\": \"kaleidoscope-data\",\n",
151
+ " \"group\": \"cookies\",\n",
152
+ " \"job_type\": \"eval\"})"
153
+ ]
154
+ },
155
+ {
156
+ "cell_type": "code",
157
+ "execution_count": 4,
158
+ "metadata": {},
159
+ "outputs": [],
160
+ "source": [
161
+ "# create an empty dataframe to store predictions\n",
162
+ "import pandas as pd\n",
163
+ "predictions_df = pd.DataFrame(columns=['model', 'system_message', 'n_shot', 'prompt', 'completion', 'prediction'])\n",
164
+ "\n",
165
+ "models_to_test = [\"gpt-4\", \"gpt-3.5-turbo\"]\n",
166
+ "sys_mes_to_test = [\"../prompts/gpt4-system-message.txt\", \"../prompts/gpt4-system-message2.txt\"] # names are arbitrary, same prompts but with \"####\" in system message 2\n",
167
+ "n_shots_to_test = [None, 1, 2, 3, 5]"
168
+ ]
169
+ },
170
+ {
171
+ "cell_type": "code",
172
+ "execution_count": 6,
173
+ "metadata": {},
174
+ "outputs": [],
175
+ "source": [
176
+ "# if rerunning the below cell is required, set the following to True\n",
177
+ "rerun = False\n",
178
+ "if rerun:\n",
179
+ " predictions_df = pd.read_csv('../data/cookies_llm_eval_predictions.csv')"
180
+ ]
181
+ },
182
+ {
183
+ "cell_type": "code",
184
+ "execution_count": 178,
185
+ "metadata": {},
186
+ "outputs": [],
187
+ "source": [
188
+ "# get predictions for all combinations of models, prompts, and n_shot values\n",
189
+ "# save predictions to dataframe and then to csv in data folder after each iteration\n",
190
+ "\n",
191
+ "# loop through models_to_test\n",
192
+ "for model in models_to_test:\n",
193
+ " # loop through sys_mes_to_test\n",
194
+ " for system_message in sys_mes_to_test:\n",
195
+ " # instantiate OpenAIChatCompletions class\n",
196
+ " chat = OpenAIChatCompletions(model=model, system_message=system_message)\n",
197
+ " # loop through n_shots_to_test\n",
198
+ " for n_shot in n_shots_to_test:\n",
199
+ " sys_mes_var = 1 if system_message == \"../prompts/gpt4-system-message.txt\" else 2\n",
200
+ " n_shot_var = 0 if n_shot == None else n_shot\n",
201
+ " # check if predictions for this model, system_message, and n_shot value have already been made\n",
202
+ " if predictions_df[(predictions_df['model'] == model) & (predictions_df['system_message'] == sys_mes_var) & (predictions_df['n_shot'] == n_shot_var)].shape[0] == 0:\n",
203
+ " prompts, completions, predictions = chat.predict_jsonl(n_shot=n_shot)\n",
204
+ " else:\n",
205
+ " # skip if predictions for this model, system_message, and n_shot value have already been made\n",
206
+ " continue\n",
207
+ " # build a dataframe holding this combination's predictions\n",
208
+ " df_to_append = pd.DataFrame({'model': model, 'system_message': sys_mes_var, 'n_shot': n_shot_var, 'prompt': prompts, 'completion': completions, 'prediction': predictions})\n",
209
+ " df_right = df_to_append['prediction'].apply(pd.Series)\n",
210
+ " df_right['prediction'] = df_right['choices'].apply(lambda x: x[0]['message']['content']).drop(columns=['choices'])\n",
211
+ " df_to_append = pd.concat([df_to_append[['model', 'system_message', 'n_shot', 'prompt', 'completion']], df_right], axis=1)\n",
212
+ " df_to_append.columns = ['model', 'system_message', 'n_shot', 'prompt', 'completion', 'id', 'object', 'created', 'openai_model', 'choices', 'usage', 'prediction']\n",
213
+ " # save predictions to dataframe\n",
214
+ " predictions_df = pd.concat([predictions_df, df_to_append], ignore_index=True)\n",
215
+ " # delete duplicates from dataframe\n",
216
+ " predictions_df = predictions_df[~predictions_df.duplicated(subset=['model', 'system_message', 'n_shot', 'prompt'])]\n",
217
+ " predictions_df.to_csv('../data/cookies_llm_eval_predictions.csv', index=False)"
218
+ ]
219
+ },
220
+ {
221
+ "cell_type": "code",
222
+ "execution_count": 179,
223
+ "metadata": {},
224
+ "outputs": [],
225
+ "source": [
226
+ "predictions_df = predictions_df[~predictions_df.duplicated(subset=['model', 'system_message', 'n_shot', 'prompt'])]"
227
+ ]
228
+ },
229
+ {
230
+ "cell_type": "code",
231
+ "execution_count": 180,
232
+ "metadata": {},
233
+ "outputs": [
234
+ {
235
+ "data": {
236
+ "text/plain": [
237
+ "(400, 12)"
238
+ ]
239
+ },
240
+ "execution_count": 180,
241
+ "metadata": {},
242
+ "output_type": "execute_result"
243
+ }
244
+ ],
245
+ "source": [
246
+ "predictions_df.shape"
247
+ ]
248
+ },
249
+ {
250
+ "cell_type": "code",
251
+ "execution_count": 143,
252
+ "metadata": {},
253
+ "outputs": [],
254
+ "source": [
255
+ "# import numpy as np\n",
256
+ "\n",
257
+ "# ids = predictions_df['id'].isna()\n",
258
+ "# # apply pd.Series to predictions column for rows where id is null and change system_message {0,1} to {1,2}\n",
259
+ "# new_df_right = predictions_df.loc[ids, 'prediction'].apply(pd.Series)\n",
260
+ "# new_df_right['prediction'] = new_df_right['choices'].apply(lambda x: x[0]['message']['content']).drop(columns=['choices'])\n",
261
+ "# new_df_left = predictions_df.loc[ids, ['model', 'system_message', 'n_shot', 'prompt', 'completion']].replace({0:1, 1:2})\n",
262
+ "# new_df = pd.concat([new_df_left, new_df_right], axis=1)\n",
263
+ "\n",
264
+ "# predictions_df.columns = ['model', 'system_message', 'n_shot', 'prompt', 'completion', 'id', 'object', 'created', 'openai_model', 'choices', 'usage', 'prediction']\n",
265
+ "# new_df.columns = ['model', 'system_message', 'n_shot', 'prompt', 'completion', 'id', 'object', 'created', 'openai_model', 'choices', 'usage', 'prediction']\n",
266
+ "# predictions_df.loc[ids] = new_df"
267
+ ]
268
+ },
269
+ {
270
+ "cell_type": "code",
271
+ "execution_count": 155,
272
+ "metadata": {},
273
+ "outputs": [],
274
+ "source": [
275
+ "# for col in ['model','system_message','n_shot']:\n",
276
+ "# print(predictions_df[col].value_counts())"
277
+ ]
278
+ },
279
+ {
280
+ "cell_type": "code",
281
+ "execution_count": 84,
282
+ "metadata": {},
283
+ "outputs": [],
284
+ "source": [
285
+ "# import numpy as np\n",
286
+ "\n",
287
+ "# # create a copy of predictions_df to manipulate\n",
288
+ "# new_predictions_df = predictions_df\n",
289
+ "\n",
290
+ "# # replace names with 1 or 2\n",
291
+ "# def replace_sys_mes_name(x):\n",
292
+ "# if x == \"../prompts/gpt4-system-message.txt\":\n",
293
+ "# return \"1\"\n",
294
+ "# elif x == \"../prompts/gpt4-system-message2.txt\":\n",
295
+ "# return \"2\"\n",
296
+ "# else:\n",
297
+ "# return x\n",
298
+ "# new_predictions_df['system_message'] = new_predictions_df['system_message'].apply(lambda x: replace_sys_mes_name(x))\n",
299
+ "# # replace None with 0\n",
300
+ "# new_predictions_df['n_shot'] = new_predictions_df['n_shot'].apply(lambda x: 0 if x == None or np.nan else x)\n",
301
+ "\n",
302
+ "# # break up prediction column into sub columns by each of json keys\n",
303
+ "# new_predictions_df = pd.concat([new_predictions_df, new_predictions_df['prediction'].apply(pd.Series)], axis=1)"
304
+ ]
305
+ },
306
+ {
307
+ "cell_type": "code",
308
+ "execution_count": 168,
309
+ "metadata": {},
310
+ "outputs": [],
311
+ "source": [
312
+ "# predictions_df.drop(columns=['num_correct'], inplace=True)"
313
+ ]
314
+ },
315
+ {
316
+ "cell_type": "code",
317
+ "execution_count": 181,
318
+ "metadata": {},
319
+ "outputs": [
320
+ {
321
+ "data": {
322
+ "text/html": [
323
+ "<div>\n",
324
+ "<style scoped>\n",
325
+ " .dataframe tbody tr th:only-of-type {\n",
326
+ " vertical-align: middle;\n",
327
+ " }\n",
328
+ "\n",
329
+ " .dataframe tbody tr th {\n",
330
+ " vertical-align: top;\n",
331
+ " }\n",
332
+ "\n",
333
+ " .dataframe thead th {\n",
334
+ " text-align: right;\n",
335
+ " }\n",
336
+ "</style>\n",
337
+ "<table border=\"1\" class=\"dataframe\">\n",
338
+ " <thead>\n",
339
+ " <tr style=\"text-align: right;\">\n",
340
+ " <th></th>\n",
341
+ " <th>model</th>\n",
342
+ " <th>system_message</th>\n",
343
+ " <th>n_shot</th>\n",
344
+ " <th>prompt</th>\n",
345
+ " <th>completion</th>\n",
346
+ " <th>id</th>\n",
347
+ " <th>object</th>\n",
348
+ " <th>created</th>\n",
349
+ " <th>openai_model</th>\n",
350
+ " <th>choices</th>\n",
351
+ " <th>usage</th>\n",
352
+ " <th>prediction</th>\n",
353
+ " </tr>\n",
354
+ " </thead>\n",
355
+ " <tbody>\n",
356
+ " <tr>\n",
357
+ " <th>0</th>\n",
358
+ " <td>gpt-4</td>\n",
359
+ " <td>1</td>\n",
360
+ " <td>0</td>\n",
361
+ " <td>co-2MFE5QVF,Chill Medicated - Watermelon - Syr...</td>\n",
362
+ " <td>Chill Medicated,Edible,Beverage,nan,nan</td>\n",
363
+ " <td>chatcmpl-7VlTkjAqXNRWfltMPpr5v37uBJIsg</td>\n",
364
+ " <td>chat.completion</td>\n",
365
+ " <td>1.687805e+09</td>\n",
366
+ " <td>gpt-4-0314</td>\n",
367
+ " <td>[&lt;OpenAIObject at 0x7fcf7fde94e0&gt; JSON: {\\n \"...</td>\n",
368
+ " <td>{\\n \"prompt_tokens\": 54,\\n \"completion_token...</td>\n",
369
+ " <td>Hello! It looks like you mentioned a product: ...</td>\n",
370
+ " </tr>\n",
371
+ " <tr>\n",
372
+ " <th>1</th>\n",
373
+ " <td>gpt-4</td>\n",
374
+ " <td>1</td>\n",
375
+ " <td>0</td>\n",
376
+ " <td>bl-111630024545,Feelz - Space Cowboy 3.5g,nan,...</td>\n",
377
+ " <td>Feelz,Flower,Bud,Space Cowboy,3.5</td>\n",
378
+ " <td>chatcmpl-7VlTtGF3RGsngfKB1BXufxoTixX2v</td>\n",
379
+ " <td>chat.completion</td>\n",
380
+ " <td>1.687805e+09</td>\n",
381
+ " <td>gpt-4-0314</td>\n",
382
+ " <td>[&lt;OpenAIObject at 0x7fcf7f49d2b0&gt; JSON: {\\n \"...</td>\n",
383
+ " <td>{\\n \"prompt_tokens\": 51,\\n \"completion_token...</td>\n",
384
+ " <td>Hello! It seems like you are referring to a pr...</td>\n",
385
+ " </tr>\n",
386
+ " <tr>\n",
387
+ " <th>2</th>\n",
388
+ " <td>gpt-4</td>\n",
389
+ " <td>1</td>\n",
390
+ " <td>0</td>\n",
391
+ " <td>fl-8voAjt83sD,Champelli | Xclusivo 3.5g | Eigh...</td>\n",
392
+ " <td>Champelli,Flower,Bud,Xclusivo,3.5</td>\n",
393
+ " <td>chatcmpl-7VlU80b0m00VaiGymtj9dbqOggTgR</td>\n",
394
+ " <td>chat.completion</td>\n",
395
+ " <td>1.687805e+09</td>\n",
396
+ " <td>gpt-4-0314</td>\n",
397
+ " <td>[&lt;OpenAIObject at 0x7fcf7e306890&gt; JSON: {\\n \"...</td>\n",
398
+ " <td>{\\n \"prompt_tokens\": 71,\\n \"completion_token...</td>\n",
399
+ " <td>Hello! It seems like you're interested in the ...</td>\n",
400
+ " </tr>\n",
401
+ " <tr>\n",
402
+ " <th>3</th>\n",
403
+ " <td>gpt-4</td>\n",
404
+ " <td>1</td>\n",
405
+ " <td>0</td>\n",
406
+ " <td>bl-073133213364,CAM - Mellowz #7 7g,nan,FLOWER...</td>\n",
407
+ " <td>CAM,Flower,Bud,Mellowz #7,7</td>\n",
408
+ " <td>chatcmpl-7VlUHqbsG2kpFHDxAWfsryh6pHmC9</td>\n",
409
+ " <td>chat.completion</td>\n",
410
+ " <td>1.687805e+09</td>\n",
411
+ " <td>gpt-4-0314</td>\n",
412
+ " <td>[&lt;OpenAIObject at 0x7fcf7e33d940&gt; JSON: {\\n \"...</td>\n",
413
+ " <td>{\\n \"prompt_tokens\": 49,\\n \"completion_token...</td>\n",
414
+ " <td>It seems like you are looking for information ...</td>\n",
415
+ " </tr>\n",
416
+ " <tr>\n",
417
+ " <th>4</th>\n",
418
+ " <td>gpt-4</td>\n",
419
+ " <td>1</td>\n",
420
+ " <td>0</td>\n",
421
+ " <td>fl-fwJQL2AWnS,Backpack Boyz | Bubblegum Gelato...</td>\n",
422
+ " <td>Backpack Boyz,Edible,CBD Tincture/Caps/etc,nan...</td>\n",
423
+ " <td>chatcmpl-7VlUYvcad2wahIMHavhDEkYrgvjpw</td>\n",
424
+ " <td>chat.completion</td>\n",
425
+ " <td>1.687805e+09</td>\n",
426
+ " <td>gpt-4-0314</td>\n",
427
+ " <td>[&lt;OpenAIObject at 0x7fcf7e306980&gt; JSON: {\\n \"...</td>\n",
428
+ " <td>{\\n \"prompt_tokens\": 59,\\n \"completion_token...</td>\n",
429
+ " <td>Hello! It seems like you are looking for infor...</td>\n",
430
+ " </tr>\n",
431
+ " <tr>\n",
432
+ " <th>...</th>\n",
433
+ " <td>...</td>\n",
434
+ " <td>...</td>\n",
435
+ " <td>...</td>\n",
436
+ " <td>...</td>\n",
437
+ " <td>...</td>\n",
438
+ " <td>...</td>\n",
439
+ " <td>...</td>\n",
440
+ " <td>...</td>\n",
441
+ " <td>...</td>\n",
442
+ " <td>...</td>\n",
443
+ " <td>...</td>\n",
444
+ " <td>...</td>\n",
445
+ " </tr>\n",
446
+ " <tr>\n",
447
+ " <th>395</th>\n",
448
+ " <td>gpt-3.5-turbo</td>\n",
449
+ " <td>2</td>\n",
450
+ " <td>1</td>\n",
451
+ " <td>co-76GP441T,Minntz - Emerald Cut - Indoor - Jo...</td>\n",
452
+ " <td>Minntz,Preroll,Joint,Emerald Cut,1</td>\n",
453
+ " <td>chatcmpl-7VrjRMvs2l8EJd4PVecpSRPCvV9Hk</td>\n",
454
+ " <td>chat.completion</td>\n",
455
+ " <td>1.687829e+09</td>\n",
456
+ " <td>gpt-3.5-turbo-0301</td>\n",
457
+ " <td>[{'index': 0, 'message': {'role': 'assistant',...</td>\n",
458
+ " <td>{'prompt_tokens': 125, 'completion_tokens': 23...</td>\n",
459
+ " <td>Minntz,Joint,Indoor,Emerald Cut,1g,co-76GP441T.</td>\n",
460
+ " </tr>\n",
461
+ " <tr>\n",
462
+ " <th>396</th>\n",
463
+ " <td>gpt-3.5-turbo</td>\n",
464
+ " <td>2</td>\n",
465
+ " <td>1</td>\n",
466
+ " <td>co-5RAWYHYQ,The Growers Circle - Double Down -...</td>\n",
467
+ " <td>The Growers Circle,Flower,Bud,Double Down,3.5</td>\n",
468
+ " <td>chatcmpl-7VrjT3wfVoLtq3G6xksfVtLz4FloJ</td>\n",
469
+ " <td>chat.completion</td>\n",
470
+ " <td>1.687829e+09</td>\n",
471
+ " <td>gpt-3.5-turbo-0301</td>\n",
472
+ " <td>[{'index': 0, 'message': {'role': 'assistant',...</td>\n",
473
+ " <td>{'prompt_tokens': 123, 'completion_tokens': 22...</td>\n",
474
+ " <td>The Growers Circle,Double Down,Indoor,3.5g,5RA...</td>\n",
475
+ " </tr>\n",
476
+ " <tr>\n",
477
+ " <th>397</th>\n",
478
+ " <td>gpt-3.5-turbo</td>\n",
479
+ " <td>2</td>\n",
480
+ " <td>1</td>\n",
481
+ " <td>md-1195389,Blue Dream Roll Your Own Sugar Shak...</td>\n",
482
+ " <td>Pacific Stone,Flower,Bud,nan,14</td>\n",
483
+ " <td>chatcmpl-7VrjVafi1eGBXYfgmGBN0H3b0FzYO</td>\n",
484
+ " <td>chat.completion</td>\n",
485
+ " <td>1.687829e+09</td>\n",
486
+ " <td>gpt-3.5-turbo-0301</td>\n",
487
+ " <td>[{'index': 0, 'message': {'role': 'assistant',...</td>\n",
488
+ " <td>{'prompt_tokens': 119, 'completion_tokens': 20...</td>\n",
489
+ " <td>Pacific Stone,Sugar Shake,Blue Dream,Roll Your...</td>\n",
490
+ " </tr>\n",
491
+ " <tr>\n",
492
+ " <th>398</th>\n",
493
+ " <td>gpt-3.5-turbo</td>\n",
494
+ " <td>2</td>\n",
495
+ " <td>1</td>\n",
496
+ " <td>co-847ZXF37,The Grower Circle - Zoo Dawg x Cos...</td>\n",
497
+ " <td>The Growers Circle,Preroll,Joint,Zoo Dawg x Co...</td>\n",
498
+ " <td>chatcmpl-7VrjWQpcRxJTdr3f4BUd7totDZpdF</td>\n",
499
+ " <td>chat.completion</td>\n",
500
+ " <td>1.687829e+09</td>\n",
501
+ " <td>gpt-3.5-turbo-0301</td>\n",
502
+ " <td>[{'index': 0, 'message': {'role': 'assistant',...</td>\n",
503
+ " <td>{'prompt_tokens': 133, 'completion_tokens': 32...</td>\n",
504
+ " <td>Multi Joint,Zoo Dawg x Cosa Nostra,The Grower ...</td>\n",
505
+ " </tr>\n",
506
+ " <tr>\n",
507
+ " <th>399</th>\n",
508
+ " <td>gpt-3.5-turbo</td>\n",
509
+ " <td>2</td>\n",
510
+ " <td>1</td>\n",
511
+ " <td>co-8EMW15ZM,Flight Bites - S'mores - Gummy - 1...</td>\n",
512
+ " <td>Flight Bites,Edible,Gummies,nan,nan</td>\n",
513
+ " <td>chatcmpl-7VrjXiUHiyUyH7udPXIjANVmAUrra</td>\n",
514
+ " <td>chat.completion</td>\n",
515
+ " <td>1.687829e+09</td>\n",
516
+ " <td>gpt-3.5-turbo-0301</td>\n",
517
+ " <td>[{'index': 0, 'message': {'role': 'assistant',...</td>\n",
518
+ " <td>{'prompt_tokens': 129, 'completion_tokens': 21...</td>\n",
519
+ " <td>Flight Bites,Gummy,S'mores,10 count,100mg CO₂ ...</td>\n",
520
+ " </tr>\n",
521
+ " </tbody>\n",
522
+ "</table>\n",
523
+ "<p>400 rows × 12 columns</p>\n",
524
+ "</div>"
525
+ ],
526
+ "text/plain": [
527
+ " model system_message n_shot \\\n",
528
+ "0 gpt-4 1 0 \n",
529
+ "1 gpt-4 1 0 \n",
530
+ "2 gpt-4 1 0 \n",
531
+ "3 gpt-4 1 0 \n",
532
+ "4 gpt-4 1 0 \n",
533
+ ".. ... ... ... \n",
534
+ "395 gpt-3.5-turbo 2 1 \n",
535
+ "396 gpt-3.5-turbo 2 1 \n",
536
+ "397 gpt-3.5-turbo 2 1 \n",
537
+ "398 gpt-3.5-turbo 2 1 \n",
538
+ "399 gpt-3.5-turbo 2 1 \n",
539
+ "\n",
540
+ " prompt \\\n",
541
+ "0 co-2MFE5QVF,Chill Medicated - Watermelon - Syr... \n",
542
+ "1 bl-111630024545,Feelz - Space Cowboy 3.5g,nan,... \n",
543
+ "2 fl-8voAjt83sD,Champelli | Xclusivo 3.5g | Eigh... \n",
544
+ "3 bl-073133213364,CAM - Mellowz #7 7g,nan,FLOWER... \n",
545
+ "4 fl-fwJQL2AWnS,Backpack Boyz | Bubblegum Gelato... \n",
546
+ ".. ... \n",
547
+ "395 co-76GP441T,Minntz - Emerald Cut - Indoor - Jo... \n",
548
+ "396 co-5RAWYHYQ,The Growers Circle - Double Down -... \n",
549
+ "397 md-1195389,Blue Dream Roll Your Own Sugar Shak... \n",
550
+ "398 co-847ZXF37,The Grower Circle - Zoo Dawg x Cos... \n",
551
+ "399 co-8EMW15ZM,Flight Bites - S'mores - Gummy - 1... \n",
552
+ "\n",
553
+ " completion \\\n",
554
+ "0 Chill Medicated,Edible,Beverage,nan,nan \n",
555
+ "1 Feelz,Flower,Bud,Space Cowboy,3.5 \n",
556
+ "2 Champelli,Flower,Bud,Xclusivo,3.5 \n",
557
+ "3 CAM,Flower,Bud,Mellowz #7,7 \n",
558
+ "4 Backpack Boyz,Edible,CBD Tincture/Caps/etc,nan... \n",
559
+ ".. ... \n",
560
+ "395 Minntz,Preroll,Joint,Emerald Cut,1 \n",
561
+ "396 The Growers Circle,Flower,Bud,Double Down,3.5 \n",
562
+ "397 Pacific Stone,Flower,Bud,nan,14 \n",
563
+ "398 The Growers Circle,Preroll,Joint,Zoo Dawg x Co... \n",
564
+ "399 Flight Bites,Edible,Gummies,nan,nan \n",
565
+ "\n",
566
+ " id object created \\\n",
567
+ "0 chatcmpl-7VlTkjAqXNRWfltMPpr5v37uBJIsg chat.completion 1.687805e+09 \n",
568
+ "1 chatcmpl-7VlTtGF3RGsngfKB1BXufxoTixX2v chat.completion 1.687805e+09 \n",
569
+ "2 chatcmpl-7VlU80b0m00VaiGymtj9dbqOggTgR chat.completion 1.687805e+09 \n",
570
+ "3 chatcmpl-7VlUHqbsG2kpFHDxAWfsryh6pHmC9 chat.completion 1.687805e+09 \n",
571
+ "4 chatcmpl-7VlUYvcad2wahIMHavhDEkYrgvjpw chat.completion 1.687805e+09 \n",
572
+ ".. ... ... ... \n",
573
+ "395 chatcmpl-7VrjRMvs2l8EJd4PVecpSRPCvV9Hk chat.completion 1.687829e+09 \n",
574
+ "396 chatcmpl-7VrjT3wfVoLtq3G6xksfVtLz4FloJ chat.completion 1.687829e+09 \n",
575
+ "397 chatcmpl-7VrjVafi1eGBXYfgmGBN0H3b0FzYO chat.completion 1.687829e+09 \n",
576
+ "398 chatcmpl-7VrjWQpcRxJTdr3f4BUd7totDZpdF chat.completion 1.687829e+09 \n",
577
+ "399 chatcmpl-7VrjXiUHiyUyH7udPXIjANVmAUrra chat.completion 1.687829e+09 \n",
578
+ "\n",
579
+ " openai_model choices \\\n",
580
+ "0 gpt-4-0314 [<OpenAIObject at 0x7fcf7fde94e0> JSON: {\\n \"... \n",
581
+ "1 gpt-4-0314 [<OpenAIObject at 0x7fcf7f49d2b0> JSON: {\\n \"... \n",
582
+ "2 gpt-4-0314 [<OpenAIObject at 0x7fcf7e306890> JSON: {\\n \"... \n",
583
+ "3 gpt-4-0314 [<OpenAIObject at 0x7fcf7e33d940> JSON: {\\n \"... \n",
584
+ "4 gpt-4-0314 [<OpenAIObject at 0x7fcf7e306980> JSON: {\\n \"... \n",
585
+ ".. ... ... \n",
586
+ "395 gpt-3.5-turbo-0301 [{'index': 0, 'message': {'role': 'assistant',... \n",
587
+ "396 gpt-3.5-turbo-0301 [{'index': 0, 'message': {'role': 'assistant',... \n",
588
+ "397 gpt-3.5-turbo-0301 [{'index': 0, 'message': {'role': 'assistant',... \n",
589
+ "398 gpt-3.5-turbo-0301 [{'index': 0, 'message': {'role': 'assistant',... \n",
590
+ "399 gpt-3.5-turbo-0301 [{'index': 0, 'message': {'role': 'assistant',... \n",
591
+ "\n",
592
+ " usage \\\n",
593
+ "0 {\\n \"prompt_tokens\": 54,\\n \"completion_token... \n",
594
+ "1 {\\n \"prompt_tokens\": 51,\\n \"completion_token... \n",
595
+ "2 {\\n \"prompt_tokens\": 71,\\n \"completion_token... \n",
596
+ "3 {\\n \"prompt_tokens\": 49,\\n \"completion_token... \n",
597
+ "4 {\\n \"prompt_tokens\": 59,\\n \"completion_token... \n",
598
+ ".. ... \n",
599
+ "395 {'prompt_tokens': 125, 'completion_tokens': 23... \n",
600
+ "396 {'prompt_tokens': 123, 'completion_tokens': 22... \n",
601
+ "397 {'prompt_tokens': 119, 'completion_tokens': 20... \n",
602
+ "398 {'prompt_tokens': 133, 'completion_tokens': 32... \n",
603
+ "399 {'prompt_tokens': 129, 'completion_tokens': 21... \n",
604
+ "\n",
605
+ " prediction \n",
606
+ "0 Hello! It looks like you mentioned a product: ... \n",
607
+ "1 Hello! It seems like you are referring to a pr... \n",
608
+ "2 Hello! It seems like you're interested in the ... \n",
609
+ "3 It seems like you are looking for information ... \n",
610
+ "4 Hello! It seems like you are looking for infor... \n",
611
+ ".. ... \n",
612
+ "395 Minntz,Joint,Indoor,Emerald Cut,1g,co-76GP441T. \n",
613
+ "396 The Growers Circle,Double Down,Indoor,3.5g,5RA... \n",
614
+ "397 Pacific Stone,Sugar Shake,Blue Dream,Roll Your... \n",
615
+ "398 Multi Joint,Zoo Dawg x Cosa Nostra,The Grower ... \n",
616
+ "399 Flight Bites,Gummy,S'mores,10 count,100mg CO₂ ... \n",
617
+ "\n",
618
+ "[400 rows x 12 columns]"
619
+ ]
620
+ },
621
+ "execution_count": 181,
622
+ "metadata": {},
623
+ "output_type": "execute_result"
624
+ }
625
+ ],
626
+ "source": [
627
+ "predictions_df"
628
+ ]
629
+ },
630
+ {
631
+ "cell_type": "code",
632
+ "execution_count": 182,
633
+ "metadata": {},
634
+ "outputs": [
635
+ {
636
+ "data": {
637
+ "text/plain": [
638
+ "669"
639
+ ]
640
+ },
641
+ "execution_count": 182,
642
+ "metadata": {},
643
+ "output_type": "execute_result"
644
+ }
645
+ ],
646
+ "source": [
647
+ "from util import compare_completion_and_prediction\n",
648
+ "\n",
649
+ "# Function that uses compare_completion_and_prediction to return num_correct and return zero if there is an error\n",
650
+ "def get_num_correct(completion, prediction):\n",
651
+ " try:\n",
652
+ " return compare_completion_and_prediction(completion, prediction)['num_correct']\n",
653
+ " except:\n",
654
+ " return 0 # this will be the case when format is incorrect\n",
655
+ " \n",
656
+ "# Apply get_num_correct function to predictions_df dataframe\n",
657
+ "predictions_df['num_correct'] = predictions_df.apply(lambda row: get_num_correct(row['completion'], row['prediction']), axis=1)\n",
658
+ "predictions_df['num_correct'].sum() # out of 2000 possible correct predictions (20 samples * 5 cols per sample) * (2 system messages * 2 models * 5 n_shot values)"
659
+ ]
660
+ },
661
+ {
662
+ "cell_type": "code",
663
+ "execution_count": 187,
664
+ "metadata": {},
665
+ "outputs": [
666
+ {
667
+ "data": {
668
+ "text/plain": [
669
+ "model system_message n_shot\n",
670
+ "gpt-3.5-turbo 1 0 0.00\n",
671
+ " 1 0.00\n",
672
+ " 2 0 0.00\n",
673
+ "gpt-4 1 0 0.00\n",
674
+ " 1 0.00\n",
675
+ " 2 0 0.00\n",
676
+ "gpt-3.5-turbo 1 2 0.24\n",
677
+ " 2 1 0.24\n",
678
+ " 2 0.27\n",
679
+ " 3 0.36\n",
680
+ " 1 3 0.40\n",
681
+ " 5 0.44\n",
682
+ "gpt-4 2 2 0.45\n",
683
+ " 1 2 0.45\n",
684
+ " 2 1 0.47\n",
685
+ "gpt-3.5-turbo 2 5 0.56\n",
686
+ "gpt-4 1 3 0.62\n",
687
+ " 2 3 0.67\n",
688
+ " 5 0.73\n",
689
+ " 1 5 0.79\n",
690
+ "Name: num_correct, dtype: float64"
691
+ ]
692
+ },
693
+ "execution_count": 187,
694
+ "metadata": {},
695
+ "output_type": "execute_result"
696
+ }
697
+ ],
698
+ "source": [
699
+ "predictions_df.groupby(['model', 'system_message', 'n_shot'])['num_correct'].sum().sort_values() / 100 # out of 100 possible correct predictions (20 samples * 5 cols per sample)"
700
+ ]
701
+ },
702
+ {
703
+ "cell_type": "code",
704
+ "execution_count": 184,
705
+ "metadata": {},
706
+ "outputs": [],
707
+ "source": [
708
+ "new_predictions_df.to_csv('../data/cookies_llm_eval_proc_preds.csv', index=False)"
709
+ ]
710
+ },
711
+ {
712
+ "cell_type": "code",
713
+ "execution_count": 76,
714
+ "metadata": {},
715
+ "outputs": [
716
+ {
717
+ "data": {
718
+ "text/html": [
719
+ "Waiting for W&B process to finish... <strong style=\"color:green\">(success).</strong>"
720
+ ],
721
+ "text/plain": [
722
+ "<IPython.core.display.HTML object>"
723
+ ]
724
+ },
725
+ "metadata": {},
726
+ "output_type": "display_data"
727
+ },
728
+ {
729
+ "data": {
730
+ "text/html": [
731
+ "<style>\n",
732
+ " table.wandb td:nth-child(1) { padding: 0 10px; text-align: left ; width: auto;} td:nth-child(2) {text-align: left ; width: 100%}\n",
733
+ " .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
734
+ " .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
735
+ " </style>\n",
736
+ "<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>usage/completion_tokens</td><td>▆▆▁▁▁▁▁▁▁▁█▄▁▁▁▁▁▁▁▃▁▁▁▆▂▆▃▅▄▅▆▄▃▁▁▁▁▁▁▁</td></tr><tr><td>usage/elapsed_time</td><td>▄▆▁▁▁▁▂▁▂▁█▃▁▁▁▂▁▁▂▁▁▁▁▄▂▄▂▃▃▄▅▂▁▁▁▁▂▁▁▁</td></tr><tr><td>usage/prompt_tokens</td><td>▁▁▂▂▄▄▆▅██▁▁▃▃▄▅▅██▁▁▃▃▁▁▁▁▁▁▂▁▂▁▄▄▆▆██▁</td></tr><tr><td>usage/total_tokens</td><td>▄▄▂▂▃▃▅▅█▇▆▃▂▂▄▅▅▇▇▂▁▃▂▄▂▄▂▄▃▄▄▃▂▄▃▅▆██▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>usage/completion_tokens</td><td>62</td></tr><tr><td>usage/elapsed_time</td><td>2.40086</td></tr><tr><td>usage/prompt_tokens</td><td>54</td></tr><tr><td>usage/total_tokens</td><td>116</td></tr></table><br/></div></div>"
737
+ ],
738
+ "text/plain": [
739
+ "<IPython.core.display.HTML object>"
740
+ ]
741
+ },
742
+ "metadata": {},
743
+ "output_type": "display_data"
744
+ },
745
+ {
746
+ "data": {
747
+ "text/html": [
748
+ " View run <strong style=\"color:#cdcd00\">rose-puddle-7</strong> at: <a href='https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval/runs/rbtf91s6' target=\"_blank\">https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval/runs/rbtf91s6</a><br/>Synced 6 W&B file(s), 422 media file(s), 422 artifact file(s) and 0 other file(s)"
749
+ ],
750
+ "text/plain": [
751
+ "<IPython.core.display.HTML object>"
752
+ ]
753
+ },
754
+ "metadata": {},
755
+ "output_type": "display_data"
756
+ },
757
+ {
758
+ "data": {
759
+ "text/html": [
760
+ "Find logs at: <code>./wandb/run-20230626_114056-rbtf91s6/logs</code>"
761
+ ],
762
+ "text/plain": [
763
+ "<IPython.core.display.HTML object>"
764
+ ]
765
+ },
766
+ "metadata": {},
767
+ "output_type": "display_data"
768
+ }
769
+ ],
770
+ "source": [
771
+ "autolog.disable()"
772
+ ]
773
+ }
774
+ ],
775
+ "metadata": {
776
+ "kernelspec": {
777
+ "display_name": "kd-llm-dc",
778
+ "language": "python",
779
+ "name": "python3"
780
+ },
781
+ "language_info": {
782
+ "codemirror_mode": {
783
+ "name": "ipython",
784
+ "version": 3
785
+ },
786
+ "file_extension": ".py",
787
+ "mimetype": "text/x-python",
788
+ "name": "python",
789
+ "nbconvert_exporter": "python",
790
+ "pygments_lexer": "ipython3",
791
+ "version": "3.10.11"
792
+ },
793
+ "orig_nbformat": 4
794
+ },
795
+ "nbformat": 4,
796
+ "nbformat_minor": 2
797
+ }
app/data_prep.ipynb ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "attachments": {},
5
+ "cell_type": "markdown",
6
+ "metadata": {},
7
+ "source": [
8
+ "## This notebook shows how to load CSV data and convert it into JSONL format for the LLM data cleaner.\n",
9
+ "\n",
10
+ "First, we load the data."
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": null,
16
+ "metadata": {},
17
+ "outputs": [
18
+ {
19
+ "data": {
20
+ "text/html": [
21
+ "<div>\n",
22
+ "<style scoped>\n",
23
+ " .dataframe tbody tr th:only-of-type {\n",
24
+ " vertical-align: middle;\n",
25
+ " }\n",
26
+ "\n",
27
+ " .dataframe tbody tr th {\n",
28
+ " vertical-align: top;\n",
29
+ " }\n",
30
+ "\n",
31
+ " .dataframe thead th {\n",
32
+ " text-align: right;\n",
33
+ " }\n",
34
+ "</style>\n",
35
+ "<table border=\"1\" class=\"dataframe\">\n",
36
+ " <thead>\n",
37
+ " <tr style=\"text-align: right;\">\n",
38
+ " <th></th>\n",
39
+ " <th>sku</th>\n",
40
+ " <th>product_name (pos)</th>\n",
41
+ " <th>brand (pos)</th>\n",
42
+ " <th>product_category (pos)</th>\n",
43
+ " <th>strain_name (pos)</th>\n",
44
+ " <th>product_weight_grams (pos)</th>\n",
45
+ " <th>brand (manual review)</th>\n",
46
+ " <th>product_category (manual review)</th>\n",
47
+ " <th>sub_product_category (manual review)</th>\n",
48
+ " <th>strain_name (manual review)</th>\n",
49
+ " <th>product_weight_grams (manual review)</th>\n",
50
+ " </tr>\n",
51
+ " </thead>\n",
52
+ " <tbody>\n",
53
+ " <tr>\n",
54
+ " <th>0</th>\n",
55
+ " <td>bl-842922110296</td>\n",
56
+ " <td>STIIIZY - Birthday Cake Pod 1g</td>\n",
57
+ " <td>NaN</td>\n",
58
+ " <td>VAPE PENS 1G</td>\n",
59
+ " <td>NaN</td>\n",
60
+ " <td>1.0</td>\n",
61
+ " <td>STIIIZY</td>\n",
62
+ " <td>Vape</td>\n",
63
+ " <td>Vape</td>\n",
64
+ " <td>Birthday Cake</td>\n",
65
+ " <td>1</td>\n",
66
+ " </tr>\n",
67
+ " <tr>\n",
68
+ " <th>1</th>\n",
69
+ " <td>co-6ARLLX12</td>\n",
70
+ " <td>SMASH Hits - Hippie Slayer - Indoor - 1g</td>\n",
71
+ " <td>SMASH Hits</td>\n",
72
+ " <td>NaN</td>\n",
73
+ " <td>Hippie Slayer</td>\n",
74
+ " <td>NaN</td>\n",
75
+ " <td>SMASH Hits</td>\n",
76
+ " <td>Preroll</td>\n",
77
+ " <td>Joint</td>\n",
78
+ " <td>Hippie Slayer</td>\n",
79
+ " <td>1</td>\n",
80
+ " </tr>\n",
81
+ " <tr>\n",
82
+ " <th>2</th>\n",
83
+ " <td>bl-090035986141</td>\n",
84
+ " <td>Eighth Brothers - Black Jack 1g Preroll</td>\n",
85
+ " <td>NaN</td>\n",
86
+ " <td>PREROLLS</td>\n",
87
+ " <td>NaN</td>\n",
88
+ " <td>NaN</td>\n",
89
+ " <td>Eighth Brothers</td>\n",
90
+ " <td>Preroll</td>\n",
91
+ " <td>Joint</td>\n",
92
+ " <td>Black Jack</td>\n",
93
+ " <td>1</td>\n",
94
+ " </tr>\n",
95
+ " <tr>\n",
96
+ " <th>3</th>\n",
97
+ " <td>bl-850002822274</td>\n",
98
+ " <td>GRIZZLY PEAK - Indica Bone 0.5g 7PK Prerolls</td>\n",
99
+ " <td>NaN</td>\n",
100
+ " <td>PREROLL PACKS</td>\n",
101
+ " <td>NaN</td>\n",
102
+ " <td>NaN</td>\n",
103
+ " <td>GRIZZLY PEAK</td>\n",
104
+ " <td>Preroll</td>\n",
105
+ " <td>Joint</td>\n",
106
+ " <td>NaN</td>\n",
107
+ " <td>3.5</td>\n",
108
+ " </tr>\n",
109
+ " <tr>\n",
110
+ " <th>4</th>\n",
111
+ " <td>co-76GP441T</td>\n",
112
+ " <td>Minntz - Emerald Cut - Indoor - Joint - 1g</td>\n",
113
+ " <td>Minntz</td>\n",
114
+ " <td>NaN</td>\n",
115
+ " <td>Emerald Cut</td>\n",
116
+ " <td>NaN</td>\n",
117
+ " <td>Minntz</td>\n",
118
+ " <td>Preroll</td>\n",
119
+ " <td>Joint</td>\n",
120
+ " <td>Emerald Cut</td>\n",
121
+ " <td>1</td>\n",
122
+ " </tr>\n",
123
+ " </tbody>\n",
124
+ "</table>\n",
125
+ "</div>"
126
+ ],
127
+ "text/plain": [
128
+ " sku product_name (pos) brand (pos) \\\n",
129
+ "0 bl-842922110296 STIIIZY - Birthday Cake Pod 1g NaN \n",
130
+ "1 co-6ARLLX12 SMASH Hits - Hippie Slayer - Indoor - 1g SMASH Hits \n",
131
+ "2 bl-090035986141 Eighth Brothers - Black Jack 1g Preroll NaN \n",
132
+ "3 bl-850002822274 GRIZZLY PEAK - Indica Bone 0.5g 7PK Prerolls NaN \n",
133
+ "4 co-76GP441T Minntz - Emerald Cut - Indoor - Joint - 1g Minntz \n",
134
+ "\n",
135
+ " product_category (pos) strain_name (pos) product_weight_grams (pos) \\\n",
136
+ "0 VAPE PENS 1G NaN 1.0 \n",
137
+ "1 NaN Hippie Slayer NaN \n",
138
+ "2 PREROLLS NaN NaN \n",
139
+ "3 PREROLL PACKS NaN NaN \n",
140
+ "4 NaN Emerald Cut NaN \n",
141
+ "\n",
142
+ " brand (manual review) product_category (manual review) \\\n",
143
+ "0 STIIIZY Vape \n",
144
+ "1 SMASH Hits Preroll \n",
145
+ "2 Eighth Brothers Preroll \n",
146
+ "3 GRIZZLY PEAK Preroll \n",
147
+ "4 Minntz Preroll \n",
148
+ "\n",
149
+ " sub_product_category (manual review) strain_name (manual review) \\\n",
150
+ "0 Vape Birthday Cake \n",
151
+ "1 Joint Hippie Slayer \n",
152
+ "2 Joint Black Jack \n",
153
+ "3 Joint NaN \n",
154
+ "4 Joint Emerald Cut \n",
155
+ "\n",
156
+ " product_weight_grams (manual review) \n",
157
+ "0 1 \n",
158
+ "1 1 \n",
159
+ "2 1 \n",
160
+ "3 3.5 \n",
161
+ "4 1 "
162
+ ]
163
+ },
164
+ "metadata": {},
165
+ "output_type": "display_data"
166
+ }
167
+ ],
168
+ "source": [
169
+ "import warnings\n",
170
+ "warnings.filterwarnings('ignore')\n",
171
+ "\n",
172
+ "import numpy as np\n",
173
+ "import pandas as pd\n",
174
+ "\n",
175
+ "# Load comma-delimited file into pandas dataframe\n",
176
+ "cookies = pd.read_csv('../data/Cookies-AI-Gold-Standard - Cookies-AI-Gold-Standard.csv', sep=',')\n",
177
+ "\n",
178
+ "cookies.head()"
179
+ ]
180
+ },
181
+ {
182
+ "attachments": {},
183
+ "cell_type": "markdown",
184
+ "metadata": {},
185
+ "source": [
186
+ "### Data Preparation\n",
187
+ "We transform the dataset into a pandas dataframe, with a column for prompt and completion.\n",
188
+ "\n",
189
+ "The prompt contains the \"dirty\" columns, and completion contains the \"cleaned\" columns."
190
+ ]
191
+ },
192
+ {
193
+ "cell_type": "code",
194
+ "execution_count": null,
195
+ "metadata": {},
196
+ "outputs": [],
197
+ "source": [
198
+ "from datasets import Dataset, DatasetDict\n",
199
+ "from sklearn.model_selection import train_test_split\n",
200
+ "\n",
201
+ "# split the dataset into train and test datasets 80/20\n",
202
+ "cookies_train, cookies_test = train_test_split(cookies, test_size=0.20, random_state=42)\n",
203
+ "\n",
204
+ "# list of input and output columns\n",
205
+ "input_columns = ['sku','product_name (pos)','brand (pos)','product_category (pos)','strain_name (pos)','product_weight_grams (pos)']\n",
206
+ "output_columns = ['brand (manual review)','product_category (manual review)','sub_product_category (manual review)','strain_name (manual review)','product_weight_grams (manual review)']\n",
207
+ "\n",
208
+ "# function to convert pandas dataframe row to csv string\n",
209
+ "def row_to_csv(row):\n",
210
+ " csv_string = ','.join(str(value) for value in row.values)\n",
211
+ " return csv_string\n",
212
+ "\n",
213
+ "# create dataframe with prompt and completion columns\n",
214
+ "\n",
215
+ "# apply row_to_csv function to each row of the training dataframe\n",
216
+ "input_rows = cookies_train[input_columns ].apply(row_to_csv, axis=1)\n",
217
+ "output_rows = cookies_train[output_columns].apply(row_to_csv, axis=1)\n",
218
+ "\n",
219
+ "# create dataframe with prompt and completion columns for training dataset\n",
220
+ "prompt_df = pd.DataFrame(\n",
221
+ " zip(input_rows,\n",
222
+ " output_rows)\n",
223
+ " , columns = ['prompt','completion'])\n",
224
+ "\n",
225
+ "# save dataframe to jsonl file for training\n",
226
+ "prompt_df.to_json(\"../data/cookies_train.jsonl\", orient='records', lines=True)\n",
227
+ "\n",
228
+ "# apply row_to_csv function to each row of the test dataframe\n",
229
+ "input_test_rows = cookies_test[input_columns ].apply(row_to_csv, axis=1)\n",
230
+ "output_test_rows = cookies_test[output_columns].apply(row_to_csv, axis=1)\n",
231
+ "\n",
232
+ "# create dataframe with prompt and completion columns for test dataset\n",
233
+ "test_df = pd.DataFrame(\n",
234
+ " zip(input_test_rows,\n",
235
+ " output_test_rows)\n",
236
+ " , columns = ['prompt','completion'])\n",
237
+ "test_df.head()\n",
238
+ "\n",
239
+ "# save dataframe to jsonl file for test\n",
240
+ "test_df.to_json(\"../data/cookies_test.jsonl\", orient='records', lines=True)"
241
+ ]
242
+ },
243
+ {
244
+ "cell_type": "code",
245
+ "execution_count": null,
246
+ "metadata": {},
247
+ "outputs": [],
248
+ "source": [
249
+ "import pandas as pd\n",
250
+ "\n",
251
+ "# write a function that samples n rows from a jsonl file\n",
252
+ "def sample_jsonl(path_or_buf='../data/cookies_train.jsonl',n_samples=5): \n",
253
+ " jsonObj = pd.read_json(path_or_buf=path_or_buf, lines=True)\n",
254
+ " return jsonObj.sample(n_samples, random_state=42)"
255
+ ]
256
+ },
257
+ {
258
+ "cell_type": "code",
259
+ "execution_count": null,
260
+ "metadata": {},
261
+ "outputs": [],
262
+ "source": [
263
+ "# write a function that adds prompt and completion samples to messages\n",
264
+ "def add_samples(messages, n_samples=None):\n",
265
+ " if n_samples is None:\n",
266
+ " return messages\n",
267
+ " samples = sample_jsonl(n_samples=n_samples)\n",
268
+ " for i in range(n_samples):\n",
269
+ " messages.append({\"role\": \"user\", \"content\": samples.iloc[i]['prompt']})\n",
270
+ " messages.append({\"role\": \"assistant\", \"content\": samples.iloc[i]['completion']})\n",
271
+ " return messages"
272
+ ]
273
+ }
274
+ ],
275
+ "metadata": {
276
+ "language_info": {
277
+ "name": "python"
278
+ },
279
+ "orig_nbformat": 4
280
+ },
281
+ "nbformat": 4,
282
+ "nbformat_minor": 2
283
+ }
app/openai_chat_completion.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ from dotenv import load_dotenv
4
+ load_dotenv()
5
+ import openai
6
+
7
+ # # set OPENAI_API_KEY environment variable from .env file
8
+ # openai.api_key = os.getenv("OPENAI_API_KEY")
9
+
10
+ # # read in llm-data-cleaner/prompts/gpt4-system-message.txt file into variable system_message
11
+ # system_message = open('../prompts/gpt4-system-message.txt', 'r').read()
12
+
13
class OpenAIChatCompletions:
    """Send prompts to the OpenAI chat-completion API, optionally with few-shot examples.

    Few-shot examples are prompt/completion pairs sampled from a JSON-lines
    file whose records have ``prompt`` and ``completion`` fields.

    Args:
        model: Model name passed to ``openai.ChatCompletion.create``.
        system_message: Optional system message placed first in every
            request. When falsy, no system message is sent.
        few_shot_path: JSON-lines file the few-shot examples are sampled
            from. Defaults to the training file used previously.
    """

    def __init__(self, model="gpt-4", system_message=None,
                 few_shot_path='../data/cookies_train.jsonl'):
        self.model = model
        self.system_message = system_message
        self.few_shot_path = few_shot_path

    def openai_chat_completion(self, prompt, n_shot=None):
        """Request a chat completion for ``prompt``.

        The message list is: system message (if any), then ``n_shot``
        user/assistant example pairs (if ``n_shot`` is not ``None``), then
        ``prompt`` as the final user message.

        Args:
            prompt: Text sent as the final user message.
            n_shot: Number of few-shot example pairs to prepend, or ``None``
                for none.

        Returns:
            The full response returned by ``openai.ChatCompletion.create``,
            not just the completion text.
        """
        messages = [{"role": "system", "content": self.system_message}] if self.system_message else []

        # _add_samples is a no-op when n_shot is None, so no extra guard is needed.
        messages = self._add_samples(messages, n_samples=n_shot, path_or_buf=self.few_shot_path)
        messages.append({"role": "user", "content": prompt})

        return openai.ChatCompletion.create(model=self.model, messages=messages)

    def predict_jsonl(self, path_or_buf='../data/cookies_test.jsonl', n_samples=None, n_shot=None):
        """Run the model over the prompts stored in a JSON-lines file.

        Args:
            path_or_buf: JSON-lines source with ``prompt`` and ``completion``
                fields.
            n_samples: When given, only this many rows are used, drawn with a
                fixed seed (42) so the selection is reproducible.
            n_shot: Forwarded to ``openai_chat_completion`` for every prompt.

        Returns:
            A ``(prompts, completions, predictions)`` tuple of equally long
            lists. ``predictions`` holds the full API responses, one API call
            per prompt.
        """
        frame = pd.read_json(path_or_buf=path_or_buf, lines=True)
        if n_samples is not None:
            frame = frame.sample(n_samples, random_state=42)

        prompts = frame['prompt'].tolist()
        completions = frame['completion'].tolist()
        predictions = [self.openai_chat_completion(prompt, n_shot=n_shot) for prompt in prompts]

        return prompts, completions, predictions

    @staticmethod
    def _add_samples(messages, n_samples=None, path_or_buf='../data/cookies_train.jsonl'):
        """Append sampled prompt/completion pairs to ``messages`` as chat turns.

        Each sampled row adds a ``user`` message (the prompt) followed by an
        ``assistant`` message (the completion). ``messages`` is modified in
        place and also returned.

        Args:
            messages: List of chat message dicts to extend.
            n_samples: Number of pairs to add; ``None`` leaves ``messages``
                untouched.
            path_or_buf: JSON-lines file the pairs are sampled from.

        Returns:
            The same ``messages`` list.
        """
        if n_samples is None:
            return messages

        samples = OpenAIChatCompletions._sample_jsonl(path_or_buf=path_or_buf, n_samples=n_samples)
        for example_prompt, example_completion in zip(samples['prompt'], samples['completion']):
            messages.append({"role": "user", "content": example_prompt})
            messages.append({"role": "assistant", "content": example_completion})

        return messages

    @staticmethod
    def _sample_jsonl(path_or_buf='../data/cookies_train.jsonl', n_samples=5):
        """Return ``n_samples`` rows of a JSON-lines file as a pandas DataFrame.

        The seed is fixed (42), so repeated calls with the same file and
        ``n_samples`` return the same rows.
        """
        frame = pd.read_json(path_or_buf=path_or_buf, lines=True)
        return frame.sample(n_samples, random_state=42)
app/test.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai_chat_completion import OpenAIChatCompletions
2
+
3
# Read the system message from llm-data-cleaner/prompts/gpt4-system-message.txt.
# The path is relative to the current working directory. A context manager is
# used so the file handle is closed as soon as the text has been read.
with open('../prompts/gpt4-system-message.txt', 'r') as system_message_file:
    system_message = system_message_file.read()

# Create an OpenAIChatCompletions instance that sends this system message with every request.
chat_completions = OpenAIChatCompletions(model="gpt-4", system_message=system_message)

# Single-prompt example, kept for reference:
# prompt = "co-2MFE5QVF,Chill Medicated - Watermelon - Syrup - 250mg,Chill Medicated,nan,nan,nan"
# completion = chat_completions.openai_chat_completion(prompt, n_shot=1)
# print(completion)

# Predict on two sampled test rows, each request carrying two few-shot examples.
prompts, completions, predictions = chat_completions.predict_jsonl(n_samples=2, n_shot=2)
print("Prompts:", prompts)
print("Completions:", completions)
print("Predictions:", predictions)
app/util.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def compare_completion_and_prediction(completion, prediction, verbose=False):
    """Compare a completion and a prediction field by field.

    Both strings are split on commas and compared position by position.

    Args:
        completion: Comma-separated reference string.
        prediction: Comma-separated predicted string.
        verbose: When True, print both inputs and their types before
            comparing.

    Returns:
        A dict (not a dataframe) with these keys:
            ``completion``: list of completion fields.
            ``prediction``: list of prediction fields.
            ``matches``: one bool per completion field, True where the
                prediction has an identical field at the same position.
            ``num_correct``: number of True entries in ``matches``.
    """
    if verbose:
        print("Completion:", completion, f"type({type(completion)}):")
        print("Prediction:", prediction, f"type({type(prediction)}):")

    # split completion and prediction strings on comma character
    completion = completion.split(',')
    prediction = prediction.split(',')

    # A prediction may have fewer fields than the completion. Pad it so the
    # missing positions count as mismatches instead of raising IndexError.
    # Extra prediction fields beyond the completion's length are ignored.
    missing = len(completion) - len(prediction)
    padded_prediction = prediction + [None] * max(missing, 0)
    matches = [
        expected == predicted
        for expected, predicted in zip(completion, padded_prediction)
    ]

    return {
        "completion": completion,
        "prediction": prediction,
        "matches": matches,
        "num_correct": sum(matches),
    }
app/wandb/run-20230626_114056-rbtf91s6/files/conda-environment.yaml ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: kd-llm-dc
2
+ channels:
3
+ - conda-forge
4
+ - defaults
5
+ dependencies:
6
+ - _libgcc_mutex=0.1=main
7
+ - _openmp_mutex=5.1=1_gnu
8
+ - asttokens=2.2.1=pyhd8ed1ab_0
9
+ - backcall=0.2.0=pyh9f0ad1d_0
10
+ - backports=1.0=pyhd8ed1ab_3
11
+ - backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
12
+ - bzip2=1.0.8=h7b6447c_0
13
+ - ca-certificates=2023.5.7=hbcca054_0
14
+ - decorator=5.1.1=pyhd8ed1ab_0
15
+ - entrypoints=0.4=pyhd8ed1ab_0
16
+ - executing=1.2.0=pyhd8ed1ab_0
17
+ - ipykernel=6.15.0=pyh210e3f2_0
18
+ - ipython=8.14.0=pyh41d4057_0
19
+ - jedi=0.18.2=pyhd8ed1ab_0
20
+ - jupyter_client=7.3.4=pyhd8ed1ab_0
21
+ - jupyter_core=5.3.1=py310hff52083_0
22
+ - ld_impl_linux-64=2.38=h1181459_1
23
+ - libffi=3.4.4=h6a678d5_0
24
+ - libgcc-ng=11.2.0=h1234567_1
25
+ - libgomp=11.2.0=h1234567_1
26
+ - libsodium=1.0.18=h36c2ea0_1
27
+ - libstdcxx-ng=11.2.0=h1234567_1
28
+ - libuuid=1.41.5=h5eee18b_0
29
+ - matplotlib-inline=0.1.6=pyhd8ed1ab_0
30
+ - ncurses=6.4=h6a678d5_0
31
+ - nest-asyncio=1.5.6=pyhd8ed1ab_0
32
+ - openssl=3.0.8=h7f8727e_0
33
+ - packaging=23.1=pyhd8ed1ab_0
34
+ - parso=0.8.3=pyhd8ed1ab_0
35
+ - pexpect=4.8.0=pyh1a96a4e_2
36
+ - pickleshare=0.7.5=py_1003
37
+ - platformdirs=3.5.3=pyhd8ed1ab_0
38
+ - prompt-toolkit=3.0.38=pyha770c72_0
39
+ - prompt_toolkit=3.0.38=hd8ed1ab_0
40
+ - ptyprocess=0.7.0=pyhd3deb0d_0
41
+ - pure_eval=0.2.2=pyhd8ed1ab_0
42
+ - pygments=2.15.1=pyhd8ed1ab_0
43
+ - python=3.10.11=h955ad1f_3
44
+ - python-dateutil=2.8.2=pyhd8ed1ab_0
45
+ - python_abi=3.10=2_cp310
46
+ - readline=8.2=h5eee18b_0
47
+ - six=1.16.0=pyh6c4a22f_0
48
+ - sqlite=3.41.2=h5eee18b_0
49
+ - stack_data=0.6.2=pyhd8ed1ab_0
50
+ - tk=8.6.12=h1ccaba5_0
51
+ - traitlets=5.9.0=pyhd8ed1ab_0
52
+ - typing-extensions=4.6.3=hd8ed1ab_0
53
+ - typing_extensions=4.6.3=pyha770c72_0
54
+ - wcwidth=0.2.6=pyhd8ed1ab_0
55
+ - xz=5.4.2=h5eee18b_0
56
+ - zeromq=4.3.4=h9c3ff4c_1
57
+ - zlib=1.2.13=h5eee18b_0
58
+ - pip:
59
+ - aiohttp==3.8.4
60
+ - aiosignal==1.3.1
61
+ - appdirs==1.4.4
62
+ - async-timeout==4.0.2
63
+ - attrs==23.1.0
64
+ - certifi==2023.5.7
65
+ - charset-normalizer==3.1.0
66
+ - click==8.1.3
67
+ - datasets==2.13.1
68
+ - debugpy==1.5.1
69
+ - dill==0.3.6
70
+ - docker-pycreds==0.4.0
71
+ - docopt==0.6.2
72
+ - filelock==3.12.2
73
+ - frozenlist==1.3.3
74
+ - fsspec==2023.6.0
75
+ - gitdb==4.0.10
76
+ - gitpython==3.1.31
77
+ - huggingface-hub==0.15.1
78
+ - idna==3.4
79
+ - joblib==1.2.0
80
+ - jupyter-core==5.3.1
81
+ - multidict==6.0.4
82
+ - multiprocess==0.70.14
83
+ - numpy==1.24.3
84
+ - openai==0.27.8
85
+ - pandas==2.0.2
86
+ - pathtools==0.1.2
87
+ - pip==23.1.2
88
+ - pipreqs==0.4.13
89
+ - protobuf==4.23.3
90
+ - psutil==5.9.0
91
+ - pyarrow==12.0.1
92
+ - python-dotenv==1.0.0
93
+ - pytz==2023.3
94
+ - pyyaml==6.0
95
+ - pyzmq==25.1.0
96
+ - requests==2.31.0
97
+ - scikit-learn==1.2.2
98
+ - scipy==1.10.1
99
+ - sentry-sdk==1.25.1
100
+ - setproctitle==1.3.2
101
+ - setuptools==67.8.0
102
+ - smmap==5.0.0
103
+ - tenacity==8.2.2
104
+ - threadpoolctl==3.1.0
105
+ - tornado==6.1
106
+ - tqdm==4.65.0
107
+ - tzdata==2023.3
108
+ - urllib3==2.0.3
109
+ - wandb==0.15.4
110
+ - wheel==0.38.4
111
+ - xxhash==3.2.0
112
+ - yarg==0.1.9
113
+ - yarl==1.9.2
114
+ prefix: /home/cmagganas/anaconda3/envs/kd-llm-dc
app/wandb/run-20230626_114056-rbtf91s6/files/config.yaml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ python_version: 3.10.11
7
+ cli_version: 0.15.4
8
+ is_jupyter_run: true
9
+ is_kaggle_kernel: false
10
+ start_time: 1687804857.029691
11
+ t:
12
+ 1:
13
+ - 55
14
+ - 95
15
+ 2:
16
+ - 55
17
+ - 95
18
+ 3:
19
+ - 2
20
+ - 7
21
+ - 23
22
+ - 51
23
+ 4: 3.10.11
24
+ 5: 0.15.4
25
+ 8:
26
+ - 1
27
+ - 5
28
+ m:
29
+ - 1: _timestamp
30
+ - 1: usage/total_tokens
31
+ 5: 1
32
+ 6:
33
+ - 1
34
+ - 3
35
+ - 1: usage/prompt_tokens
36
+ 5: 1
37
+ 6:
38
+ - 1
39
+ - 3
40
+ - 1: usage/completion_tokens
41
+ 5: 1
42
+ 6:
43
+ - 1
44
+ - 3
45
+ - 1: usage/elapsed_time
46
+ 5: 1
47
+ 6:
48
+ - 1
49
+ - 3
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_0_75f83ac87fa164c83b8e.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message.txt\n\n\n**user**: co-2MFE5QVF,Chill Medicated - Watermelon - Syrup - 250mg,Chill Medicated,nan,nan,nan\n", "\n\n**assistant**: Hello! It looks like you mentioned a product: Chill Medicated Watermelon Syrup with 250mg potency. Chill Medicated is presumably the brand behind this product.\n\nHowever, it is unclear what kind of information you are looking for. Can you please provide more context or ask a specific question about this product? This will help me answer in a way that fits your needs.\n", "gpt-4-0314", "2023-06-26T11:43:52", "2023-06-26T11:43:59.429335", "chatcmpl-7VlTkjAqXNRWfltMPpr5v37uBJIsg", "openai", "rbtf91s6", 7.429335340999387, 54, 76, 130]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_100_f673fdf8da564f8db616.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: co-2MFE5QVF,Chill Medicated - Watermelon - Syrup - 250mg,Chill Medicated,nan,nan,nan\n", "\n\n**assistant**: It seems like you are looking for information about Chill Medicated - Watermelon Syrup with 250mg content. Chill Medicated is known for producing cannabis-infused syrups, and their Watermelon flavored syrup is often used for relaxation and recreational purposes.\n\nPlease note that I am an AI language model, and I am not endorsing or promoting the use of cannabis products. It's important to follow local laws and regulations, as well as consult with a healthcare professional if you are considering using cannabis for medicinal purposes.\n", "gpt-4-0314", "2023-06-26T11:52:45", "2023-06-26T11:52:54.504518", "chatcmpl-7VlcLAoSfGbbqW9UIGm5QMc7eah9L", "openai", "rbtf91s6", 9.504517934999967, 55, 103, 158]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_101_df8f84304e946465232c.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: bl-111630024545,Feelz - Space Cowboy 3.5g,nan,FLOWERS 3.5G,nan,nan\n", "\n\n**assistant**: Hello! It seems like you are looking for information regarding a product called \"Feelz - Space Cowboy 3.5g\" which falls under the category of FLOWERS 3.5G. Please provide more context or specific questions you would like answered, and I will do my best to help.\n", "gpt-4-0314", "2023-06-26T11:52:55", "2023-06-26T11:53:03.838794", "chatcmpl-7VlcVcxS9IegeWVDkFy2z8aCcqBs7", "openai", "rbtf91s6", 8.838793879999685, 52, 62, 114]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_102_bf3b4ae7fed0ee456ca9.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: fl-8voAjt83sD,Champelli | Xclusivo 3.5g | Eighths,Champelli,PackedBud,Champelli | Xclusivo 3.5g,3.5\n", "\n\n**assistant**: Hello! It looks like you're interested in Champelli Xclusivo 3.5g Eighths. \n\nChampelli Xclusivo is a premium cannabis product known for its high-quality and potent strains. Their 3.5g eighths offer a variety of popular strains which cater to different preferences and needs. PackedBud might be a distributor that provides these products, ensuring that you receive fresh and potent cannabis.\n\nTo learn more or purchase Champelli Xclusivo 3.5g Eighths, I recommend visiting your local dispensary or an online cannabis store, depending on the legality and availability in your area.\n\nPlease note that I am an AI language model, and I am unable to facilitate any purchases or transactions. It's also important to follow the laws and regulations regarding cannabis use in your location.\n\nIf you have any other questions or need further assistance, feel free to ask!\n", "gpt-4-0314", "2023-06-26T11:53:04", "2023-06-26T11:53:19.913999", "chatcmpl-7VlceM4xLntrlxlinLnUJd4Km8vMl", "openai", "rbtf91s6", 15.913999183999294, 72, 185, 257]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_103_9784775529cd9b0c478d.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: bl-073133213364,CAM - Mellowz #7 7g,nan,FLOWERS 7G,nan,nan\n", "\n\n**assistant**: Hello! It looks like you're inquiring about the product \"CAM - Mellowz #7,\" which is a 7-gram package of cannabis flowers. Unfortunately, there is not much information provided, so I cannot give more specific details about this product. If you have any questions or need more information, please provide more context or details, and I will do my best to help!\n", "gpt-4-0314", "2023-06-26T11:53:21", "2023-06-26T11:53:28.756413", "chatcmpl-7VlcvsyAP6UCMxGoViTeMJEorRV4P", "openai", "rbtf91s6", 7.756412714000362, 50, 79, 129]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_104_b69d429713afe585607d.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: fl-fwJQL2AWnS,Backpack Boyz | Bubblegum Gelato Sizzurp 1000mg,Backpack Boyz,Tincture,nan,nan\n", "\n\n**assistant**: Hello! It seems like you are interested in the Backpack Boyz Bubblegum Gelato Sizzurp 1000mg tincture. To better assist you, please provide more information or ask a specific question about the product.\n", "gpt-4-0314", "2023-06-26T11:53:29", "2023-06-26T11:53:33.569959", "chatcmpl-7Vld3cnmYqFqj77789o1B403tNS6U", "openai", "rbtf91s6", 4.569959391000339, 60, 48, 108]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_105_8f9030467825d6eb9154.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: fl-dXE5cH45AQ,Raw Garden | Pink Lemonade Crushed Diamonds 1g,Raw Garden,Concentrate,nan,1.0\n", "\n\n**assistant**: Hello! It looks like you're inquiring about the Raw Garden Pink Lemonade Crushed Diamonds 1g concentrate.\n\nRaw Garden is a well-respected brand known for its quality cannabis products. The Pink Lemonade Crushed Diamonds concentrate is a high-potency extract derived from the cannabis plant. Concentrates like this one are often used for their intense effects and unique flavor profiles.\n\nProduct: Raw Garden Pink Lemonade Crushed Diamonds 1g\nBrand: Raw Garden\nCategory: Concentrate\nQuantity: 1.0 gram\n\nPlease remember to always consume cannabis products responsibly, and consult a medical professional if you have any concerns or questions regarding their usage.\n", "gpt-4-0314", "2023-06-26T11:53:34", "2023-06-26T11:53:45.988982", "chatcmpl-7Vld89t8QYfwBULmGxBauA0r7P9gp", "openai", "rbtf91s6", 11.98898184600057, 55, 134, 189]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_106_975584bb2f8ac7976684.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: md-1159983,Baby Jeeter Peaches | Infused Preroll 5pk - 2.5g,Jeeter,Infused Prerolls,nan,nan\n", "\n\n**assistant**: Product Name: Baby Jeeter Peaches Infused Preroll 5pk (2.5g)\nBrand: Jeeter\nCategory: Infused Prerolls\n\nThis pack includes five infused prerolls (each containing 0.5 grams of flower) for a total of 2.5 grams. The Baby Jeeter Peaches prerolls by Jeeter offer an enjoyable smoking experience with a delicious peach flavor. These infused prerolls are perfect for anyone looking to unwind and enjoy a tasty, hassle-free smoke.\n", "gpt-4-0314", "2023-06-26T11:53:47", "2023-06-26T11:53:59.355972", "chatcmpl-7VldLx6QUKkKfE5q9odEeZVJwYxhW", "openai", "rbtf91s6", 12.355971499000589, 60, 105, 165]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_107_353750bd44b04ac7162b.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: co-6WGV1Z0H,Ocean Breeze Cultivators - Truffle Pupil - Indoor - Blunt - 1g,Ocean Breeze Cultivators,nan,Truffle Pupil,nan\n", "\n\n**assistant**: Hello! It seems like you're looking for information on a product called \"Truffle Pupil\" by Ocean Breeze Cultivators. It is a 1g Indoor Blunt.\n\nUnfortunately, I don't have sufficient information about this specific product. However, I can tell you a little about Ocean Breeze Cultivators and general tips for smoking a blunt.\n\nOcean Breeze Cultivators is a cannabis company that grows and produces various strains of cannabis products. The \"Truffle Pupil\" is likely to be one of their strain offerings. The effects and potency of this specific strain might vary, so it's recommended that you consult the packaging or dispensary for more information on THC and CBD percentages.\n\nWhen smoking a blunt, it's important to take your time and enjoy the experience. Start by inhaling slowly and be cautious not to take in too much smoke at once. Take breaks between puffs and ensure you have water or another beverage handy to stay hydrated. Also, remember that sharing a blunt with others can carry the risk of spreading germs, so it's best to enjoy it individually, especially during the ongoing pandemic.\n\nPlease let me know if you have any other questions or need information on any other topic. I'm here to help!\n", "gpt-4-0314", "2023-06-26T11:54:00", "2023-06-26T11:54:34.819720", "chatcmpl-7VldYbCjPPC839qSooOjqtjTYyjo7", "openai", "rbtf91s6", 34.81971949600029, 67, 253, 320]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_108_d4754b6aea82a8c16ed9.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: fl-doWkMzvFq2,Cookies | Triple Scoop Preroll 1g | Joints,Cookies,Joint,Cookies | Triple Scoop Preroll 1g,1.0\n", "\n\n**assistant**: It looks like you are interested in the Cookies Triple Scoop Preroll 1g joint. Cookies Triple Scoop is a popular cannabis strain that offers a delightful mix of flavors and effects. With a 1-gram preroll, you can enjoy the convenience of a pre-rolled joint without the hassle of rolling it yourself. Triple Scoop offers a uniquely satisfying experience for both recreational and medical users.\n\nProduct: Cookies Triple Scoop Preroll\nType: Joint\nWeight: 1 gram\nBrand: Cookies\n\nPlease let me know if you need any more information or have any questions about this product.\n", "gpt-4-0314", "2023-06-26T11:54:35", "2023-06-26T11:54:53.414244", "chatcmpl-7Vle7ITXk2D9234jwInXFdgwcaSoY", "openai", "rbtf91s6", 18.414243819999683, 66, 124, 190]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_109_bcb8cb29447c99155339.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: bl-842922110296,STIIIZY - Birthday Cake Pod 1g,nan,VAPE PENS 1G,nan,1.0\n", "\n\n**assistant**: Hello! It looks like you are mentioning the STIIIZY - Birthday Cake Pod 1g. This is a vape pen cartridge containing 1 gram of cannabis concentrate. The flavor profile is inspired by a birthday cake, which is usually sweet and creamy. Vape pens like this are popular for their convenience and discreet use. If you need any additional information, please don't hesitate to ask!\n", "gpt-4-0314", "2023-06-26T11:54:54", "2023-06-26T11:55:03.183406", "chatcmpl-7VleQ2KysWpsoPwcl3ZI91oM1KAJ7", "openai", "rbtf91s6", 9.183406203000231, 55, 80, 135]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_10_d888c45dceed81f58d7a.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message.txt\n\n\n**user**: co-6GGKA0GV,Cookies x The Grower Circle - TangEray - Indoor - Infused Joint - 1g,nan,nan,nan,nan\n", "\n\n**assistant**: It seems like you're talking about a product from the collaboration between Cookies and The Grower Circle. The product is called TangEray, which is a 1-gram indoor-infused joint. Unfortunately, you didn't provide any specific information or ask a question related to this product. If you would like more information or have any questions, please feel free to ask and I would be happy to help.\n", "gpt-4-0314", "2023-06-26T11:46:18", "2023-06-26T11:46:26.313300", "chatcmpl-7VlW67EpfYEde31aEs26p9iJEecyh", "openai", "rbtf91s6", 8.313300085000265, 56, 82, 138]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_110_6b738aa2166ace2f9d56.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: co-6GGKA0GV,Cookies x The Grower Circle - TangEray - Indoor - Infused Joint - 1g,nan,nan,nan,nan\n", "\n\n**assistant**: It seems you are looking for information on TangEray, an indoor infused joint by Cookies x The Grower Circle. TangEray is a 1-gram cannabis joint that provides a potent and flavorful experience.\n\nUnfortunately, the information you provided is not enough for me to give a detailed response. Nonetheless, here's some basic information about infused joints and the two brands involved:\n\nInfused Joints: \nInfused joints, also known as pre-rolls, are pre-made cannabis joints that are filled with ground cannabis flower and often enhanced with cannabis concentrates, such as wax or oil. These concentrates are responsible for the \"infused\" label and can offer a stronger, more potent high compared to regular joints.\n\nCookies:\nCookies is a well-known cannabis lifestyle brand founded by rapper and entrepreneur Berner. They are recognized globally for their premium cannabis flower strains and innovative products. Cookies focuses on genetics, innovation, and the overall quality of their products.\n\nThe Grower Circle:\nThe Grower Circle is a group of expert cultivators and cannabis connoisseurs, dedicated to providing high-quality cannabis products. Their collaboration with Cookies further enhances the quality and experience of their cannabis products.\n\nAs for TangEray, it's an indoor, infused joint created as a collaboration between Cookies and The Grower Circle. 
To know more about it or its effects, you may want to visit a local dispensary or check online resources for more detailed information, keeping in mind the legal status of cannabis in your region.\n", "gpt-4-0314", "2023-06-26T11:55:04", "2023-06-26T11:55:32.640890", "chatcmpl-7VleaFAnv4qWX3G29gNiEAsR4IKVh", "openai", "rbtf91s6", 28.64088961699963, 57, 300, 357]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_111_5aed3974ba4952f876eb.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: co-7YGGNHBR,Kanha - Nano Blood Orange Bliss Indica - Gummy - 10ct - 100mg,Kanha,nan,nan,nan\n", "\n\n**assistant**: Hello! It looks like you're interested in the Kanha Nano Blood Orange Bliss Indica Gummy. Below, you'll find some information about this product:\n\n- Brand: Kanha\n- Type: Indica Gummy\n- Flavor: Blood Orange Bliss\n- Serving Size: 10 gummies per pack\n- Total THC Content: 100mg\n- THC per gummy: 10mg\n- Key Feature: Nanomolecular Technology for faster, more efficient absorption\n\nKanha's Nano Blood Orange Bliss Indica Gummies are infused with high-quality cannabis oil and nanomolecular technology that provides a faster, more efficient absorption of THC. This means you'll feel the effects more quickly and may experience a smoother experience compared to traditional edibles. These gummies have a delicious blood orange flavor and are perfect for those looking for a relaxing and calming experience.\n\nPlease note that you should start with a low dosage and adjust as needed. It is always best to consult with a healthcare professional or a budtender for personalized advice based on your individual needs and tolerance. Keep in mind that the effects of edibles can vary greatly depending on factors like metabolism and body chemistry, so what works for someone else may not work the same way for you. Remember to enjoy responsibly and keep these gummies away from children.\n", "gpt-4-0314", "2023-06-26T11:55:33", "2023-06-26T11:56:08.063033", "chatcmpl-7Vlf3CbhKfGocBHh6oW4nUTbu69rd", "openai", "rbtf91s6", 35.06303332399966, 58, 266, 324]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_112_3a7258bfdca8fcd1719b.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: md-1242517,BOBBI HYLL | Indoor - 3.5g,Fresca,Flower,nan,3.5\n", "\n\n**assistant**: Hello! It seems like you're interested in information about the product: BOBBI HYLL Indoor 3.5g - Fresca Flower.\n\nBOBBI HYLL's Indoor 3.5g is a high-quality cannabis flower product that offers an enjoyable and aromatic experience. As a 3.5g (1/8 oz) unit, this product is a convenient option for personal consumption or sharing with friends.\n\nThe strain featured in this product is Fresca, known for its fruity and refreshing aroma. This flower is typically associated with an uplifting and invigorating effect, making it a suitable choice for social or creative activities. Users of Fresca often report a sense of increased energy, focus, and a pleasant head high when consuming this strain. Its balanced profile of THC and other cannabinoids make it a viable option for both recreational and medicinal users.\n\nPlease note that effects may vary based on individual factors such as tolerance, dosage, and consumption methods. Always use cannabis products responsibly and consult a healthcare professional if you have any concerns or questions.\n\nEnjoy your BOBBI HYLL Indoor 3.5g Fresca Flower and let us know if you have any questions or need further assistance!\n", "gpt-4-0314", "2023-06-26T11:56:10", "2023-06-26T11:56:40.822175", "chatcmpl-7VlfehpIxe7ySnByC6Q9UGmf39tnm", "openai", "rbtf91s6", 30.822175170000264, 52, 241, 293]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_113_71838fad78e890a3e4cd.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: fl-9nEPGsnYtY,Flav | Blueberry Belts 100mg,Flav,Edible,nan,nan\n", "\n\n**assistant**: It seems like you're looking for information on Flav's Blueberry Belts 100mg edible. Flav is a company known for producing cannabis-infused products, and one of their popular offerings is the Blueberry Belts, which is an edible that contains 100mg of THC.\n\nThese Blueberry Belts may provide the desirable effects of cannabis without the need for smoking or vaporizing. It's important to consume edibles responsibly, as the effects can be much stronger than other consumption methods. For novice users, it's recommended to start with a low dosage, such as 5-10mg, and wait for at least an hour before consuming more, to avoid overconsumption.\n\nAs always, please ensure you follow your local laws regarding the possession and consumption of cannabis products, and remember that cannabis affects everyone differently. It's always best to start with a low dose and gradually increase it if needed.\n", "gpt-4-0314", "2023-06-26T11:56:40", "2023-06-26T11:56:54.107206", "chatcmpl-7Vlg8GQNsnCMuP12dlbfdh4tqdn0W", "openai", "rbtf91s6", 14.107205561000228, 52, 184, 236]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_114_200ab5992b87e0e3b2d4.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: md-1347497,Cherry Pie - 1g - Pod,Left Coast Extracts,Vape,nan,nan\n", "\n\n**assistant**: You have selected the following product:\n\nProduct ID: md-1347497\nProduct: Cherry Pie - 1g - Pod\nBrand: Left Coast Extracts\nCategory: Vape\nTHC Content: Not specified\nCBD Content: Not specified\n\nThe Cherry Pie pod from Left Coast Extracts is a 1g vape cartridge designed to deliver smooth and satisfying hits. Cherry Pie is a popular strain known for its balanced blend of fruity, tart, and sweet flavors, evoking the sensation of enjoying a delicious slice of cherry pie. It offers a unique vaping experience with its mix of sweet cherry and earthy, herbal notes.\n\nHowever, please note that the THC and CBD content are not specified. If you have specific requirements or concerns about the potency of this product, we recommend contacting Left Coast Extracts directly for more information.\n\nPlease consume responsibly and follow the recommended dosage instructions provided by the manufacturer.\n", "gpt-4-0314", "2023-06-26T11:56:54", "2023-06-26T11:57:10.579232", "chatcmpl-7VlgMzsYnCHB41MM6ZSQPDDC2bHna", "openai", "rbtf91s6", 16.579231394999624, 47, 182, 229]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_115_3e8ce0d79ca9a77c577e.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: co-76GP441T,Minntz - Emerald Cut - Indoor - Joint - 1g,Minntz,nan,Emerald Cut,nan\n", "\n\n**assistant**: Hello! It looks like you're searching for information on Minntz Emerald Cut, a 1g indoor joint. Unfortunately, I can't provide specific details on this specific product as I am an AI language model and have limitations regarding real-time, updated product information.\n\nHowever, I can provide some general information about cannabis joints.\n\nA joint is a rolled cannabis cigarette, typically containing around 1g of cannabis material. It is used for smoking and offers various effects depending on the strain used. An indoor-grown cannabis plant usually produces higher quality flowers due to the controlled environment it grows in, which may result in a smoother and more potent experience.\n\nEmerald Cut might be the name of the strain used in this joint. Cannabis strains vary in terms of flavor, aroma, and effects. Sativa-dominant strains tend to provide uplifting and energizing experiences, whereas Indica-dominant strains are likely to provide a more relaxing and calming effect. Hybrid strains are a mix of both, with effects depending on the specific combination of Sativa and Indica properties.\n\nIf you're interested in learning more about the specific Minntz Emerald Cut joint or any other cannabis-related topics, I would suggest contacting your local dispensary or exploring online resources dedicated to cannabis products and discussions.\n", "gpt-4-0314", "2023-06-26T11:57:11", "2023-06-26T11:57:35.146583", "chatcmpl-7VlgdhgYBSUuMB38xQt4lYHeb481r", "openai", "rbtf91s6", 24.146582954999758, 54, 256, 310]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_116_27261d37b58e97119be1.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: co-5RAWYHYQ,The Growers Circle - Double Down - Indoor - 3.5g,nan,nan,nan,nan\n", "\n\n**assistant**: Hello! It seems you are looking for information on a cannabis product called \"The Growers Circle - Double Down - Indoor - 3.5g\". The Growers Circle is a cannabis brand that produces high-quality cannabis strains using indoor growing methods.\n\nThe product you've mentioned, \"Double Down\" is one of their strains with a 3.5g (1/8 oz) package. Since other details were not provided, we cannot provide specifics on the THC and CBD content, strain type (sativa, indica, or hybrid), and other characteristics of this product.\n\nTo get more information, I recommend you to visit the company's website or contact a local dispensary that carries The Growers Circle products. They will be able to provide you with accurate information regarding the specific strain, its effects, and other relevant details.\n", "gpt-4-0314", "2023-06-26T11:57:36", "2023-06-26T11:57:57.028817", "chatcmpl-7Vlh2eeYsWzANKI3YBlblLl1uglVd", "openai", "rbtf91s6", 21.028816743999414, 52, 167, 219]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_117_8afbd8d9b801892f328e.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: md-1195389,Blue Dream Roll Your Own Sugar Shake 14g Bag,Pacific Stone,Flowers,nan,nan\n", "\n\n**assistant**: Product Name: Blue Dream Roll Your Own Sugar Shake 14g Bag\nBrand: Pacific Stone\nProduct Category: Flowers\nDescription:\nThe Blue Dream Roll Your Own Sugar Shake is a 14g bag of premium cannabis flowers from Pacific Stone. Blue Dream, a popular and widely known strain, is a sativa-dominant hybrid that delivers a balanced high with a sweet, berry-like aroma. This sugar shake is an excellent choice for those who enjoy the relaxing and euphoric benefits of cannabis while staying focused and clear-headed. The 14g bag provides a generous amount for consumers who prefer rolling their own joints, or it can be an excellent addition to various cannabis recipes and preparations. The Blue Dream Roll Your Own Sugar Shake is a convenient and cost-effective option for those who appreciate a high-quality cannabis experience.\n", "gpt-4-0314", "2023-06-26T11:57:58", "2023-06-26T11:58:13.038242", "chatcmpl-7VlhOyEAazqJEGuWGg4g4APivkogx", "openai", "rbtf91s6", 15.038242100999923, 48, 164, 212]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_118_f5e4033d82ffd310d340.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: co-847ZXF37,The Grower Circle - Zoo Dawg x Cosa Nostra - Multi Joint - 5pk - 2.5g,nan,nan,nan,nan\n", "\n\n**assistant**: Hello! It seems like you are looking for information on the Grower Circle's \"Zoo Dawg x Cosa Nostra\" Multi-Joint 5-pack (2.5g total). While I couldn't find specific details about this product, I can provide some general information about its parent strains.\n\n- Zoo Dawg: This strain is a hybrid cannabis with genetic lineage from Zoo Keeper, Strawberry Glue, and Stardawg. Zoo Dawg is known for its relaxing and uplifting effects, making it suitable for stress relief and inducing a positive mood.\n\n- Cosa Nostra: This is an indica-dominant strain with a combination of the genetics from Peyote Purple and SFV OG Kush F4. It is known for its potent effects, providing relaxation, and helping relieve pain, anxiety, and insomnia.\n\nThis Multi-Joint 5-pack would be a combination of these strains, so expect a hybrid experience, which may provide a balanced combination of relaxation, stress relief, and uplifting mood. Please note that each user's experience may vary since individual reactions to cannabis products depend on a variety of factors. Remember to consume cannabis products responsibly and consult with a medical professional if you have any concerns or are using cannabis for medical purposes.\n", "gpt-4-0314", "2023-06-26T11:58:13", "2023-06-26T11:58:35.308054", "chatcmpl-7VlhdjuABCp8pq7UA9hP1nNXE2Cv2", "openai", "rbtf91s6", 22.308054483999513, 62, 250, 312]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_119_6d43ea3093b2b3ecadd6.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: co-8EMW15ZM,Flight Bites - S'mores - Gummy - 10ct - 100mg,Flight Bites,nan,nan,nan\n", "\n\n**assistant**: Hello! It seems like you're looking for information on a product called \"Flight Bites - S'mores - Gummy - 10ct - 100mg\". Flight Bites are likely a type of edible product, possibly infused with cannabis or CBD, as indicated by the \"100mg\" dosage. The S'mores flavor and Gummy suggest that these might be tasty, chewy treats.\n\nHowever, since you haven't provided a specific question or request, I'd be happy to help further if you'd like to know more about the product or have any specific question in mind. Just let me know what you're looking for!\n", "gpt-4-0314", "2023-06-26T11:58:36", "2023-06-26T11:58:47.884809", "chatcmpl-7Vli0raZNlIIGBpBDSBD6dc0zK0bw", "openai", "rbtf91s6", 11.884809475000111, 58, 129, 187]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_11_81ba18437deae19f6d25.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message.txt\n\n\n**user**: co-7YGGNHBR,Kanha - Nano Blood Orange Bliss Indica - Gummy - 10ct - 100mg,Kanha,nan,nan,nan\n", "\n\n**assistant**: Hello! It looks like you're interested in Kanha Nano Blood Orange Bliss Indica Gummies. These are delicious gummies infused with 100mg of THC, split into 10 individual 10mg servings. They're formulated with a nano-molecular technology, which ensures a quicker and more efficient absorption of THC into your bloodstream.\n\nKanha is known for its high-quality cannabis-infused gummies that provide a consistent edible experience. The Blood Orange Bliss flavor has been specifically designed for those looking for a relaxing indica experience.\n\nPlease keep in mind that the effects of edibles may vary depending on your individual tolerance and metabolism. It's always best to start with a lower dose and gradually increase if needed.\n\nNote: This product is meant for adults 21 years old and over, and should only be used responsibly and in accordance with local laws and regulations.\n", "gpt-4-0314", "2023-06-26T11:46:27", "2023-06-26T11:46:51.996454", "chatcmpl-7VlWFykUwxOhi8CMqDxjakijjrrRj", "openai", "rbtf91s6", 24.996453811999345, 57, 172, 229]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_120_628c0792a073cd9c405c.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: md-1230924,Baby Jeeter Maui Wowie | Infused Preroll 5pk - 2.5g,Jeeter,Infused Prerolls,nan,nan\n\n\n**assistant**: Jeeter,Preroll,Infused Joint,Maui Wowie,12.5\n\n\n**user**: co-2MFE5QVF,Chill Medicated - Watermelon - Syrup - 250mg,Chill Medicated,nan,nan,nan\n", "\n\n**assistant**: Chill Medicated,Syrup,Watermelon,250mg\n", "gpt-4-0314", "2023-06-26T11:58:48", "2023-06-26T11:58:49.841858", "chatcmpl-7VliCz6tI4VLYqAlF9f6pRo1kCzwn", "openai", "rbtf91s6", 1.84185792199969, 122, 13, 135]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_121_b556a799b6d3b562770d.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: md-1230924,Baby Jeeter Maui Wowie | Infused Preroll 5pk - 2.5g,Jeeter,Infused Prerolls,nan,nan\n\n\n**assistant**: Jeeter,Preroll,Infused Joint,Maui Wowie,12.5\n\n\n**user**: bl-111630024545,Feelz - Space Cowboy 3.5g,nan,FLOWERS 3.5G,nan,nan\n", "\n\n**assistant**: Feelz,Flower,Space Cowboy,3.5g,22.5\n", "gpt-4-0314", "2023-06-26T11:58:51", "2023-06-26T11:58:53.211872", "chatcmpl-7VliF2XEa71nrQyvMdkDdlO2bPdpD", "openai", "rbtf91s6", 2.2118716180002593, 119, 17, 136]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_122_b10f20986ab3c17d78d0.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: md-1230924,Baby Jeeter Maui Wowie | Infused Preroll 5pk - 2.5g,Jeeter,Infused Prerolls,nan,nan\n\n\n**assistant**: Jeeter,Preroll,Infused Joint,Maui Wowie,12.5\n\n\n**user**: fl-8voAjt83sD,Champelli | Xclusivo 3.5g | Eighths,Champelli,PackedBud,Champelli | Xclusivo 3.5g,3.5\n", "\n\n**assistant**: Champelli,Flower,Packed Bud,Xclusivo,3.5\n", "gpt-4-0314", "2023-06-26T11:58:53", "2023-06-26T11:58:56.364528", "chatcmpl-7VliHONcf8Warr2P4HVS4S8Bkakv6", "openai", "rbtf91s6", 3.36452819599981, 139, 16, 155]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_123_ec51ba8e67e34ec07ce2.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: md-1230924,Baby Jeeter Maui Wowie | Infused Preroll 5pk - 2.5g,Jeeter,Infused Prerolls,nan,nan\n\n\n**assistant**: Jeeter,Preroll,Infused Joint,Maui Wowie,12.5\n\n\n**user**: bl-073133213364,CAM - Mellowz #7 7g,nan,FLOWERS 7G,nan,nan\n", "\n\n**assistant**: CAM,Flower,Mellowz #7,7g,35\n", "gpt-4-0314", "2023-06-26T11:58:57", "2023-06-26T11:58:58.579921", "chatcmpl-7VliLmVXERbzrwGYXPs0AQ60PiEIU", "openai", "rbtf91s6", 1.5799211819994525, 117, 14, 131]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_124_c2c63679dc07c9c48315.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: md-1230924,Baby Jeeter Maui Wowie | Infused Preroll 5pk - 2.5g,Jeeter,Infused Prerolls,nan,nan\n\n\n**assistant**: Jeeter,Preroll,Infused Joint,Maui Wowie,12.5\n\n\n**user**: fl-fwJQL2AWnS,Backpack Boyz | Bubblegum Gelato Sizzurp 1000mg,Backpack Boyz,Tincture,nan,nan\n", "\n\n**assistant**: Backpack Boyz,Tincture,Bubblegum Gelato Sizzurp,1000mg,1\n", "gpt-4-0314", "2023-06-26T11:58:59", "2023-06-26T11:59:01.884846", "chatcmpl-7VliNFfr63Jj1as1Kw6LejymTHfVh", "openai", "rbtf91s6", 2.884845640000094, 127, 23, 150]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_125_4d67abd74b85c3978623.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: md-1230924,Baby Jeeter Maui Wowie | Infused Preroll 5pk - 2.5g,Jeeter,Infused Prerolls,nan,nan\n\n\n**assistant**: Jeeter,Preroll,Infused Joint,Maui Wowie,12.5\n\n\n**user**: fl-dXE5cH45AQ,Raw Garden | Pink Lemonade Crushed Diamonds 1g,Raw Garden,Concentrate,nan,1.0\n", "\n\n**assistant**: Raw Garden,Pink Lemonade Crushed Diamonds,Concentrate,1g,10\n", "gpt-4-0314", "2023-06-26T11:59:03", "2023-06-26T11:59:05.431226", "chatcmpl-7VliRBP9MFlmKx5fYdlDcKq7yRqfd", "openai", "rbtf91s6", 2.4312261879995276, 122, 18, 140]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_126_8815035ed0a50d11f9e3.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: md-1230924,Baby Jeeter Maui Wowie | Infused Preroll 5pk - 2.5g,Jeeter,Infused Prerolls,nan,nan\n\n\n**assistant**: Jeeter,Preroll,Infused Joint,Maui Wowie,12.5\n\n\n**user**: md-1159983,Baby Jeeter Peaches | Infused Preroll 5pk - 2.5g,Jeeter,Infused Prerolls,nan,nan\n", "\n\n**assistant**: Jeeter,Preroll,Infused Joint,Peaches,12.5\n", "gpt-4-0314", "2023-06-26T11:59:06", "2023-06-26T11:59:08.376427", "chatcmpl-7VliUiTsPS5WaqWETrCibHgpqZ2hw", "openai", "rbtf91s6", 2.3764274689992817, 127, 17, 144]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_127_98fa95995325ab388230.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: md-1230924,Baby Jeeter Maui Wowie | Infused Preroll 5pk - 2.5g,Jeeter,Infused Prerolls,nan,nan\n\n\n**assistant**: Jeeter,Preroll,Infused Joint,Maui Wowie,12.5\n\n\n**user**: co-6WGV1Z0H,Ocean Breeze Cultivators - Truffle Pupil - Indoor - Blunt - 1g,Ocean Breeze Cultivators,nan,Truffle Pupil,nan\n", "\n\n**assistant**: Ocean Breeze Cultivators,Blunt,Indoor,Truffle Pupil,1g\n", "gpt-4-0314", "2023-06-26T11:59:09", "2023-06-26T11:59:11.381799", "chatcmpl-7VliX2mW9Mn4DaqV9UJX66RereJdO", "openai", "rbtf91s6", 2.381798388999414, 134, 20, 154]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_128_a77f4efc17f0b058d06b.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: md-1230924,Baby Jeeter Maui Wowie | Infused Preroll 5pk - 2.5g,Jeeter,Infused Prerolls,nan,nan\n\n\n**assistant**: Jeeter,Preroll,Infused Joint,Maui Wowie,12.5\n\n\n**user**: fl-doWkMzvFq2,Cookies | Triple Scoop Preroll 1g | Joints,Cookies,Joint,Cookies | Triple Scoop Preroll 1g,1.0\n", "\n\n**assistant**: Cookies,Preroll,Joint,Triple Scoop,1.0\n", "gpt-4-0314", "2023-06-26T11:59:11", "2023-06-26T11:59:12.937167", "chatcmpl-7VliZYKs26JplHmoHF2Jm0f1m7Qx9", "openai", "rbtf91s6", 1.9371670350001295, 133, 16, 149]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_129_4ed9178a8330c3b08b77.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: md-1230924,Baby Jeeter Maui Wowie | Infused Preroll 5pk - 2.5g,Jeeter,Infused Prerolls,nan,nan\n\n\n**assistant**: Jeeter,Preroll,Infused Joint,Maui Wowie,12.5\n\n\n**user**: bl-842922110296,STIIIZY - Birthday Cake Pod 1g,nan,VAPE PENS 1G,nan,1.0\n", "\n\n**assistant**: STIIIZY,Vape Pen,Birthday Cake Pod,1g,1\n", "gpt-4-0314", "2023-06-26T11:59:14", "2023-06-26T11:59:15.691454", "chatcmpl-7VlicqYXgzPIlT5wKdSuY9rgHsDjv", "openai", "rbtf91s6", 1.6914543789998788, 122, 16, 138]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_12_418719b4fcd204ed66d8.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message.txt\n\n\n**user**: md-1242517,BOBBI HYLL | Indoor - 3.5g,Fresca,Flower,nan,3.5\n", "\n\n**assistant**: Hello! It seems like you are interested in the indoor-grown cannabis strain, BOBBI HYLL, with a product weight of 3.5g. This strain is under the flower category and comes from the brand Fresca. Unfortunately, the price is not provided in your query.\n\nPlease keep in mind that cannabis is a controlled substance and should only be consumed legally and responsibly, in accordance with local laws and regulations. If you have any further questions or need more information, feel free to ask.\n", "gpt-4-0314", "2023-06-26T11:46:53", "2023-06-26T11:47:06.951314", "chatcmpl-7VlWfotzgSyf5AuaAuhvkLIpnJLkn", "openai", "rbtf91s6", 13.951314044000355, 51, 102, 153]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_130_691215590d7f6b91716f.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: md-1230924,Baby Jeeter Maui Wowie | Infused Preroll 5pk - 2.5g,Jeeter,Infused Prerolls,nan,nan\n\n\n**assistant**: Jeeter,Preroll,Infused Joint,Maui Wowie,12.5\n\n\n**user**: co-6GGKA0GV,Cookies x The Grower Circle - TangEray - Indoor - Infused Joint - 1g,nan,nan,nan,nan\n", "\n\n**assistant**: Cookies x The Grower Circle,Infused Joint,TangEray,Indoor,1g\n", "gpt-4-0314", "2023-06-26T11:59:16", "2023-06-26T11:59:21.232802", "chatcmpl-7Vliep8zQeEMRDi84fuOjmqeklGjQ", "openai", "rbtf91s6", 5.23280159100068, 124, 20, 144]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_131_abafdaca35ad3cbbc1f4.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: md-1230924,Baby Jeeter Maui Wowie | Infused Preroll 5pk - 2.5g,Jeeter,Infused Prerolls,nan,nan\n\n\n**assistant**: Jeeter,Preroll,Infused Joint,Maui Wowie,12.5\n\n\n**user**: co-7YGGNHBR,Kanha - Nano Blood Orange Bliss Indica - Gummy - 10ct - 100mg,Kanha,nan,nan,nan\n", "\n\n**assistant**: Kanha,Gummies,Nano Blood Orange Bliss,Indica,100mg\n", "gpt-4-0314", "2023-06-26T11:59:21", "2023-06-26T11:59:23.179755", "chatcmpl-7VlijGnypcBrPs8HjV0MFRAQgaYVX", "openai", "rbtf91s6", 2.1797548040003676, 125, 16, 141]]}
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_132_1b1d305dd76406f7892d.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: md-1230924,Baby Jeeter Maui Wowie | Infused Preroll 5pk - 2.5g,Jeeter,Infused Prerolls,nan,nan\n\n\n**assistant**: Jeeter,Preroll,Infused Joint,Maui Wowie,12.5\n\n\n**user**: md-1242517,BOBBI HYLL | Indoor - 3.5g,Fresca,Flower,nan,3.5\n", "\n\n**assistant**: Fresca,Flower,Indoor,BOBBI HYLL,3.5\n", "gpt-4-0314", "2023-06-26T11:59:24", "2023-06-26T11:59:27.657098", "chatcmpl-7VlimPRRjNQLdZK2IxiiUn6bBNi59", "openai", "rbtf91s6", 3.65709766100008, 119, 19, 138]]}