Spaces:

kaleidoscope-data
/

data-cleaning-llm

Runtime error

App Files Files Community

cmagganas committed on Jul 5, 2023

Commit

9de5882

•

1 Parent(s): 8a826ee

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.env +1 -0
.gitattributes +1 -0
.gitignore +160 -0
README.md +2 -12
app/check_if_csv_loads.ipynb +53 -0
app/cookies_llm_experimental_eval.ipynb +412 -0
app/cookies_openai_model_eval.ipynb +797 -0
app/data_prep.ipynb +283 -0
app/openai_chat_completion.py +76 -0
app/test.py +18 -0
app/util.py +20 -0
app/wandb/run-20230626_114056-rbtf91s6/files/conda-environment.yaml +114 -0
app/wandb/run-20230626_114056-rbtf91s6/files/config.yaml +49 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_0_75f83ac87fa164c83b8e.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_100_f673fdf8da564f8db616.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_101_df8f84304e946465232c.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_102_bf3b4ae7fed0ee456ca9.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_103_9784775529cd9b0c478d.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_104_b69d429713afe585607d.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_105_8f9030467825d6eb9154.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_106_975584bb2f8ac7976684.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_107_353750bd44b04ac7162b.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_108_d4754b6aea82a8c16ed9.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_109_bcb8cb29447c99155339.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_10_d888c45dceed81f58d7a.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_110_6b738aa2166ace2f9d56.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_111_5aed3974ba4952f876eb.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_112_3a7258bfdca8fcd1719b.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_113_71838fad78e890a3e4cd.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_114_200ab5992b87e0e3b2d4.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_115_3e8ce0d79ca9a77c577e.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_116_27261d37b58e97119be1.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_117_8afbd8d9b801892f328e.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_118_f5e4033d82ffd310d340.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_119_6d43ea3093b2b3ecadd6.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_11_81ba18437deae19f6d25.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_120_628c0792a073cd9c405c.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_121_b556a799b6d3b562770d.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_122_b10f20986ab3c17d78d0.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_123_ec51ba8e67e34ec07ce2.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_124_c2c63679dc07c9c48315.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_125_4d67abd74b85c3978623.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_126_8815035ed0a50d11f9e3.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_127_98fa95995325ab388230.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_128_a77f4efc17f0b058d06b.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_129_4ed9178a8330c3b08b77.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_12_418719b4fcd204ed66d8.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_130_691215590d7f6b91716f.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_131_abafdaca35ad3cbbc1f4.table.json +1 -0
app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_132_1b1d305dd76406f7892d.table.json +1 -0

.env ADDED Viewed

	@@ -0,0 +1 @@


1	+ OPENAI_API_KEY="<REDACTED>"

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+app/wandb/run-20230626_114056-rbtf91s6/run-rbtf91s6.wandb filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,160 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/

README.md CHANGED Viewed

@@ -1,12 +1,2 @@
----
-title: Data Cleaning Llm
-emoji: 🌍
-colorFrom: indigo
-colorTo: indigo
-sdk: streamlit
-sdk_version: 1.21.0
-app_file: app.py
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference


1	+ # llm-data-cleaner
2	+ Kaleidoscope Data - LLM Data Cleaner

app/check_if_csv_loads.ipynb ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "FileNotFoundError",
+     "evalue": "[Errno 2] No such file or directory: '../data/cookies_llm_eval_predictions.csv'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[1], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mpandas\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mpd\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m pd\u001b[39m.\u001b[39;49mread_csv(\u001b[39m'\u001b[39;49m\u001b[39m../data/cookies_llm_eval_predictions.csv\u001b[39;49m\u001b[39m'\u001b[39;49m)\n",
+      "File \u001b[0;32m~/anaconda3/envs/kd-llm-dc/lib/python3.10/site-packages/pandas/io/parsers/readers.py:912\u001b[0m, in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m    899\u001b[0m kwds_defaults \u001b[39m=\u001b[39m _refine_defaults_read(\n\u001b[1;32m    900\u001b[0m     dialect,\n\u001b[1;32m    901\u001b[0m     delimiter,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    908\u001b[0m     dtype_backend\u001b[39m=\u001b[39mdtype_backend,\n\u001b[1;32m    909\u001b[0m )\n\u001b[1;32m    910\u001b[0m kwds\u001b[39m.\u001b[39mupdate(kwds_defaults)\n\u001b[0;32m--> 912\u001b[0m \u001b[39mreturn\u001b[39;00m _read(filepath_or_buffer, kwds)\n",
+      "File \u001b[0;32m~/anaconda3/envs/kd-llm-dc/lib/python3.10/site-packages/pandas/io/parsers/readers.py:577\u001b[0m, in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m    574\u001b[0m _validate_names(kwds\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mnames\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m))\n\u001b[1;32m    576\u001b[0m \u001b[39m# Create the parser.\u001b[39;00m\n\u001b[0;32m--> 577\u001b[0m parser \u001b[39m=\u001b[39m TextFileReader(filepath_or_buffer, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwds)\n\u001b[1;32m    579\u001b[0m \u001b[39mif\u001b[39;00m chunksize \u001b[39mor\u001b[39;00m iterator:\n\u001b[1;32m    580\u001b[0m     \u001b[39mreturn\u001b[39;00m parser\n",
+      "File \u001b[0;32m~/anaconda3/envs/kd-llm-dc/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1407\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m   1404\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39moptions[\u001b[39m\"\u001b[39m\u001b[39mhas_index_names\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m kwds[\u001b[39m\"\u001b[39m\u001b[39mhas_index_names\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[1;32m   1406\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles: IOHandles \u001b[39m|\u001b[39m \u001b[39mNone\u001b[39;00m \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m-> 1407\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_engine \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_make_engine(f, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mengine)\n",
+      "File \u001b[0;32m~/anaconda3/envs/kd-llm-dc/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1661\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m   1659\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m mode:\n\u001b[1;32m   1660\u001b[0m         mode \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m-> 1661\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles \u001b[39m=\u001b[39m get_handle(\n\u001b[1;32m   1662\u001b[0m     f,\n\u001b[1;32m   1663\u001b[0m     mode,\n\u001b[1;32m   1664\u001b[0m     encoding\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mencoding\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[1;32m   1665\u001b[0m     compression\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mcompression\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[1;32m   1666\u001b[0m     memory_map\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mmemory_map\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mFalse\u001b[39;49;00m),\n\u001b[1;32m   1667\u001b[0m     is_text\u001b[39m=\u001b[39;49mis_text,\n\u001b[1;32m   1668\u001b[0m     errors\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mencoding_errors\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mstrict\u001b[39;49m\u001b[39m\"\u001b[39;49m),\n\u001b[1;32m   1669\u001b[0m     
storage_options\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mstorage_options\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[1;32m   1670\u001b[0m )\n\u001b[1;32m   1671\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m   1672\u001b[0m f \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles\u001b[39m.\u001b[39mhandle\n",
+      "File \u001b[0;32m~/anaconda3/envs/kd-llm-dc/lib/python3.10/site-packages/pandas/io/common.py:859\u001b[0m, in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m    854\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(handle, \u001b[39mstr\u001b[39m):\n\u001b[1;32m    855\u001b[0m     \u001b[39m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[1;32m    856\u001b[0m     \u001b[39m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[1;32m    857\u001b[0m     \u001b[39mif\u001b[39;00m ioargs\u001b[39m.\u001b[39mencoding \u001b[39mand\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m ioargs\u001b[39m.\u001b[39mmode:\n\u001b[1;32m    858\u001b[0m         \u001b[39m# Encoding\u001b[39;00m\n\u001b[0;32m--> 859\u001b[0m         handle \u001b[39m=\u001b[39m \u001b[39mopen\u001b[39;49m(\n\u001b[1;32m    860\u001b[0m             handle,\n\u001b[1;32m    861\u001b[0m             ioargs\u001b[39m.\u001b[39;49mmode,\n\u001b[1;32m    862\u001b[0m             encoding\u001b[39m=\u001b[39;49mioargs\u001b[39m.\u001b[39;49mencoding,\n\u001b[1;32m    863\u001b[0m             errors\u001b[39m=\u001b[39;49merrors,\n\u001b[1;32m    864\u001b[0m             newline\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[1;32m    865\u001b[0m         )\n\u001b[1;32m    866\u001b[0m     \u001b[39melse\u001b[39;00m:\n\u001b[1;32m    867\u001b[0m         \u001b[39m# Binary mode\u001b[39;00m\n\u001b[1;32m    868\u001b[0m         handle \u001b[39m=\u001b[39m \u001b[39mopen\u001b[39m(handle, ioargs\u001b[39m.\u001b[39mmode)\n",
+      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../data/cookies_llm_eval_predictions.csv'"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "pd.read_csv('../data/cookies_llm_eval_predictions.csv')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "kd-llm-dc",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

app/cookies_llm_experimental_eval.ipynb ADDED Viewed

	@@ -0,0 +1,412 @@

+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## This notebook evaluates how well the LLM cleans the cookies dataset."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Using OpenAI API"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Try calling code from the newly created module `openai_chat_completion.py`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{\n",
+      "  \"id\": \"chatcmpl-7UecmbvjkbwamEpYXyIvGO4w4Ae8i\",\n",
+      "  \"object\": \"chat.completion\",\n",
+      "  \"created\": 1687540356,\n",
+      "  \"model\": \"gpt-4-0314\",\n",
+      "  \"choices\": [\n",
+      "    {\n",
+      "      \"index\": 0,\n",
+      "      \"message\": {\n",
+      "        \"role\": \"assistant\",\n",
+      "        \"content\": \"Chill Medicated,Edible,Beverage,Watermelon,250\"\n",
+      "      },\n",
+      "      \"finish_reason\": \"stop\"\n",
+      "    }\n",
+      "  ],\n",
+      "  \"usage\": {\n",
+      "    \"prompt_tokens\": 636,\n",
+      "    \"completion_tokens\": 15,\n",
+      "    \"total_tokens\": 651\n",
+      "  }\n",
+      "}\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "from dotenv import load_dotenv\n",
+    "load_dotenv()\n",
+    "\n",
+    "import openai\n",
+    "\n",
+    "# set the OpenAI API key from the OPENAI_API_KEY environment variable (loaded from the .env file)\n",
+    "openai.api_key = os.getenv(\"OPENAI_API_KEY\")\n",
+    "\n",
+    "# import the OpenAIChatCompletions class from openai_chat_completion.py, which sits alongside this notebook in the app folder\n",
+    "from openai_chat_completion import OpenAIChatCompletions, compare_completion_and_prediction\n",
+    "\n",
+    "# read in llm-data-cleaner/prompts/gpt4-system-message.txt file into variable system_message\n",
+    "system_message = open('../prompts/gpt4-system-message.txt', 'r').read()\n",
+    "\n",
+    "# create an instance of the OpenAIChatCompletions class and use the openai_chat_completion method to get chat completions\n",
+    "chat_completions = OpenAIChatCompletions(model=\"gpt-4\", system_message=system_message)\n",
+    "\n",
+    "prompt = \"co-2MFE5QVF,Chill Medicated - Watermelon - Syrup - 250mg,Chill Medicated,nan,nan,nan\"\n",
+    "completion = chat_completions.openai_chat_completion(prompt, n_shot=1)\n",
+    "print(completion)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from openai_chat_completion import OpenAIChatCompletions\n",
+    "\n",
+    "test_prompts, test_completions, test_predictions = chat_completions.predict_jsonl(n_shot=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>prompt</th>\n",
+       "      <th>completion</th>\n",
+       "      <th>prediction</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>co-2MFE5QVF,Chill Medicated - Watermelon - Syr...</td>\n",
+       "      <td>Chill Medicated,Edible,Beverage,nan,nan</td>\n",
+       "      <td>Chill Medicated,Edible,Beverage,Watermelon,250</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>bl-111630024545,Feelz - Space Cowboy 3.5g,nan,...</td>\n",
+       "      <td>Feelz,Flower,Bud,Space Cowboy,3.5</td>\n",
+       "      <td>Feelz,Flower,Bud,Space Cowboy,3.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>fl-8voAjt83sD,Champelli | Xclusivo 3.5g | Eigh...</td>\n",
+       "      <td>Champelli,Flower,Bud,Xclusivo,3.5</td>\n",
+       "      <td>Champelli,Flower,Bud,Xclusivo,3.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>bl-073133213364,CAM - Mellowz #7 7g,nan,FLOWER...</td>\n",
+       "      <td>CAM,Flower,Bud,Mellowz #7,7</td>\n",
+       "      <td>CAM - Mellowz #7 7g,CAM,Flower,Bud,Mellowz,7</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>fl-fwJQL2AWnS,Backpack Boyz | Bubblegum Gelato...</td>\n",
+       "      <td>Backpack Boyz,Edible,CBD Tincture/Caps/etc,nan...</td>\n",
+       "      <td>Backpack Boyz,Edible,Syrup,Bubblegum Gelato,1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>fl-dXE5cH45AQ,Raw Garden | Pink Lemonade Crush...</td>\n",
+       "      <td>Raw Garden,Concentrate,Diamonds,Pink Lemonade,1</td>\n",
+       "      <td>Raw Garden,Concentrate,Diamonds,Pink Lemonade,1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>md-1159983,Baby Jeeter Peaches | Infused Prero...</td>\n",
+       "      <td>Jeeter,Preroll,Infused Joint,Peaches,12.5</td>\n",
+       "      <td>Jeeter,Preroll,Infused Joint,Peaches,12.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>co-6WGV1Z0H,Ocean Breeze Cultivators - Truffle...</td>\n",
+       "      <td>Ocean Breeze Cultivators,Preroll,Blunt,Truffle...</td>\n",
+       "      <td>Ocean Breeze Cultivators,Blunt,Truffle Pupil,1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>fl-doWkMzvFq2,Cookies | Triple Scoop Preroll 1...</td>\n",
+       "      <td>Cookies,Preroll,Joint,Triple Scoop,1</td>\n",
+       "      <td>Cookies,Preroll,Joint,Triple Scoop,1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>bl-842922110296,STIIIZY - Birthday Cake Pod 1g...</td>\n",
+       "      <td>STIIIZY,Vape,Vape,Birthday Cake,1</td>\n",
+       "      <td>Birthday Cake Pod,STIIIZY,Vape,Birthday Cake,1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>co-6GGKA0GV,Cookies x The Grower Circle - Tang...</td>\n",
+       "      <td>Cookies,Preroll,Infused Joint,TangEray,1</td>\n",
+       "      <td>Here is the missing information:\\n\\nproduct_na...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>co-7YGGNHBR,Kanha - Nano Blood Orange Bliss In...</td>\n",
+       "      <td>Kanha,Edible,Gummies,nan,nan</td>\n",
+       "      <td>Kanha,Edible,Gummies,nan,nan</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>md-1242517,BOBBI HYLL | Indoor - 3.5g,Fresca,F...</td>\n",
+       "      <td>Fresca,Flower,Bud,BOBBI HYLL,3.5</td>\n",
+       "      <td>Fresca,Flower,Bud,BOBBI HYLL,3.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>fl-9nEPGsnYtY,Flav | Blueberry Belts 100mg,Fla...</td>\n",
+       "      <td>Flav,Edible,Gummies,nan,nan</td>\n",
+       "      <td>Flav,Edible,Gummies,Blueberry,100</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>md-1347497,Cherry Pie - 1g - Pod,Left Coast Ex...</td>\n",
+       "      <td>Left Coast Extracts,Vape,510 cart,Cherry Pie,1</td>\n",
+       "      <td>Cherry Pie - 1g - Pod,Left Coast Extracts,Vape...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>co-76GP441T,Minntz - Emerald Cut - Indoor - Jo...</td>\n",
+       "      <td>Minntz,Preroll,Joint,Emerald Cut,1</td>\n",
+       "      <td>Minntz,Preroll,Joint,Emerald Cut,1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>co-5RAWYHYQ,The Growers Circle - Double Down -...</td>\n",
+       "      <td>The Growers Circle,Flower,Bud,Double Down,3.5</td>\n",
+       "      <td>The Growers Circle,Flower,Bud,Double Down,3.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>md-1195389,Blue Dream Roll Your Own Sugar Shak...</td>\n",
+       "      <td>Pacific Stone,Flower,Bud,nan,14</td>\n",
+       "      <td>Pacific Stone,Flower,Bud,Blue Dream,14</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>co-847ZXF37,The Grower Circle - Zoo Dawg x Cos...</td>\n",
+       "      <td>The Growers Circle,Preroll,Joint,Zoo Dawg x Co...</td>\n",
+       "      <td>The Grower Circle,Preroll,Joint,Zoo Dawg x Cos...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>co-8EMW15ZM,Flight Bites - S'mores - Gummy - 1...</td>\n",
+       "      <td>Flight Bites,Edible,Gummies,nan,nan</td>\n",
+       "      <td>Flight Bites,Edible,Gummies,S'mores,10</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                               prompt  \\\n",
+       "0   co-2MFE5QVF,Chill Medicated - Watermelon - Syr...   \n",
+       "1   bl-111630024545,Feelz - Space Cowboy 3.5g,nan,...   \n",
+       "2   fl-8voAjt83sD,Champelli | Xclusivo 3.5g | Eigh...   \n",
+       "3   bl-073133213364,CAM - Mellowz #7 7g,nan,FLOWER...   \n",
+       "4   fl-fwJQL2AWnS,Backpack Boyz | Bubblegum Gelato...   \n",
+       "5   fl-dXE5cH45AQ,Raw Garden | Pink Lemonade Crush...   \n",
+       "6   md-1159983,Baby Jeeter Peaches | Infused Prero...   \n",
+       "7   co-6WGV1Z0H,Ocean Breeze Cultivators - Truffle...   \n",
+       "8   fl-doWkMzvFq2,Cookies | Triple Scoop Preroll 1...   \n",
+       "9   bl-842922110296,STIIIZY - Birthday Cake Pod 1g...   \n",
+       "10  co-6GGKA0GV,Cookies x The Grower Circle - Tang...   \n",
+       "11  co-7YGGNHBR,Kanha - Nano Blood Orange Bliss In...   \n",
+       "12  md-1242517,BOBBI HYLL | Indoor - 3.5g,Fresca,F...   \n",
+       "13  fl-9nEPGsnYtY,Flav | Blueberry Belts 100mg,Fla...   \n",
+       "14  md-1347497,Cherry Pie - 1g - Pod,Left Coast Ex...   \n",
+       "15  co-76GP441T,Minntz - Emerald Cut - Indoor - Jo...   \n",
+       "16  co-5RAWYHYQ,The Growers Circle - Double Down -...   \n",
+       "17  md-1195389,Blue Dream Roll Your Own Sugar Shak...   \n",
+       "18  co-847ZXF37,The Grower Circle - Zoo Dawg x Cos...   \n",
+       "19  co-8EMW15ZM,Flight Bites - S'mores - Gummy - 1...   \n",
+       "\n",
+       "                                           completion  \\\n",
+       "0             Chill Medicated,Edible,Beverage,nan,nan   \n",
+       "1                   Feelz,Flower,Bud,Space Cowboy,3.5   \n",
+       "2                   Champelli,Flower,Bud,Xclusivo,3.5   \n",
+       "3                         CAM,Flower,Bud,Mellowz #7,7   \n",
+       "4   Backpack Boyz,Edible,CBD Tincture/Caps/etc,nan...   \n",
+       "5     Raw Garden,Concentrate,Diamonds,Pink Lemonade,1   \n",
+       "6           Jeeter,Preroll,Infused Joint,Peaches,12.5   \n",
+       "7   Ocean Breeze Cultivators,Preroll,Blunt,Truffle...   \n",
+       "8                Cookies,Preroll,Joint,Triple Scoop,1   \n",
+       "9                   STIIIZY,Vape,Vape,Birthday Cake,1   \n",
+       "10           Cookies,Preroll,Infused Joint,TangEray,1   \n",
+       "11                       Kanha,Edible,Gummies,nan,nan   \n",
+       "12                   Fresca,Flower,Bud,BOBBI HYLL,3.5   \n",
+       "13                        Flav,Edible,Gummies,nan,nan   \n",
+       "14     Left Coast Extracts,Vape,510 cart,Cherry Pie,1   \n",
+       "15                 Minntz,Preroll,Joint,Emerald Cut,1   \n",
+       "16      The Growers Circle,Flower,Bud,Double Down,3.5   \n",
+       "17                    Pacific Stone,Flower,Bud,nan,14   \n",
+       "18  The Growers Circle,Preroll,Joint,Zoo Dawg x Co...   \n",
+       "19                Flight Bites,Edible,Gummies,nan,nan   \n",
+       "\n",
+       "                                           prediction  \n",
+       "0      Chill Medicated,Edible,Beverage,Watermelon,250  \n",
+       "1                   Feelz,Flower,Bud,Space Cowboy,3.5  \n",
+       "2                   Champelli,Flower,Bud,Xclusivo,3.5  \n",
+       "3        CAM - Mellowz #7 7g,CAM,Flower,Bud,Mellowz,7  \n",
+       "4       Backpack Boyz,Edible,Syrup,Bubblegum Gelato,1  \n",
+       "5   Raw Garden,Concentrate,Diamonds,Pink Lemonade,1.0  \n",
+       "6           Jeeter,Preroll,Infused Joint,Peaches,12.5  \n",
+       "7    Ocean Breeze Cultivators,Blunt,Truffle Pupil,1.0  \n",
+       "8              Cookies,Preroll,Joint,Triple Scoop,1.0  \n",
+       "9    Birthday Cake Pod,STIIIZY,Vape,Birthday Cake,1.0  \n",
+       "10  Here is the missing information:\\n\\nproduct_na...  \n",
+       "11                       Kanha,Edible,Gummies,nan,nan  \n",
+       "12                   Fresca,Flower,Bud,BOBBI HYLL,3.5  \n",
+       "13                  Flav,Edible,Gummies,Blueberry,100  \n",
+       "14  Cherry Pie - 1g - Pod,Left Coast Extracts,Vape...  \n",
+       "15                 Minntz,Preroll,Joint,Emerald Cut,1  \n",
+       "16      The Growers Circle,Flower,Bud,Double Down,3.5  \n",
+       "17             Pacific Stone,Flower,Bud,Blue Dream,14  \n",
+       "18  The Grower Circle,Preroll,Joint,Zoo Dawg x Cos...  \n",
+       "19             Flight Bites,Edible,Gummies,S'mores,10  "
+      ]
+     },
+     "execution_count": 44,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "test_results = pd.DataFrame(\n",
+    "    zip(test_prompts,\n",
+    "        test_completions,\n",
+    "        [ele['choices'][0]['message']['content'] for ele in test_predictions])\n",
+    "    , columns = ['prompt','completion','prediction'])\n",
+    "test_results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'completion': ['Chill Medicated', 'Edible', 'Beverage', 'nan', 'nan'],\n",
+       " 'prediction': ['Chill Medicated', 'Edible', 'Beverage', 'Watermelon', '250'],\n",
+       " 'matches': [True, True, True, False, False],\n",
+       " 'num_correct': 3}"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from util import compare_completion_and_prediction\n",
+    "\n",
+    "compare_completion_and_prediction(test_completions[0], test_predictions[0][\"choices\"][0][\"message\"][\"content\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "66"
+      ]
+     },
+     "execution_count": 58,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Function that uses compare_completion_and_prediction to return num_correct, or zero if there is an error\n",
+    "def get_num_correct(completion, prediction):\n",
+    "    try:\n",
+    "        return compare_completion_and_prediction(completion, prediction)['num_correct']\n",
+    "    except:\n",
+    "        return 0 # this will be the case when format is incorrect\n",
+    "\n",
+    "# Apply get_num_correct function to test_results dataframe\n",
+    "test_results['num_correct'] = test_results.apply(lambda row: get_num_correct(row['completion'], row['prediction']), axis=1)\n",
+    "test_results['num_correct'].sum() # out of 100 possible correct predictions (20 samples * 5 cols per sample)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "kd-llm-dc",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

app/cookies_openai_model_eval.ipynb ADDED Viewed

	@@ -0,0 +1,797 @@

+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This notebook tests various OpenAI models, prompts, and numbers of few-shot examples to compare how they perform on the same task."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install wandb --upgrade openai datasets -qU"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from dotenv import load_dotenv\n",
+    "load_dotenv()\n",
+    "\n",
+    "import openai\n",
+    "\n",
+    "# set OPENAI_API_KEY environment variable from .env file\n",
+    "openai.api_key = os.getenv(\"OPENAI_API_KEY\")\n",
+    "\n",
+    "# import OpenAIChatCompletions class from openai_chat_completion.py file and compare_completion_and_prediction function from util.py file\n",
+    "from openai_chat_completion import OpenAIChatCompletions\n",
+    "from util import compare_completion_and_prediction"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Models:\n",
+    "- gpt-3.5-turbo\n",
+    "- gpt-4\n",
+    "\n",
+    "Prompts:\n",
+    "- gpt4-system-message.txt\n",
+    "- gpt4-system-message2.txt (same prompt, but with \"####\" in the system message)\n",
+    "\n",
+    "Few-shot examples:\n",
+    "> 0 ... 10"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "wandb setup:\n",
+    "- entity: kaleidoscope-data\n",
+    "- project: cookies_llm_experimental_eval\n",
+    "- tags: gpt-3.5-turbo, gpt-4, gpt4-system-message, few-shot"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[32m\u001b[41mERROR\u001b[0m API key must be 40 characters long, yours was 48\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /home/cmagganas/.netrc\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Tracking run with wandb version 0.15.4"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Run data is saved locally in <code>/home/cmagganas/kaleidoscope/llm_data_cleaner/app/wandb/run-20230626_114056-rbtf91s6</code>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Syncing run <strong><a href='https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval/runs/rbtf91s6' target=\"_blank\">rose-puddle-7</a></strong> to <a href='https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View project at <a href='https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval' target=\"_blank\">https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View run at <a href='https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval/runs/rbtf91s6' target=\"_blank\">https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval/runs/rbtf91s6</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "from wandb.integration.openai import autolog\n",
+    "\n",
+    "autolog({\"project\":\"cookies_llm_experimental_eval\",\n",
+    "         \"entity\": \"kaleidoscope-data\",\n",
+    "         \"group\": \"cookies\",\n",
+    "         \"job_type\": \"eval\"})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create an empty dataframe to store predictions\n",
+    "import pandas as pd\n",
+    "predictions_df = pd.DataFrame(columns=['model', 'system_message', 'n_shot', 'prompt', 'completion', 'prediction'])\n",
+    "\n",
+    "models_to_test = [\"gpt-4\", \"gpt-3.5-turbo\"]\n",
+    "sys_mes_to_test = [\"../prompts/gpt4-system-message.txt\", \"../prompts/gpt4-system-message2.txt\"] # names are arbitrary, same prompts but with \"####\" in system message 2\n",
+    "n_shots_to_test = [None, 1, 2, 3, 5]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# if rerunning the below cell is required, set the following to True\n",
+    "rerun = False\n",
+    "if rerun:\n",
+    "    predictions_df = pd.read_csv('../data/cookies_llm_eval_predictions.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 178,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# get predictions for all combinations of models, prompts, and n_shot values\n",
+    "# save predictions to dataframe and then to csv in data folder after each iteration\n",
+    "\n",
+    "# loop through models_to_test\n",
+    "for model in models_to_test:\n",
+    "    # loop through sys_mes_to_test\n",
+    "    for system_message in sys_mes_to_test:\n",
+    "            # instantiate OpenAIChatCompletions class\n",
+    "            chat = OpenAIChatCompletions(model=model, system_message=system_message)\n",
+    "            # loop through n_shots_to_test\n",
+    "            for n_shot in n_shots_to_test:\n",
+    "                sys_mes_var = 1 if system_message == \"../prompts/gpt4-system-message.txt\" else 2\n",
+    "                n_shot_var = 0 if n_shot == None else n_shot\n",
+    "                # check if predictions for this model, system_message, and n_shot value have already been made\n",
+    "                if predictions_df[(predictions_df['model'] == model) & (predictions_df['system_message'] == sys_mes_var) & (predictions_df['n_shot'] == n_shot_var)].shape[0] == 0:\n",
+    "                    prompts, completions, predictions = chat.predict_jsonl(n_shot=n_shot)\n",
+    "                else:\n",
+    "                     # skip if predictions for this model, system_message, and n_shot value have already been made\n",
+    "                    continue\n",
+    "                # build a dataframe holding this batch of predictions\n",
+    "                df_to_append = pd.DataFrame({'model': model, 'system_message': sys_mes_var, 'n_shot': n_shot_var, 'prompt': prompts, 'completion': completions, 'prediction': predictions})\n",
+    "                df_right = df_to_append['prediction'].apply(pd.Series)\n",
+    "                df_right['prediction'] = df_right['choices'].apply(lambda x: x[0]['message']['content']).drop(columns=['choices'])\n",
+    "                df_to_append = pd.concat([df_to_append[['model', 'system_message', 'n_shot', 'prompt', 'completion']], df_right], axis=1)\n",
+    "                df_to_append.columns = ['model', 'system_message', 'n_shot', 'prompt', 'completion', 'id', 'object', 'created', 'openai_model', 'choices', 'usage', 'prediction']\n",
+    "                # save predictions to dataframe\n",
+    "                predictions_df = pd.concat([predictions_df, df_to_append], ignore_index=True)\n",
+    "                # delete duplicates from dataframe\n",
+    "                predictions_df = predictions_df[~predictions_df.duplicated(subset=['model', 'system_message', 'n_shot', 'prompt'])]\n",
+    "                predictions_df.to_csv('../data/cookies_llm_eval_predictions.csv', index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 179,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "predictions_df = predictions_df[~predictions_df.duplicated(subset=['model', 'system_message', 'n_shot', 'prompt'])]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 180,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(400, 12)"
+      ]
+     },
+     "execution_count": 180,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "predictions_df.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 143,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import numpy as np\n",
+    "\n",
+    "# ids = predictions_df['id'].isna()\n",
+    "# # apply pd.Series to predictions column for rows where id is null and change system_message {0,1} to {1,2}\n",
+    "# new_df_right = predictions_df.loc[ids, 'prediction'].apply(pd.Series)\n",
+    "# new_df_right['prediction'] = new_df_right['choices'].apply(lambda x: x[0]['message']['content']).drop(columns=['choices'])\n",
+    "# new_df_left = predictions_df.loc[ids, ['model', 'system_message', 'n_shot', 'prompt', 'completion']].replace({0:1, 1:2})\n",
+    "# new_df = pd.concat([new_df_left, new_df_right], axis=1)\n",
+    "\n",
+    "# predictions_df.columns = ['model', 'system_message', 'n_shot', 'prompt', 'completion', 'id', 'object', 'created', 'openai_model', 'choices', 'usage', 'prediction']\n",
+    "# new_df.columns = ['model', 'system_message', 'n_shot', 'prompt', 'completion', 'id', 'object', 'created', 'openai_model', 'choices', 'usage', 'prediction']\n",
+    "# predictions_df.loc[ids] = new_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 155,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# for col in ['model','system_message','n_shot']:\n",
+    "#     print(predictions_df[col].value_counts())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 84,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import numpy as np\n",
+    "\n",
+    "# # create a copy of predictions_df to manipulate\n",
+    "# new_predictions_df = predictions_df\n",
+    "\n",
+    "# # replace names with 1 or 2\n",
+    "# def replace_sys_mes_name(x):\n",
+    "#     if x == \"../prompts/gpt4-system-message.txt\":\n",
+    "#         return \"1\"\n",
+    "#     elif x == \"../prompts/gpt4-system-message2.txt\":\n",
+    "#         return \"2\"\n",
+    "#     else:\n",
+    "#         return x\n",
+    "# new_predictions_df['system_message'] = new_predictions_df['system_message'].apply(lambda x: replace_sys_mes_name(x))\n",
+    "# # replace None with 0\n",
+    "# new_predictions_df['n_shot'] = new_predictions_df['n_shot'].apply(lambda x: 0 if x == None or np.nan else x)\n",
+    "\n",
+    "# # break up prediction column into sub columns by each of json keys\n",
+    "# new_predictions_df = pd.concat([new_predictions_df, new_predictions_df['prediction'].apply(pd.Series)], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 168,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# predictions_df.drop(columns=['num_correct'], inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 181,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>model</th>\n",
+       "      <th>system_message</th>\n",
+       "      <th>n_shot</th>\n",
+       "      <th>prompt</th>\n",
+       "      <th>completion</th>\n",
+       "      <th>id</th>\n",
+       "      <th>object</th>\n",
+       "      <th>created</th>\n",
+       "      <th>openai_model</th>\n",
+       "      <th>choices</th>\n",
+       "      <th>usage</th>\n",
+       "      <th>prediction</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>gpt-4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>co-2MFE5QVF,Chill Medicated - Watermelon - Syr...</td>\n",
+       "      <td>Chill Medicated,Edible,Beverage,nan,nan</td>\n",
+       "      <td>chatcmpl-7VlTkjAqXNRWfltMPpr5v37uBJIsg</td>\n",
+       "      <td>chat.completion</td>\n",
+       "      <td>1.687805e+09</td>\n",
+       "      <td>gpt-4-0314</td>\n",
+       "      <td>[&lt;OpenAIObject at 0x7fcf7fde94e0&gt; JSON: {\\n  \"...</td>\n",
+       "      <td>{\\n  \"prompt_tokens\": 54,\\n  \"completion_token...</td>\n",
+       "      <td>Hello! It looks like you mentioned a product: ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>gpt-4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>bl-111630024545,Feelz - Space Cowboy 3.5g,nan,...</td>\n",
+       "      <td>Feelz,Flower,Bud,Space Cowboy,3.5</td>\n",
+       "      <td>chatcmpl-7VlTtGF3RGsngfKB1BXufxoTixX2v</td>\n",
+       "      <td>chat.completion</td>\n",
+       "      <td>1.687805e+09</td>\n",
+       "      <td>gpt-4-0314</td>\n",
+       "      <td>[&lt;OpenAIObject at 0x7fcf7f49d2b0&gt; JSON: {\\n  \"...</td>\n",
+       "      <td>{\\n  \"prompt_tokens\": 51,\\n  \"completion_token...</td>\n",
+       "      <td>Hello! It seems like you are referring to a pr...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>gpt-4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>fl-8voAjt83sD,Champelli | Xclusivo 3.5g | Eigh...</td>\n",
+       "      <td>Champelli,Flower,Bud,Xclusivo,3.5</td>\n",
+       "      <td>chatcmpl-7VlU80b0m00VaiGymtj9dbqOggTgR</td>\n",
+       "      <td>chat.completion</td>\n",
+       "      <td>1.687805e+09</td>\n",
+       "      <td>gpt-4-0314</td>\n",
+       "      <td>[&lt;OpenAIObject at 0x7fcf7e306890&gt; JSON: {\\n  \"...</td>\n",
+       "      <td>{\\n  \"prompt_tokens\": 71,\\n  \"completion_token...</td>\n",
+       "      <td>Hello! It seems like you're interested in the ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>gpt-4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>bl-073133213364,CAM - Mellowz #7 7g,nan,FLOWER...</td>\n",
+       "      <td>CAM,Flower,Bud,Mellowz #7,7</td>\n",
+       "      <td>chatcmpl-7VlUHqbsG2kpFHDxAWfsryh6pHmC9</td>\n",
+       "      <td>chat.completion</td>\n",
+       "      <td>1.687805e+09</td>\n",
+       "      <td>gpt-4-0314</td>\n",
+       "      <td>[&lt;OpenAIObject at 0x7fcf7e33d940&gt; JSON: {\\n  \"...</td>\n",
+       "      <td>{\\n  \"prompt_tokens\": 49,\\n  \"completion_token...</td>\n",
+       "      <td>It seems like you are looking for information ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>gpt-4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>fl-fwJQL2AWnS,Backpack Boyz | Bubblegum Gelato...</td>\n",
+       "      <td>Backpack Boyz,Edible,CBD Tincture/Caps/etc,nan...</td>\n",
+       "      <td>chatcmpl-7VlUYvcad2wahIMHavhDEkYrgvjpw</td>\n",
+       "      <td>chat.completion</td>\n",
+       "      <td>1.687805e+09</td>\n",
+       "      <td>gpt-4-0314</td>\n",
+       "      <td>[&lt;OpenAIObject at 0x7fcf7e306980&gt; JSON: {\\n  \"...</td>\n",
+       "      <td>{\\n  \"prompt_tokens\": 59,\\n  \"completion_token...</td>\n",
+       "      <td>Hello! It seems like you are looking for infor...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>395</th>\n",
+       "      <td>gpt-3.5-turbo</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>co-76GP441T,Minntz - Emerald Cut - Indoor - Jo...</td>\n",
+       "      <td>Minntz,Preroll,Joint,Emerald Cut,1</td>\n",
+       "      <td>chatcmpl-7VrjRMvs2l8EJd4PVecpSRPCvV9Hk</td>\n",
+       "      <td>chat.completion</td>\n",
+       "      <td>1.687829e+09</td>\n",
+       "      <td>gpt-3.5-turbo-0301</td>\n",
+       "      <td>[{'index': 0, 'message': {'role': 'assistant',...</td>\n",
+       "      <td>{'prompt_tokens': 125, 'completion_tokens': 23...</td>\n",
+       "      <td>Minntz,Joint,Indoor,Emerald Cut,1g,co-76GP441T.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>396</th>\n",
+       "      <td>gpt-3.5-turbo</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>co-5RAWYHYQ,The Growers Circle - Double Down -...</td>\n",
+       "      <td>The Growers Circle,Flower,Bud,Double Down,3.5</td>\n",
+       "      <td>chatcmpl-7VrjT3wfVoLtq3G6xksfVtLz4FloJ</td>\n",
+       "      <td>chat.completion</td>\n",
+       "      <td>1.687829e+09</td>\n",
+       "      <td>gpt-3.5-turbo-0301</td>\n",
+       "      <td>[{'index': 0, 'message': {'role': 'assistant',...</td>\n",
+       "      <td>{'prompt_tokens': 123, 'completion_tokens': 22...</td>\n",
+       "      <td>The Growers Circle,Double Down,Indoor,3.5g,5RA...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>397</th>\n",
+       "      <td>gpt-3.5-turbo</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>md-1195389,Blue Dream Roll Your Own Sugar Shak...</td>\n",
+       "      <td>Pacific Stone,Flower,Bud,nan,14</td>\n",
+       "      <td>chatcmpl-7VrjVafi1eGBXYfgmGBN0H3b0FzYO</td>\n",
+       "      <td>chat.completion</td>\n",
+       "      <td>1.687829e+09</td>\n",
+       "      <td>gpt-3.5-turbo-0301</td>\n",
+       "      <td>[{'index': 0, 'message': {'role': 'assistant',...</td>\n",
+       "      <td>{'prompt_tokens': 119, 'completion_tokens': 20...</td>\n",
+       "      <td>Pacific Stone,Sugar Shake,Blue Dream,Roll Your...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>398</th>\n",
+       "      <td>gpt-3.5-turbo</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>co-847ZXF37,The Grower Circle - Zoo Dawg x Cos...</td>\n",
+       "      <td>The Growers Circle,Preroll,Joint,Zoo Dawg x Co...</td>\n",
+       "      <td>chatcmpl-7VrjWQpcRxJTdr3f4BUd7totDZpdF</td>\n",
+       "      <td>chat.completion</td>\n",
+       "      <td>1.687829e+09</td>\n",
+       "      <td>gpt-3.5-turbo-0301</td>\n",
+       "      <td>[{'index': 0, 'message': {'role': 'assistant',...</td>\n",
+       "      <td>{'prompt_tokens': 133, 'completion_tokens': 32...</td>\n",
+       "      <td>Multi Joint,Zoo Dawg x Cosa Nostra,The Grower ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>399</th>\n",
+       "      <td>gpt-3.5-turbo</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>co-8EMW15ZM,Flight Bites - S'mores - Gummy - 1...</td>\n",
+       "      <td>Flight Bites,Edible,Gummies,nan,nan</td>\n",
+       "      <td>chatcmpl-7VrjXiUHiyUyH7udPXIjANVmAUrra</td>\n",
+       "      <td>chat.completion</td>\n",
+       "      <td>1.687829e+09</td>\n",
+       "      <td>gpt-3.5-turbo-0301</td>\n",
+       "      <td>[{'index': 0, 'message': {'role': 'assistant',...</td>\n",
+       "      <td>{'prompt_tokens': 129, 'completion_tokens': 21...</td>\n",
+       "      <td>Flight Bites,Gummy,S'mores,10 count,100mg CO₂ ...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>400 rows × 12 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             model  system_message  n_shot  \\\n",
+       "0            gpt-4               1       0   \n",
+       "1            gpt-4               1       0   \n",
+       "2            gpt-4               1       0   \n",
+       "3            gpt-4               1       0   \n",
+       "4            gpt-4               1       0   \n",
+       "..             ...             ...     ...   \n",
+       "395  gpt-3.5-turbo               2       1   \n",
+       "396  gpt-3.5-turbo               2       1   \n",
+       "397  gpt-3.5-turbo               2       1   \n",
+       "398  gpt-3.5-turbo               2       1   \n",
+       "399  gpt-3.5-turbo               2       1   \n",
+       "\n",
+       "                                                prompt  \\\n",
+       "0    co-2MFE5QVF,Chill Medicated - Watermelon - Syr...   \n",
+       "1    bl-111630024545,Feelz - Space Cowboy 3.5g,nan,...   \n",
+       "2    fl-8voAjt83sD,Champelli | Xclusivo 3.5g | Eigh...   \n",
+       "3    bl-073133213364,CAM - Mellowz #7 7g,nan,FLOWER...   \n",
+       "4    fl-fwJQL2AWnS,Backpack Boyz | Bubblegum Gelato...   \n",
+       "..                                                 ...   \n",
+       "395  co-76GP441T,Minntz - Emerald Cut - Indoor - Jo...   \n",
+       "396  co-5RAWYHYQ,The Growers Circle - Double Down -...   \n",
+       "397  md-1195389,Blue Dream Roll Your Own Sugar Shak...   \n",
+       "398  co-847ZXF37,The Grower Circle - Zoo Dawg x Cos...   \n",
+       "399  co-8EMW15ZM,Flight Bites - S'mores - Gummy - 1...   \n",
+       "\n",
+       "                                            completion  \\\n",
+       "0              Chill Medicated,Edible,Beverage,nan,nan   \n",
+       "1                    Feelz,Flower,Bud,Space Cowboy,3.5   \n",
+       "2                    Champelli,Flower,Bud,Xclusivo,3.5   \n",
+       "3                          CAM,Flower,Bud,Mellowz #7,7   \n",
+       "4    Backpack Boyz,Edible,CBD Tincture/Caps/etc,nan...   \n",
+       "..                                                 ...   \n",
+       "395                 Minntz,Preroll,Joint,Emerald Cut,1   \n",
+       "396      The Growers Circle,Flower,Bud,Double Down,3.5   \n",
+       "397                    Pacific Stone,Flower,Bud,nan,14   \n",
+       "398  The Growers Circle,Preroll,Joint,Zoo Dawg x Co...   \n",
+       "399                Flight Bites,Edible,Gummies,nan,nan   \n",
+       "\n",
+       "                                         id           object       created  \\\n",
+       "0    chatcmpl-7VlTkjAqXNRWfltMPpr5v37uBJIsg  chat.completion  1.687805e+09   \n",
+       "1    chatcmpl-7VlTtGF3RGsngfKB1BXufxoTixX2v  chat.completion  1.687805e+09   \n",
+       "2    chatcmpl-7VlU80b0m00VaiGymtj9dbqOggTgR  chat.completion  1.687805e+09   \n",
+       "3    chatcmpl-7VlUHqbsG2kpFHDxAWfsryh6pHmC9  chat.completion  1.687805e+09   \n",
+       "4    chatcmpl-7VlUYvcad2wahIMHavhDEkYrgvjpw  chat.completion  1.687805e+09   \n",
+       "..                                      ...              ...           ...   \n",
+       "395  chatcmpl-7VrjRMvs2l8EJd4PVecpSRPCvV9Hk  chat.completion  1.687829e+09   \n",
+       "396  chatcmpl-7VrjT3wfVoLtq3G6xksfVtLz4FloJ  chat.completion  1.687829e+09   \n",
+       "397  chatcmpl-7VrjVafi1eGBXYfgmGBN0H3b0FzYO  chat.completion  1.687829e+09   \n",
+       "398  chatcmpl-7VrjWQpcRxJTdr3f4BUd7totDZpdF  chat.completion  1.687829e+09   \n",
+       "399  chatcmpl-7VrjXiUHiyUyH7udPXIjANVmAUrra  chat.completion  1.687829e+09   \n",
+       "\n",
+       "           openai_model                                            choices  \\\n",
+       "0            gpt-4-0314  [<OpenAIObject at 0x7fcf7fde94e0> JSON: {\\n  \"...   \n",
+       "1            gpt-4-0314  [<OpenAIObject at 0x7fcf7f49d2b0> JSON: {\\n  \"...   \n",
+       "2            gpt-4-0314  [<OpenAIObject at 0x7fcf7e306890> JSON: {\\n  \"...   \n",
+       "3            gpt-4-0314  [<OpenAIObject at 0x7fcf7e33d940> JSON: {\\n  \"...   \n",
+       "4            gpt-4-0314  [<OpenAIObject at 0x7fcf7e306980> JSON: {\\n  \"...   \n",
+       "..                  ...                                                ...   \n",
+       "395  gpt-3.5-turbo-0301  [{'index': 0, 'message': {'role': 'assistant',...   \n",
+       "396  gpt-3.5-turbo-0301  [{'index': 0, 'message': {'role': 'assistant',...   \n",
+       "397  gpt-3.5-turbo-0301  [{'index': 0, 'message': {'role': 'assistant',...   \n",
+       "398  gpt-3.5-turbo-0301  [{'index': 0, 'message': {'role': 'assistant',...   \n",
+       "399  gpt-3.5-turbo-0301  [{'index': 0, 'message': {'role': 'assistant',...   \n",
+       "\n",
+       "                                                 usage  \\\n",
+       "0    {\\n  \"prompt_tokens\": 54,\\n  \"completion_token...   \n",
+       "1    {\\n  \"prompt_tokens\": 51,\\n  \"completion_token...   \n",
+       "2    {\\n  \"prompt_tokens\": 71,\\n  \"completion_token...   \n",
+       "3    {\\n  \"prompt_tokens\": 49,\\n  \"completion_token...   \n",
+       "4    {\\n  \"prompt_tokens\": 59,\\n  \"completion_token...   \n",
+       "..                                                 ...   \n",
+       "395  {'prompt_tokens': 125, 'completion_tokens': 23...   \n",
+       "396  {'prompt_tokens': 123, 'completion_tokens': 22...   \n",
+       "397  {'prompt_tokens': 119, 'completion_tokens': 20...   \n",
+       "398  {'prompt_tokens': 133, 'completion_tokens': 32...   \n",
+       "399  {'prompt_tokens': 129, 'completion_tokens': 21...   \n",
+       "\n",
+       "                                            prediction  \n",
+       "0    Hello! It looks like you mentioned a product: ...  \n",
+       "1    Hello! It seems like you are referring to a pr...  \n",
+       "2    Hello! It seems like you're interested in the ...  \n",
+       "3    It seems like you are looking for information ...  \n",
+       "4    Hello! It seems like you are looking for infor...  \n",
+       "..                                                 ...  \n",
+       "395    Minntz,Joint,Indoor,Emerald Cut,1g,co-76GP441T.  \n",
+       "396  The Growers Circle,Double Down,Indoor,3.5g,5RA...  \n",
+       "397  Pacific Stone,Sugar Shake,Blue Dream,Roll Your...  \n",
+       "398  Multi Joint,Zoo Dawg x Cosa Nostra,The Grower ...  \n",
+       "399  Flight Bites,Gummy,S'mores,10 count,100mg CO₂ ...  \n",
+       "\n",
+       "[400 rows x 12 columns]"
+      ]
+     },
+     "execution_count": 181,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "predictions_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 182,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "669"
+      ]
+     },
+     "execution_count": 182,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from util import compare_completion_and_prediction\n",
+    "\n",
+    "# Function that uses compare_completion_and_prediction to return num_correct and return zero if there is an error\n",
+    "def get_num_correct(completion, prediction):\n",
+    "    try:\n",
+    "        return compare_completion_and_prediction(completion, prediction)['num_correct']\n",
+    "    except:\n",
+    "        return 0 # this will be the case when format is incorrect\n",
+    "        \n",
+    "# Apply get_num_correct function to predictions_df dataframe\n",
+    "predictions_df['num_correct'] = predictions_df.apply(lambda row: get_num_correct(row['completion'], row['prediction']), axis=1)\n",
+    "predictions_df['num_correct'].sum() # out of 2000 possible correct predictions (20 samples * 5 cols per sample) * (2 system messages * 2 models * 5 n_shot values)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 187,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "model          system_message  n_shot\n",
+       "gpt-3.5-turbo  1               0         0.00\n",
+       "                               1         0.00\n",
+       "               2               0         0.00\n",
+       "gpt-4          1               0         0.00\n",
+       "                               1         0.00\n",
+       "               2               0         0.00\n",
+       "gpt-3.5-turbo  1               2         0.24\n",
+       "               2               1         0.24\n",
+       "                               2         0.27\n",
+       "                               3         0.36\n",
+       "               1               3         0.40\n",
+       "                               5         0.44\n",
+       "gpt-4          2               2         0.45\n",
+       "               1               2         0.45\n",
+       "               2               1         0.47\n",
+       "gpt-3.5-turbo  2               5         0.56\n",
+       "gpt-4          1               3         0.62\n",
+       "               2               3         0.67\n",
+       "                               5         0.73\n",
+       "               1               5         0.79\n",
+       "Name: num_correct, dtype: float64"
+      ]
+     },
+     "execution_count": 187,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "predictions_df.groupby(['model', 'system_message', 'n_shot'])['num_correct'].sum().sort_values() / 100 # out of 100 possible correct predictions (20 samples * 5 cols per sample)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 184,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "new_predictions_df.to_csv('../data/cookies_llm_eval_proc_preds.csv', index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 76,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "Waiting for W&B process to finish... <strong style=\"color:green\">(success).</strong>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<style>\n",
+       "    table.wandb td:nth-child(1) { padding: 0 10px; text-align: left ; width: auto;} td:nth-child(2) {text-align: left ; width: 100%}\n",
+       "    .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
+       "    .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
+       "    </style>\n",
+       "<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>usage/completion_tokens</td><td>▆▆▁▁▁▁▁▁▁▁█▄▁▁▁▁▁▁▁▃▁▁▁▆▂▆▃▅▄▅▆▄▃▁▁▁▁▁▁▁</td></tr><tr><td>usage/elapsed_time</td><td>▄▆▁▁▁▁▂▁▂▁█▃▁▁▁▂▁▁▂▁▁▁▁▄▂▄▂▃▃▄▅▂▁▁▁▁▂▁▁▁</td></tr><tr><td>usage/prompt_tokens</td><td>▁▁▂▂▄▄▆▅██▁▁▃▃▄▅▅██▁▁▃▃▁▁▁▁▁▁▂▁▂▁▄▄▆▆██▁</td></tr><tr><td>usage/total_tokens</td><td>▄▄▂▂▃▃▅▅█▇▆▃▂▂▄▅▅▇▇▂▁▃▂▄▂▄▂▄▃▄▄▃▂▄▃▅▆██▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>usage/completion_tokens</td><td>62</td></tr><tr><td>usage/elapsed_time</td><td>2.40086</td></tr><tr><td>usage/prompt_tokens</td><td>54</td></tr><tr><td>usage/total_tokens</td><td>116</td></tr></table><br/></div></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View run <strong style=\"color:#cdcd00\">rose-puddle-7</strong> at: <a href='https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval/runs/rbtf91s6' target=\"_blank\">https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval/runs/rbtf91s6</a><br/>Synced 6 W&B file(s), 422 media file(s), 422 artifact file(s) and 0 other file(s)"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Find logs at: <code>./wandb/run-20230626_114056-rbtf91s6/logs</code>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "autolog.disable()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "kd-llm-dc",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

app/data_prep.ipynb ADDED Viewed

	@@ -0,0 +1,283 @@

+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## This notebook shows how to load CSV data and convert it into JSONL format for the LLM data cleaner.\n",
+    "\n",
+    "First, we load the data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>sku</th>\n",
+       "      <th>product_name (pos)</th>\n",
+       "      <th>brand (pos)</th>\n",
+       "      <th>product_category (pos)</th>\n",
+       "      <th>strain_name (pos)</th>\n",
+       "      <th>product_weight_grams (pos)</th>\n",
+       "      <th>brand (manual review)</th>\n",
+       "      <th>product_category (manual review)</th>\n",
+       "      <th>sub_product_category (manual review)</th>\n",
+       "      <th>strain_name (manual review)</th>\n",
+       "      <th>product_weight_grams (manual review)</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>bl-842922110296</td>\n",
+       "      <td>STIIIZY - Birthday Cake Pod 1g</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>VAPE PENS 1G</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>STIIIZY</td>\n",
+       "      <td>Vape</td>\n",
+       "      <td>Vape</td>\n",
+       "      <td>Birthday Cake</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>co-6ARLLX12</td>\n",
+       "      <td>SMASH Hits - Hippie Slayer - Indoor - 1g</td>\n",
+       "      <td>SMASH Hits</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Hippie Slayer</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>SMASH Hits</td>\n",
+       "      <td>Preroll</td>\n",
+       "      <td>Joint</td>\n",
+       "      <td>Hippie Slayer</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>bl-090035986141</td>\n",
+       "      <td>Eighth Brothers - Black Jack 1g Preroll</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>PREROLLS</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Eighth Brothers</td>\n",
+       "      <td>Preroll</td>\n",
+       "      <td>Joint</td>\n",
+       "      <td>Black Jack</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>bl-850002822274</td>\n",
+       "      <td>GRIZZLY PEAK - Indica Bone 0.5g 7PK Prerolls</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>PREROLL PACKS</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>GRIZZLY PEAK</td>\n",
+       "      <td>Preroll</td>\n",
+       "      <td>Joint</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>3.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>co-76GP441T</td>\n",
+       "      <td>Minntz - Emerald Cut - Indoor - Joint - 1g</td>\n",
+       "      <td>Minntz</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Emerald Cut</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Minntz</td>\n",
+       "      <td>Preroll</td>\n",
+       "      <td>Joint</td>\n",
+       "      <td>Emerald Cut</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "               sku                            product_name (pos) brand (pos)  \\\n",
+       "0  bl-842922110296                STIIIZY - Birthday Cake Pod 1g         NaN   \n",
+       "1      co-6ARLLX12      SMASH Hits - Hippie Slayer - Indoor - 1g  SMASH Hits   \n",
+       "2  bl-090035986141       Eighth Brothers - Black Jack 1g Preroll         NaN   \n",
+       "3  bl-850002822274  GRIZZLY PEAK - Indica Bone 0.5g 7PK Prerolls         NaN   \n",
+       "4      co-76GP441T    Minntz - Emerald Cut - Indoor - Joint - 1g      Minntz   \n",
+       "\n",
+       "  product_category (pos) strain_name (pos)  product_weight_grams (pos)  \\\n",
+       "0           VAPE PENS 1G               NaN                         1.0   \n",
+       "1                    NaN     Hippie Slayer                         NaN   \n",
+       "2               PREROLLS               NaN                         NaN   \n",
+       "3          PREROLL PACKS               NaN                         NaN   \n",
+       "4                    NaN       Emerald Cut                         NaN   \n",
+       "\n",
+       "  brand (manual review) product_category (manual review)  \\\n",
+       "0               STIIIZY                             Vape   \n",
+       "1            SMASH Hits                          Preroll   \n",
+       "2       Eighth Brothers                          Preroll   \n",
+       "3          GRIZZLY PEAK                          Preroll   \n",
+       "4                Minntz                          Preroll   \n",
+       "\n",
+       "  sub_product_category (manual review) strain_name (manual review)  \\\n",
+       "0                                 Vape               Birthday Cake   \n",
+       "1                                Joint               Hippie Slayer   \n",
+       "2                                Joint                  Black Jack   \n",
+       "3                                Joint                         NaN   \n",
+       "4                                Joint                 Emerald Cut   \n",
+       "\n",
+       "  product_weight_grams (manual review)  \n",
+       "0                                    1  \n",
+       "1                                    1  \n",
+       "2                                    1  \n",
+       "3                                  3.5  \n",
+       "4                                    1  "
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "# Load comma-delimited file into pandas dataframe\n",
+    "cookies = pd.read_csv('../data/Cookies-AI-Gold-Standard - Cookies-AI-Gold-Standard.csv', sep=',')\n",
+    "\n",
+    "cookies.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Data Preparation\n",
+    "We transform the dataset into a pandas dataframe, with a column for prompt and completion.\n",
+    "\n",
+    "The prompt contains the \"dirty\" columns, and completion contains the \"cleaned\" columns."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datasets import Dataset, DatasetDict\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "# split the dataset into train and test datasets 80/20\n",
+    "cookies_train, cookies_test = train_test_split(cookies, test_size=0.20, random_state=42)\n",
+    "\n",
+    "# list of input and output columns\n",
+    "input_columns  = ['sku','product_name (pos)','brand (pos)','product_category (pos)','strain_name (pos)','product_weight_grams (pos)']\n",
+    "output_columns = ['brand (manual review)','product_category (manual review)','sub_product_category (manual review)','strain_name (manual review)','product_weight_grams (manual review)']\n",
+    "\n",
+    "# function to convert pandas dataframe row to csv string\n",
+    "def row_to_csv(row):\n",
+    "    csv_string = ','.join(str(value) for value in row.values)\n",
+    "    return csv_string\n",
+    "\n",
+    "# create dataframe with prompt and completion columns\n",
+    "\n",
+    "# apply row_to_csv function to each row of the training dataframe\n",
+    "input_rows  = cookies_train[input_columns ].apply(row_to_csv, axis=1)\n",
+    "output_rows = cookies_train[output_columns].apply(row_to_csv, axis=1)\n",
+    "\n",
+    "# create dataframe with prompt and completion columns for training dataset\n",
+    "prompt_df = pd.DataFrame(\n",
+    "    zip(input_rows,\n",
+    "        output_rows)\n",
+    "    , columns = ['prompt','completion'])\n",
+    "\n",
+    "# save dataframe to jsonl file for training\n",
+    "prompt_df.to_json(\"../data/cookies_train.jsonl\", orient='records', lines=True)\n",
+    "\n",
+    "# apply row_to_csv function to each row of the test dataframe\n",
+    "input_test_rows  = cookies_test[input_columns ].apply(row_to_csv, axis=1)\n",
+    "output_test_rows = cookies_test[output_columns].apply(row_to_csv, axis=1)\n",
+    "\n",
+    "# create dataframe with prompt and completion columns for test dataset\n",
+    "test_df = pd.DataFrame(\n",
+    "    zip(input_test_rows,\n",
+    "        output_test_rows)\n",
+    "    , columns = ['prompt','completion'])\n",
+    "test_df.head()\n",
+    "\n",
+    "# save dataframe to jsonl file for test\n",
+    "test_df.to_json(\"../data/cookies_test.jsonl\", orient='records', lines=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# write a function that samples n rows from a jsonl file\n",
+    "def sample_jsonl(path_or_buf='../data/cookies_train.jsonl',n_samples=5):    \n",
+    "    jsonObj = pd.read_json(path_or_buf=path_or_buf, lines=True)\n",
+    "    return jsonObj.sample(n_samples, random_state=42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# write a function that adds prompt and completion samples to messages\n",
+    "def add_samples(messages, n_samples=None):\n",
+    "    if n_samples is None:\n",
+    "        return messages\n",
+    "    samples = sample_jsonl(n_samples=n_samples)\n",
+    "    for i in range(n_samples):\n",
+    "        messages.append({\"role\": \"user\", \"content\": samples.iloc[i]['prompt']})\n",
+    "        messages.append({\"role\": \"assistant\", \"content\": samples.iloc[i]['completion']})\n",
+    "    return messages"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

app/openai_chat_completion.py ADDED Viewed

	@@ -0,0 +1,76 @@

+import os
+import pandas as pd
+from dotenv import load_dotenv
+load_dotenv()
+import openai
+# # set OPENAI_API_KEY environment variable from .env file
+# openai.api_key = os.getenv("OPENAI_API_KEY")
+# # read in llm-data-cleaner/prompts/gpt4-system-message.txt file into variable system_message
+# system_message = open('../prompts/gpt4-system-message.txt', 'r').read()
class OpenAIChatCompletions:
    """Wrapper around ``openai.ChatCompletion.create`` with optional few-shot examples.

    Few-shot examples are ``prompt``/``completion`` pairs sampled from a
    JSON-lines file and inserted as alternating user/assistant messages
    between the system message (if any) and the actual prompt.
    """

    def __init__(self, model="gpt-4", system_message=None):
        """Remember the model name and the optional system message.

        Args:
            model: Model identifier passed through to the OpenAI API.
            system_message: Text of the system message; when falsy, no
                system message is sent.
        """
        self.model = model
        self.system_message = system_message

    def openai_chat_completion(self, prompt, n_shot=None):
        """Send ``prompt`` to the chat model and return the API response.

        Args:
            prompt: Text sent as the final user message.
            n_shot: Number of sampled training examples to place before the
                prompt. ``None`` sends only the system message (if set) and
                the prompt.

        Returns:
            The object returned by ``openai.ChatCompletion.create``,
            unmodified (not just the completion text).
        """
        messages = []
        if self.system_message:
            messages.append({"role": "system", "content": self.system_message})
        if n_shot is not None:
            messages = self._add_samples(messages, n_samples=n_shot)
        messages.append({"role": "user", "content": prompt})
        return openai.ChatCompletion.create(model=self.model, messages=messages)

    def predict_jsonl(self, path_or_buf='../data/cookies_test.jsonl', n_samples=None, n_shot=None):
        """Run the model on every prompt of a JSON-lines file.

        Args:
            path_or_buf: JSON-lines source with ``prompt`` and ``completion``
                fields, as accepted by ``pandas.read_json``.
            n_samples: If given, evaluate only this many rows, sampled with a
                fixed seed so repeated runs pick the same rows.
            n_shot: Forwarded to ``openai_chat_completion`` for every prompt.

        Returns:
            A ``(prompts, completions, predictions)`` tuple of equally long
            lists; ``predictions`` holds one API response per prompt.
        """
        frame = pd.read_json(path_or_buf=path_or_buf, lines=True)
        if n_samples is not None:
            frame = frame.sample(n_samples, random_state=42)
        prompts = frame['prompt'].tolist()
        completions = frame['completion'].tolist()
        predictions = [self.openai_chat_completion(prompt, n_shot=n_shot) for prompt in prompts]
        return prompts, completions, predictions

    @staticmethod
    def _add_samples(messages, n_samples=None, path_or_buf='../data/cookies_train.jsonl'):
        """Append sampled prompt/completion pairs to ``messages``.

        Args:
            messages: Message list to extend; it is modified in place.
            n_samples: Number of examples to append. ``None`` leaves
                ``messages`` untouched.
            path_or_buf: JSON-lines source of the examples; defaults to the
                training file that was previously hard-coded here.

        Returns:
            The same ``messages`` list that was passed in.
        """
        if n_samples is None:
            return messages
        samples = OpenAIChatCompletions._sample_jsonl(path_or_buf=path_or_buf, n_samples=n_samples)
        for prompt, completion in zip(samples['prompt'], samples['completion']):
            messages.append({"role": "user", "content": prompt})
            messages.append({"role": "assistant", "content": completion})
        return messages

    @staticmethod
    def _sample_jsonl(path_or_buf='../data/cookies_train.jsonl', n_samples=5):
        """Return ``n_samples`` rows of a JSON-lines file as a DataFrame.

        The sample uses a fixed ``random_state`` so the same rows are
        returned on every call for a given file and ``n_samples``.
        """
        frame = pd.read_json(path_or_buf=path_or_buf, lines=True)
        return frame.sample(n_samples, random_state=42)

app/test.py ADDED Viewed

	@@ -0,0 +1,18 @@

+from openai_chat_completion import OpenAIChatCompletions
# Read the system message from llm-data-cleaner/prompts/gpt4-system-message.txt.
# The context manager closes the file handle as soon as the text is read.
with open('../prompts/gpt4-system-message.txt', 'r') as system_message_file:
    system_message = system_message_file.read()
# Create the chat-completion wrapper configured with that system message.
chat_completions = OpenAIChatCompletions(model="gpt-4", system_message=system_message)
# Single-prompt usage, kept for reference:
# prompt = "co-2MFE5QVF,Chill Medicated - Watermelon - Syrup - 250mg,Chill Medicated,nan,nan,nan"
# completion = chat_completions.openai_chat_completion(prompt, n_shot=1)
# print(completion)
# Predict two sampled rows of the default test file, using two few-shot examples each.
prompts, completions, predictions = chat_completions.predict_jsonl(n_samples=2, n_shot=2)
print("Prompts:", prompts)
print("Completions:", completions)
print("Predictions:", predictions)

app/util.py ADDED Viewed

	@@ -0,0 +1,20 @@

def compare_completion_and_prediction(completion, prediction, verbose=False):
    """Compare a reference completion with a prediction, field by field.

    Both arguments are comma-separated strings. Each field of ``completion``
    is tested for exact equality with the field at the same position in
    ``prediction``. Trailing fields that only ``prediction`` has are ignored.

    Args:
        completion: Reference string, fields separated by commas.
        prediction: Predicted string, fields separated by commas.
        verbose: When true, print both inputs and their types first.

    Returns:
        A dict with the keys ``completion`` and ``prediction`` (the split
        field lists), ``matches`` (one bool per completion field) and
        ``num_correct`` (the number of matching fields).

    Raises:
        IndexError: If ``prediction`` has fewer fields than ``completion``.
    """
    if verbose:
        print("Completion:", completion, f"type({type(completion)}):")
        print("Prediction:", prediction, f"type({type(prediction)}):")
    expected_fields = completion.split(',')
    predicted_fields = prediction.split(',')
    # A prediction that is too short cannot be scored position by position.
    # Raise the same exception type the positional lookup used to raise,
    # but with a message that explains the mismatch.
    if len(predicted_fields) < len(expected_fields):
        raise IndexError(
            f"prediction has {len(predicted_fields)} fields, "
            f"expected at least {len(expected_fields)}"
        )
    # zip stops at the shorter list, so extra predicted fields are ignored.
    matches = [
        expected == predicted
        for expected, predicted in zip(expected_fields, predicted_fields)
    ]
    return {
        "completion": expected_fields,
        "prediction": predicted_fields,
        "matches": matches,
        "num_correct": sum(matches),
    }

app/wandb/run-20230626_114056-rbtf91s6/files/conda-environment.yaml ADDED Viewed

	@@ -0,0 +1,114 @@

+name: kd-llm-dc
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=5.1=1_gnu
+  - asttokens=2.2.1=pyhd8ed1ab_0
+  - backcall=0.2.0=pyh9f0ad1d_0
+  - backports=1.0=pyhd8ed1ab_3
+  - backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
+  - bzip2=1.0.8=h7b6447c_0
+  - ca-certificates=2023.5.7=hbcca054_0
+  - decorator=5.1.1=pyhd8ed1ab_0
+  - entrypoints=0.4=pyhd8ed1ab_0
+  - executing=1.2.0=pyhd8ed1ab_0
+  - ipykernel=6.15.0=pyh210e3f2_0
+  - ipython=8.14.0=pyh41d4057_0
+  - jedi=0.18.2=pyhd8ed1ab_0
+  - jupyter_client=7.3.4=pyhd8ed1ab_0
+  - jupyter_core=5.3.1=py310hff52083_0
+  - ld_impl_linux-64=2.38=h1181459_1
+  - libffi=3.4.4=h6a678d5_0
+  - libgcc-ng=11.2.0=h1234567_1
+  - libgomp=11.2.0=h1234567_1
+  - libsodium=1.0.18=h36c2ea0_1
+  - libstdcxx-ng=11.2.0=h1234567_1
+  - libuuid=1.41.5=h5eee18b_0
+  - matplotlib-inline=0.1.6=pyhd8ed1ab_0
+  - ncurses=6.4=h6a678d5_0
+  - nest-asyncio=1.5.6=pyhd8ed1ab_0
+  - openssl=3.0.8=h7f8727e_0
+  - packaging=23.1=pyhd8ed1ab_0
+  - parso=0.8.3=pyhd8ed1ab_0
+  - pexpect=4.8.0=pyh1a96a4e_2
+  - pickleshare=0.7.5=py_1003
+  - platformdirs=3.5.3=pyhd8ed1ab_0
+  - prompt-toolkit=3.0.38=pyha770c72_0
+  - prompt_toolkit=3.0.38=hd8ed1ab_0
+  - ptyprocess=0.7.0=pyhd3deb0d_0
+  - pure_eval=0.2.2=pyhd8ed1ab_0
+  - pygments=2.15.1=pyhd8ed1ab_0
+  - python=3.10.11=h955ad1f_3
+  - python-dateutil=2.8.2=pyhd8ed1ab_0
+  - python_abi=3.10=2_cp310
+  - readline=8.2=h5eee18b_0
+  - six=1.16.0=pyh6c4a22f_0
+  - sqlite=3.41.2=h5eee18b_0
+  - stack_data=0.6.2=pyhd8ed1ab_0
+  - tk=8.6.12=h1ccaba5_0
+  - traitlets=5.9.0=pyhd8ed1ab_0
+  - typing-extensions=4.6.3=hd8ed1ab_0
+  - typing_extensions=4.6.3=pyha770c72_0
+  - wcwidth=0.2.6=pyhd8ed1ab_0
+  - xz=5.4.2=h5eee18b_0
+  - zeromq=4.3.4=h9c3ff4c_1
+  - zlib=1.2.13=h5eee18b_0
+  - pip:
+    - aiohttp==3.8.4
+    - aiosignal==1.3.1
+    - appdirs==1.4.4
+    - async-timeout==4.0.2
+    - attrs==23.1.0
+    - certifi==2023.5.7
+    - charset-normalizer==3.1.0
+    - click==8.1.3
+    - datasets==2.13.1
+    - debugpy==1.5.1
+    - dill==0.3.6
+    - docker-pycreds==0.4.0
+    - docopt==0.6.2
+    - filelock==3.12.2
+    - frozenlist==1.3.3
+    - fsspec==2023.6.0
+    - gitdb==4.0.10
+    - gitpython==3.1.31
+    - huggingface-hub==0.15.1
+    - idna==3.4
+    - joblib==1.2.0
+    - jupyter-core==5.3.1
+    - multidict==6.0.4
+    - multiprocess==0.70.14
+    - numpy==1.24.3
+    - openai==0.27.8
+    - pandas==2.0.2
+    - pathtools==0.1.2
+    - pip==23.1.2
+    - pipreqs==0.4.13
+    - protobuf==4.23.3
+    - psutil==5.9.0
+    - pyarrow==12.0.1
+    - python-dotenv==1.0.0
+    - pytz==2023.3
+    - pyyaml==6.0
+    - pyzmq==25.1.0
+    - requests==2.31.0
+    - scikit-learn==1.2.2
+    - scipy==1.10.1
+    - sentry-sdk==1.25.1
+    - setproctitle==1.3.2
+    - setuptools==67.8.0
+    - smmap==5.0.0
+    - tenacity==8.2.2
+    - threadpoolctl==3.1.0
+    - tornado==6.1
+    - tqdm==4.65.0
+    - tzdata==2023.3
+    - urllib3==2.0.3
+    - wandb==0.15.4
+    - wheel==0.38.4
+    - xxhash==3.2.0
+    - yarg==0.1.9
+    - yarl==1.9.2
+prefix: /home/cmagganas/anaconda3/envs/kd-llm-dc

app/wandb/run-20230626_114056-rbtf91s6/files/config.yaml ADDED Viewed

	@@ -0,0 +1,49 @@

+wandb_version: 1
+_wandb:
+  desc: null
+  value:
+    python_version: 3.10.11
+    cli_version: 0.15.4
+    is_jupyter_run: true
+    is_kaggle_kernel: false
+    start_time: 1687804857.029691
+    t:
+      1:
+      - 55
+      - 95
+      2:
+      - 55
+      - 95
+      3:
+      - 2
+      - 7
+      - 23
+      - 51
+      4: 3.10.11
+      5: 0.15.4
+      8:
+      - 1
+      - 5
+    m:
+    - 1: _timestamp
+    - 1: usage/total_tokens
+      5: 1
+      6:
+      - 1
+      - 3
+    - 1: usage/prompt_tokens
+      5: 1
+      6:
+      - 1
+      - 3
+    - 1: usage/completion_tokens
+      5: 1
+      6:
+      - 1
+      - 3
+    - 1: usage/elapsed_time
+      5: 1
+      6:
+      - 1
+      - 3

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_0_75f83ac87fa164c83b8e.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_100_f673fdf8da564f8db616.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_101_df8f84304e946465232c.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_102_bf3b4ae7fed0ee456ca9.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_103_9784775529cd9b0c478d.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_104_b69d429713afe585607d.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_105_8f9030467825d6eb9154.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_106_975584bb2f8ac7976684.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_107_353750bd44b04ac7162b.table.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: co-6WGV1Z0H,Ocean Breeze Cultivators - Truffle Pupil - Indoor - Blunt - 1g,Ocean Breeze Cultivators,nan,Truffle Pupil,nan\n", "\n\n**assistant**: Hello! It seems like you're looking for information on a product called \"Truffle Pupil\" by Ocean Breeze Cultivators. It is a 1g Indoor Blunt.\n\nUnfortunately, I don't have sufficient information about this specific product. However, I can tell you a little about Ocean Breeze Cultivators and general tips for smoking a blunt.\n\nOcean Breeze Cultivators is a cannabis company that grows and produces various strains of cannabis products. The \"Truffle Pupil\" is likely to be one of their strain offerings. The effects and potency of this specific strain might vary, so it's recommended that you consult the packaging or dispensary for more information on THC and CBD percentages.\n\nWhen smoking a blunt, it's important to take your time and enjoy the experience. Start by inhaling slowly and be cautious not to take in too much smoke at once. Take breaks between puffs and ensure you have water or another beverage handy to stay hydrated. Also, remember that sharing a blunt with others can carry the risk of spreading germs, so it's best to enjoy it individually, especially during the ongoing pandemic.\n\nPlease let me know if you have any other questions or need information on any other topic. I'm here to help!\n", "gpt-4-0314", "2023-06-26T11:54:00", "2023-06-26T11:54:34.819720", "chatcmpl-7VldYbCjPPC839qSooOjqtjTYyjo7", "openai", "rbtf91s6", 34.81971949600029, 67, 253, 320]]}

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_108_d4754b6aea82a8c16ed9.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_109_bcb8cb29447c99155339.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_10_d888c45dceed81f58d7a.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_110_6b738aa2166ace2f9d56.table.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: co-6GGKA0GV,Cookies x The Grower Circle - TangEray - Indoor - Infused Joint - 1g,nan,nan,nan,nan\n", "\n\n**assistant**: It seems you are looking for information on TangEray, an indoor infused joint by Cookies x The Grower Circle. TangEray is a 1-gram cannabis joint that provides a potent and flavorful experience.\n\nUnfortunately, the information you provided is not enough for me to give a detailed response. Nonetheless, here's some basic information about infused joints and the two brands involved:\n\nInfused Joints: \nInfused joints, also known as pre-rolls, are pre-made cannabis joints that are filled with ground cannabis flower and often enhanced with cannabis concentrates, such as wax or oil. These concentrates are responsible for the \"infused\" label and can offer a stronger, more potent high compared to regular joints.\n\nCookies:\nCookies is a well-known cannabis lifestyle brand founded by rapper and entrepreneur Berner. They are recognized globally for their premium cannabis flower strains and innovative products. Cookies focuses on genetics, innovation, and the overall quality of their products.\n\nThe Grower Circle:\nThe Grower Circle is a group of expert cultivators and cannabis connoisseurs, dedicated to providing high-quality cannabis products. Their collaboration with Cookies further enhances the quality and experience of their cannabis products.\n\nAs for TangEray, it's an indoor, infused joint created as a collaboration between Cookies and The Grower Circle. 
To know more about it or its effects, you may want to visit a local dispensary or check online resources for more detailed information, keeping in mind the legal status of cannabis in your region.\n", "gpt-4-0314", "2023-06-26T11:55:04", "2023-06-26T11:55:32.640890", "chatcmpl-7VleaFAnv4qWX3G29gNiEAsR4IKVh", "openai", "rbtf91s6", 28.64088961699963, 57, 300, 357]]}

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_111_5aed3974ba4952f876eb.table.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: co-7YGGNHBR,Kanha - Nano Blood Orange Bliss Indica - Gummy - 10ct - 100mg,Kanha,nan,nan,nan\n", "\n\n**assistant**: Hello! It looks like you're interested in the Kanha Nano Blood Orange Bliss Indica Gummy. Below, you'll find some information about this product:\n\n- Brand: Kanha\n- Type: Indica Gummy\n- Flavor: Blood Orange Bliss\n- Serving Size: 10 gummies per pack\n- Total THC Content: 100mg\n- THC per gummy: 10mg\n- Key Feature: Nanomolecular Technology for faster, more efficient absorption\n\nKanha's Nano Blood Orange Bliss Indica Gummies are infused with high-quality cannabis oil and nanomolecular technology that provides a faster, more efficient absorption of THC. This means you'll feel the effects more quickly and may experience a smoother experience compared to traditional edibles. These gummies have a delicious blood orange flavor and are perfect for those looking for a relaxing and calming experience.\n\nPlease note that you should start with a low dosage and adjust as needed. It is always best to consult with a healthcare professional or a budtender for personalized advice based on your individual needs and tolerance. Keep in mind that the effects of edibles can vary greatly depending on factors like metabolism and body chemistry, so what works for someone else may not work the same way for you. Remember to enjoy responsibly and keep these gummies away from children.\n", "gpt-4-0314", "2023-06-26T11:55:33", "2023-06-26T11:56:08.063033", "chatcmpl-7Vlf3CbhKfGocBHh6oW4nUTbu69rd", "openai", "rbtf91s6", 35.06303332399966, 58, 266, 324]]}

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_112_3a7258bfdca8fcd1719b.table.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: md-1242517,BOBBI HYLL | Indoor - 3.5g,Fresca,Flower,nan,3.5\n", "\n\n**assistant**: Hello! It seems like you're interested in information about the product: BOBBI HYLL Indoor 3.5g - Fresca Flower.\n\nBOBBI HYLL's Indoor 3.5g is a high-quality cannabis flower product that offers an enjoyable and aromatic experience. As a 3.5g (1/8 oz) unit, this product is a convenient option for personal consumption or sharing with friends.\n\nThe strain featured in this product is Fresca, known for its fruity and refreshing aroma. This flower is typically associated with an uplifting and invigorating effect, making it a suitable choice for social or creative activities. Users of Fresca often report a sense of increased energy, focus, and a pleasant head high when consuming this strain. Its balanced profile of THC and other cannabinoids make it a viable option for both recreational and medicinal users.\n\nPlease note that effects may vary based on individual factors such as tolerance, dosage, and consumption methods. Always use cannabis products responsibly and consult a healthcare professional if you have any concerns or questions.\n\nEnjoy your BOBBI HYLL Indoor 3.5g Fresca Flower and let us know if you have any questions or need further assistance!\n", "gpt-4-0314", "2023-06-26T11:56:10", "2023-06-26T11:56:40.822175", "chatcmpl-7VlfehpIxe7ySnByC6Q9UGmf39tnm", "openai", "rbtf91s6", 30.822175170000264, 52, 241, 293]]}

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_113_71838fad78e890a3e4cd.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_114_200ab5992b87e0e3b2d4.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_115_3e8ce0d79ca9a77c577e.table.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: co-76GP441T,Minntz - Emerald Cut - Indoor - Joint - 1g,Minntz,nan,Emerald Cut,nan\n", "\n\n**assistant**: Hello! It looks like you're searching for information on Minntz Emerald Cut, a 1g indoor joint. Unfortunately, I can't provide specific details on this specific product as I am an AI language model and have limitations regarding real-time, updated product information.\n\nHowever, I can provide some general information about cannabis joints.\n\nA joint is a rolled cannabis cigarette, typically containing around 1g of cannabis material. It is used for smoking and offers various effects depending on the strain used. An indoor-grown cannabis plant usually produces higher quality flowers due to the controlled environment it grows in, which may result in a smoother and more potent experience.\n\nEmerald Cut might be the name of the strain used in this joint. Cannabis strains vary in terms of flavor, aroma, and effects. Sativa-dominant strains tend to provide uplifting and energizing experiences, whereas Indica-dominant strains are likely to provide a more relaxing and calming effect. Hybrid strains are a mix of both, with effects depending on the specific combination of Sativa and Indica properties.\n\nIf you're interested in learning more about the specific Minntz Emerald Cut joint or any other cannabis-related topics, I would suggest contacting your local dispensary or exploring online resources dedicated to cannabis products and discussions.\n", "gpt-4-0314", "2023-06-26T11:57:11", "2023-06-26T11:57:35.146583", "chatcmpl-7VlgdhgYBSUuMB38xQt4lYHeb481r", "openai", "rbtf91s6", 24.146582954999758, 54, 256, 310]]}

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_116_27261d37b58e97119be1.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_117_8afbd8d9b801892f328e.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_118_f5e4033d82ffd310d340.table.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"columns": ["request", "response", "model", "start_time", "end_time", "request_id", "api_type", "session_id", "elapsed_time", "prompt_tokens", "completion_tokens", "total_tokens"], "data": [["\n\n**system**: ../prompts/gpt4-system-message2.txt\n\n\n**user**: co-847ZXF37,The Grower Circle - Zoo Dawg x Cosa Nostra - Multi Joint - 5pk - 2.5g,nan,nan,nan,nan\n", "\n\n**assistant**: Hello! It seems like you are looking for information on the Grower Circle's \"Zoo Dawg x Cosa Nostra\" Multi-Joint 5-pack (2.5g total). While I couldn't find specific details about this product, I can provide some general information about its parent strains.\n\n- Zoo Dawg: This strain is a hybrid cannabis with genetic lineage from Zoo Keeper, Strawberry Glue, and Stardawg. Zoo Dawg is known for its relaxing and uplifting effects, making it suitable for stress relief and inducing a positive mood.\n\n- Cosa Nostra: This is an indica-dominant strain with a combination of the genetics from Peyote Purple and SFV OG Kush F4. It is known for its potent effects, providing relaxation, and helping relieve pain, anxiety, and insomnia.\n\nThis Multi-Joint 5-pack would be a combination of these strains, so expect a hybrid experience, which may provide a balanced combination of relaxation, stress relief, and uplifting mood. Please note that each user's experience may vary since individual reactions to cannabis products depend on a variety of factors. Remember to consume cannabis products responsibly and consult with a medical professional if you have any concerns or are using cannabis for medical purposes.\n", "gpt-4-0314", "2023-06-26T11:58:13", "2023-06-26T11:58:35.308054", "chatcmpl-7VlhdjuABCp8pq7UA9hP1nNXE2Cv2", "openai", "rbtf91s6", 22.308054483999513, 62, 250, 312]]}

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_119_6d43ea3093b2b3ecadd6.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_11_81ba18437deae19f6d25.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_120_628c0792a073cd9c405c.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_121_b556a799b6d3b562770d.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_122_b10f20986ab3c17d78d0.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_123_ec51ba8e67e34ec07ce2.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_124_c2c63679dc07c9c48315.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_125_4d67abd74b85c3978623.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_126_8815035ed0a50d11f9e3.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_127_98fa95995325ab388230.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_128_a77f4efc17f0b058d06b.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_129_4ed9178a8330c3b08b77.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_12_418719b4fcd204ed66d8.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_130_691215590d7f6b91716f.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_131_abafdaca35ad3cbbc1f4.table.json ADDED Viewed

	@@ -0,0 +1 @@

app/wandb/run-20230626_114056-rbtf91s6/files/media/table/stats_132_1b1d305dd76406f7892d.table.json ADDED Viewed

	@@ -0,0 +1 @@