diff --git "a/HINDI_GEMMA_9B_A30_all_evals.ipynb" "b/HINDI_GEMMA_9B_A30_all_evals.ipynb" new file mode 100644--- /dev/null +++ "b/HINDI_GEMMA_9B_A30_all_evals.ipynb" @@ -0,0 +1,23320 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JeByetuw4Z8p", + "outputId": "fde61f6f-ed78-4f1a-beb0-dd367acaf5b6" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: huggingface_hub in /usr/local/lib/python3.11/dist-packages (0.27.1)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (2.2.2)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface_hub) (3.16.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub) (24.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub) (6.0.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface_hub) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub) (4.12.2)\n", + "Requirement already satisfied: numpy>=1.23.2 in /usr/local/lib/python3.11/dist-packages (from pandas) (1.26.4)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in 
/usr/local/lib/python3.11/dist-packages (from pandas) (2024.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface_hub) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface_hub) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface_hub) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface_hub) (2024.12.14)\n" + ] + } + ], + "source": [ + "!pip install huggingface_hub pandas" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 17, + "referenced_widgets": [ + "2421cfa9a9144c41bc5f224c881301bf", + "2761891d1c634d9f8bf27dd0aab76eaf", + "6dae21739f5a4bfdac454802312bf683", + "a28f00af23d645e8b1fc0b354c4d5119", + "c2967e079a2b488bb1d71043a3c41420", + "15129e0a895d44b4958218da9acb66b6", + "8fc3cca3ac724bddad7614d14d25d6db", + "8552c7e81b594cd9a40f2b8944d0105d", + "2fb9e64285ed4505b0113af49608c069", + "9a030c7fb9c34f41930935fdabc60231", + "cdf0c23a21aa4366830bfb4853075190", + "3d8eeec9495141d9bcacbc37af3193a8", + "8cf98f5d58b440cc97c721e36caa247b", + "598daf89a59a4b39af012af9ffbd0820", + "0a459ed31e524c838772f1c9e54c72a5", + "e532ff104dfd44288064f4e67f17478f", + "10961dbed61e4967bfd1d819c6480cf0", + "fd61e8c07d5741e1b03c75c32541f4d9", + "fc2140881d1f4a2a92aee7d48174c7f4", + "9d275d17747d479e9ec9d65d44432759" + ] + }, + "id": "-KBQcoiuz-0C", + "outputId": "84a00434-1838-4180-ced9-715d8f61b18f" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "VBox(children=(HTML(value='
\n", + " | Input | \n", + "Output | \n", + "LANG | \n", + "type | \n", + "subject_name | \n", + "topic; | \n", + "subject | \n", + "
---|---|---|---|---|---|---|---|
0 | \n", + "Write a python function to find the index of s... | \n", + "import math \\r\\ndef find_Index(n): \\r\\n x =... | \n", + "en | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
1 | \n", + "Haemobilia is charactericed by all except ### ... | \n", + "D | \n", + "en | \n", + "NaN | \n", + "Surgery | \n", + "NaN | \n", + "NaN | \n", + "
2 | \n", + "Many people enjoy an adventure story. Others l... | \n", + "C | \n", + "en | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
3 | \n", + "On their way driving to Los Angeles, Benjamin ... | \n", + "To find the average number of billboards seen ... | \n", + "en | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
4 | \n", + "Nick is better at ping pong than Jason because... | \n", + "B | \n", + "en | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
33767 | \n", + "If each spoonful of Mikey's honey cluster of o... | \n", + "5 | \n", + "en | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
33768 | \n", + "How often can resolutions be approved for cont... | \n", + "The text does not specify how often resolution... | \n", + "en | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
33769 | \n", + "Elizabeth Freeman was born about 1742 to Afric... | \n", + "B | \n", + "en | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
33770 | \n", + "कोलकाता के लिए 5-दिन की यात्रा योजना:\\n\\n**दिन... | \n", + "दिन 1: विक्टोरिया मेमोरियल, सेंट पॉल का कैथेड्... | \n", + "hi | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
33771 | \n", + "सभी मान खोजें $x > 4$ जो संतुष्ट करते हैं\\n\\[\\... | \n", + "दी गई समीकरण से,\\n\\[\\sqrt{x + 4 \\sqrt{x - 4}} ... | \n", + "hi | \n", + "Intermediate Algebra | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
33772 rows × 7 columns
\n", + "Step | \n", + "Training Loss | \n", + "
---|---|
1000 | \n", + "1.109300 | \n", + "
2000 | \n", + "1.022100 | \n", + "
" + ] + }, + "metadata": {} + } + ], + "source": [ + "trainer_stats = trainer.train()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xj6XdJWNF6Pb" + }, + "source": [ + "#**MODEL SAVING**" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "id": "ULl7losCFj9L" + }, + "outputs": [], + "source": [ + "!wait" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 533, + "referenced_widgets": [ + "fc8d49b7f7fa46d5bffac0d4591a2fb3", + "5d50482eae954256b8810f5f68ce282f", + "d3eec490362446948471316f49d40395", + "41e103732216432fa35aba373732d49f", + "f7834cc170454c9eb649369d9d66dc27", + "15efb64d6bfd4ed99d297b86562237d8", + "4a66ffae89bf478094d0fc502d74c790", + "77c5c517a3c34ba0b3c577d21239b26a", + "6defda284abd4b3495fea860d50072e6", + "abd5c28eae8341a6849b0689a0a651ed", + "ff97c8e7cf55442d97486db4ae386c2d", + "8cac1e5604854b7680596913bb44ce55", + "efd12182263a4830831ffa2a05b1f861", + "906f2b7484664daebcecbfc83f486958", + "fef74e1a49004a90864ef09adb1b3e80", + "86e7960e21af4509b442938f6ac1959c", + "e277e5c6edc94fed9bdefe297bbcba88", + "63b878ef09ee4e318ae1c00526c0c4bb", + "07d3ad2afc804864bcdf07ab92aa0886", + "389bb24b86014dacb7d17bd8063f6fc0", + "4334f0155de2466b89881166376023a3", + "b376a415f7ac4fa0aeeb17006ab9dc23", + "4452bf8258bd474c8331452867b6b261", + "e54f6c84ab514df6960ef51d4c86a3b6", + "0092174d808c4fdab9179a312ced02a7", + "3d78d7a6b94e44078a6bacf562474d7b", + "becdebcdb6af435793deacf6ae27c25d", + "c8b3ee6292ff4747aa47e76349c81377", + "1a1a10dcb7d4464c8bfb9e2d5862f97d", + "f3334639ad154f1791fcdee59ebb3a50", + "ce947567e41549179da455f26c063f02", + "c26be0d639694d80897e20f2a54500d4", + "18c93e34d6834839bc770f8fe2412bb1", + "69e6a443d2114e52be185af5929ba94b", + "bd71131a20f84067aeb0bc5ffc547e23", + "0a9eaae6a6c14dc3aed02e4fc9918fa2", + "a8a399f12b264ead8011ad6518aa74f4", + "9a0f61c98fef4e99bcd5dc0e37c1dbda", + 
"70b6d3f86272419bb08ea754158fef11", + "284ac30b298e4308a42d5d0278fe8087", + "711c401a92f24cba9d583554770f85e9", + "cd4288ec433a48adb06dc1cd4c0ec49e", + "7b86c2659479439689e667f790f66696", + "d3932d1f31034a538488eb0a9ebcd7fd", + "e8060a5a831a4732837387a83fe233b8", + "8fef6350d80d4d84ab42ad174e8ce36f", + "22cc3bb5e7704d5c908b3b0a195bc831", + "a24a27c579364776923862e52b19cc07", + "447b687dc469441dbdde9b5848dbce83", + "377edac8efd346f082ed1a2afa61c6a5", + "b3f6c9de387244e8898ebba3a364ead2", + "b050503e320f476a9aacc853fafa8adc", + "763849d7a0de4de399bb3fe031216657", + "f5e1f07c5e4846658e333e23599266cd", + "fabeb1fa98db45f4af94f85140526db9", + "2070f3a57e3c4ae08ad57c4233cf16c1", + "b208364fb3264357aeabb99454d27480", + "269b1548b066423f9aeb003f0aa47edd", + "6705723ce8aa4121af49a17c7c2a1c9d", + "0d93f63b257b44adb897ebe23b6f875a", + "3c72365bb0174f9789d7f7b2deb70b59", + "67584143096d41b38d4f7ab3f92db3aa", + "2529063c45504b5d885b828a3d616271", + "e75234ebddac4639a139325b7cb0b2f3", + "b8d63ae359124f5ca95b9bea25c80cc6", + "3f5fd2ca812149a8bfad68a630fd6cb0", + "721d4ec0cb644e5aab4bf5ce07381343", + "341a2211913b418e9f4dfe5dc079036a", + "d934798a903b478592717c366d3ed641", + "d5775b066d1e47658ae95d5312b318ea", + "757184dede5f4e97b308450ea1793220", + "dc7b089120ed417096539f3705b252ca", + "8141ec5872624f8499b2f8c0cd8e6b1d", + "350d3ae6c5bb4748b4cbb1b2b43afb2a", + "5952d32741ee48aaaf81181bcb85a6e3", + "3b8ce9fbfffe409f9fc93be9f5196a72", + "8b0d10ed292d46238caa529e42a81006", + "68469fa201ab41b0b3cffc8939fec0f7", + "6c9e64b7eaf04a648f1f34723cc2311a", + "95a73264888a49daacd6a92cc8f5e7d1", + "0e5adda2074842f29028319579ba252f", + "a9d09fd3141c48ee86de9f74e35e6535", + "735ec7ef25ab4631ab78c5599ece3eff", + "3e39e1843d8e4216ba629ba710e4a16d", + "10b1b331b4254f0dbd6ab7e19a018b6b", + "41867ea7c1194499801d3ce70c19bcea", + "66a4facfd96d445dac46c52a9311e734", + "a5092594fb234ee6a02826856cfa7ae5" + ] + }, + "id": "cEGk9Cm1FmoH", + "outputId": "d17aa22c-56be-4b6e-8a9c-3f95d1ba8b4f" + }, + 
"outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Unsloth: You are pushing to hub, but you passed your HF username = DrishtiSharma.\n", + "We shall truncate DrishtiSharma/HINDI-GEMMA-9B-A30 to HINDI-GEMMA-9B-A30\n", + "Unsloth: Kaggle/Colab has limited disk space. We need to delete the downloaded\n", + "model which will save 4-16GB of disk space, allowing you to save on Kaggle/Colab.\n", + "Unsloth: Will remove a cached repo with size 18.5G\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Unsloth: Merging 4bit and LoRA weights to 16bit...\n", + "Unsloth: Will use up to 48.85 out of 83.48 RAM for saving.\n", + "Unsloth: Saving model... This might take 5 minutes ...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 62%|██████▏ | 26/42 [00:00<00:00, 69.78it/s]\n", + "We will save to Disk and not RAM now.\n", + "100%|██████████| 42/42 [00:11<00:00, 3.72it/s]\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Unsloth: Saving tokenizer..." 
+ ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "tokenizer.model: 0%| | 0.00/4.24M [00:00, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "fc8d49b7f7fa46d5bffac0d4591a2fb3" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Upload 2 LFS files: 0%| | 0/2 [00:00, ?it/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "8cac1e5604854b7680596913bb44ce55" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "tokenizer.json: 0%| | 0.00/34.4M [00:00, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "4452bf8258bd474c8331452867b6b261" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Done.\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "model-00004-of-00004.safetensors: 0%| | 0.00/3.67G [00:00, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "69e6a443d2114e52be185af5929ba94b" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Upload 4 LFS files: 0%| | 0/4 [00:00, ?it/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "e8060a5a831a4732837387a83fe233b8" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "model-00001-of-00004.safetensors: 0%| | 0.00/4.90G [00:00, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "2070f3a57e3c4ae08ad57c4233cf16c1" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + 
"model-00003-of-00004.safetensors: 0%| | 0.00/4.96G [00:00, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "721d4ec0cb644e5aab4bf5ce07381343" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "model-00002-of-00004.safetensors: 0%| | 0.00/4.95G [00:00, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "68469fa201ab41b0b3cffc8939fec0f7" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Done.\n", + "Saved merged model to https://huggingface.co./DrishtiSharma/HINDI-GEMMA-9B-A30\n" + ] + } + ], + "source": [ + "if True: model.push_to_hub_merged(\"DrishtiSharma/HINDI-GEMMA-9B-A30\", tokenizer, save_method = \"merged_16bit\")\n", + "# USE YOUR HF ACCOUNT AND NOT THE ORG\n", + "# NAME FORMAT: HINDI-MODELNAME-EXTENSION-A/B-00 - USE A/B IF BENCHMARK DATASETS WERE SET TO TRUE/FALSE IN FLAGS; THE NUMBER IS THE RATIO USED ABOVE" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "U5gGbbSNF1oB" + }, + "outputs": [], + "source": [ + "# SET THE REPO PRIVATE AFTER UPLOAD\n", + "# UPLOAD THE CSV TO THE REPO AFTER IT IS MADE PRIVATE\n", + "# CSV >> THE CSV SAVED IN YOUR RUNTIME" + ] + }, + { + "cell_type": "code", + "source": [ + "from google.colab import runtime\n", + "runtime.unassign()" + ], + "metadata": { + "id": "brN9DtmHhYPS" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "3BcP1kfkhcFb" + }, + "execution_count": null, + "outputs": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "A100", + "machine_shape": "hm", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + 
"version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "2421cfa9a9144c41bc5f224c881301bf": { + "model_module": "@jupyter-widgets/controls", + "model_name": "VBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "VBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "VBoxView", + "box_style": "", + "children": [], + "layout": "IPY_MODEL_8fc3cca3ac724bddad7614d14d25d6db" + } + }, + "2761891d1c634d9f8bf27dd0aab76eaf": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8552c7e81b594cd9a40f2b8944d0105d", + "placeholder": "", + "style": "IPY_MODEL_2fb9e64285ed4505b0113af49608c069", + "value": "