{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "d:\\FYP\\lilt-app-without-fd\\lilt-env\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "from transformers import LiltModel, AutoTokenizer, LiltForTokenClassification" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Download tokenizer" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('models/lilt-tokenizer\\\\tokenizer_config.json',\n", " 'models/lilt-tokenizer\\\\special_tokens_map.json',\n", " 'models/lilt-tokenizer\\\\tokenizer.json')" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "TOKENIZER = 'nielsr/lilt-xlm-roberta-base'\n", "tokenizer = AutoTokenizer.from_pretrained(TOKENIZER)\n", "save_dir = 'models/lilt-tokenizer'\n", "tokenizer.save_pretrained(save_dir)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Download and save token classification model" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Downloading config.json: 100%|██████████| 1.13k/1.13k [00:00<00:00, 283kB/s]\n", "d:\\FYP\\lilt-app-without-fd\\lilt-env\\lib\\site-packages\\huggingface_hub\\file_download.py:133: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\Gihantha Kavishka\\.cache\\huggingface\\hub. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co./docs/huggingface_hub/how-to-cache#limitations.\n", "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n", " warnings.warn(message)\n", "Downloading model.safetensors: 100%|██████████| 1.13G/1.13G [08:02<00:00, 2.35MB/s]\n" ] } ], "source": [ "# download the model\n", "MODEL = \"kavg/LiLT-SER-Sin\"\n", "model = LiltForTokenClassification.from_pretrained(MODEL)\n", "\n", "# save the model\n", "save_dir = \"models/lilt-ser-iob\"\n", "model.save_pretrained(save_dir)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Download and save RE model" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "from models import LiLTRobertaLikeForRelationExtraction\n", "\n", "# download the model\n", "MODEL = 'kavg/LiLT-RE-IT-Sin'\n", "model = LiLTRobertaLikeForRelationExtraction.from_pretrained(MODEL)\n", "\n", "# save the model\n", "save_dir = \"models/lilt-re\"\n", "model.save_pretrained(save_dir)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "lilt-env", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.8" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }