{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "d:\\FYP\\lilt-app-without-fd\\lilt-env\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "from transformers import LiltModel, AutoTokenizer, LiltForTokenClassification" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Download tokenizer" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('models/lilt-tokenizer\\\\tokenizer_config.json',\n", " 'models/lilt-tokenizer\\\\special_tokens_map.json',\n", " 'models/lilt-tokenizer\\\\tokenizer.json')" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "TOKENIZER = 'nielsr/lilt-xlm-roberta-base'\n", "tokenizer = AutoTokenizer.from_pretrained(TOKENIZER)\n", "save_dir = 'models/lilt-tokenizer'\n", "tokenizer.save_pretrained(save_dir)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Download and save token classification model" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# download the model\n", "MODEL = \"pierreguillou/lilt-xlm-roberta-base-finetuned-funsd-iob-original\"\n", "model = LiltForTokenClassification.from_pretrained(MODEL)\n", "\n", "# save the model\n", "save_dir = \"models/lilt-ser-iob\"\n", "model.save_pretrained(save_dir)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Download and save RE model" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Downloading config.json: 100%|██████████| 794/794 [00:00<00:00, 61.2kB/s]\n", "d:\\FYP\\lilt-app-without-fd\\lilt-env\\lib\\site-packages\\huggingface_hub\\file_download.py:133: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\Gihantha Kavishka\\.cache\\huggingface\\hub. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co./docs/huggingface_hub/how-to-cache#limitations.\n", "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n", " warnings.warn(message)\n", "Downloading pytorch_model.bin: 100%|██████████| 1.15G/1.15G [08:10<00:00, 2.34MB/s]\n", "Some weights of the model checkpoint at kavg/layoutxlm-finetuned-xfund-fr-re were not used when initializing LiltModel: ['extractor.rel_classifier.linear.weight', 'extractor.entity_emb.weight', 'extractor.ffnn_tail.0.weight', 'extractor.ffnn_tail.3.bias', 'extractor.ffnn_head.3.weight', 'extractor.ffnn_head.0.weight', 'extractor.ffnn_tail.0.bias', 'extractor.ffnn_head.3.bias', 'extractor.rel_classifier.bilinear.weight', 'extractor.rel_classifier.linear.bias', 'extractor.ffnn_head.0.bias', 'extractor.ffnn_tail.3.weight']\n", "- This IS expected if you are initializing LiltModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing LiltModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of LiltModel were not initialized from the model checkpoint at kavg/layoutxlm-finetuned-xfund-fr-re and are newly initialized: ['lilt.pooler.dense.bias', 'lilt.pooler.dense.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] } ], "source": [ "# download the model\n", "MODEL = 'kavg/layoutxlm-finetuned-xfund-fr-re'\n", "model = LiltModel.from_pretrained(MODEL)\n", "\n", "# save the model\n", "save_dir = \"models/lilt-re\"\n", "model.save_pretrained(save_dir)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "lilt-env", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.8" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }