{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "2fd77a529c20442ba1a39f16f675e93e": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_3e72d9a66cd34f5283f7bcfef1bc6d13", "IPY_MODEL_8d9ad47531bf4fb5bbc82d9586aff850", "IPY_MODEL_4872697db9a44af2a005748529cb7e2d" ], "layout": "IPY_MODEL_6fec737ad5f14c76b07ab307b7d697c9" } }, "3e72d9a66cd34f5283f7bcfef1bc6d13": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e4dd53be797549b2b3e6be8369fa6c66", "placeholder": "​", "style": "IPY_MODEL_cf54986f5f5d4ad8a7276fa483ef149c", "value": "Downloading (…)okenizer_config.json: 100%" } }, "8d9ad47531bf4fb5bbc82d9586aff850": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f50487faf7a0474eb2c63f77862addfe", "max": 401, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_06256cff6b5e4c79afd3c8bb25763d45", "value": 401 } }, "4872697db9a44af2a005748529cb7e2d": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6491f8f9cf004b8bba9c3675aa393e5a", "placeholder": "​", "style": "IPY_MODEL_7716fc73b71d4de98ac6a0705adf7d58", "value": " 401/401 [00:00<00:00, 6.88kB/s]" } }, "6fec737ad5f14c76b07ab307b7d697c9": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e4dd53be797549b2b3e6be8369fa6c66": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "cf54986f5f5d4ad8a7276fa483ef149c": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f50487faf7a0474eb2c63f77862addfe": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "06256cff6b5e4c79afd3c8bb25763d45": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "6491f8f9cf004b8bba9c3675aa393e5a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7716fc73b71d4de98ac6a0705adf7d58": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "cf7756937a184a828cedda1e13e02f71": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_478dd6db26604417abed6836f2051493", "IPY_MODEL_224a3b7985e84010820e58dbc9ee9bbd", "IPY_MODEL_4e2feb43eb7b4ea8926973c56eb39186" ], "layout": "IPY_MODEL_74e6fe7aa7484d5886b2f0d8b9ff9de3" } }, "478dd6db26604417abed6836f2051493": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_aba3fdcf58eb47908f416e6fe5702dbb", "placeholder": "​", "style": "IPY_MODEL_b6f963f5c4d343019266c2c5f9f3f8c0", "value": "Downloading (…)solve/main/vocab.txt: 100%" } }, "224a3b7985e84010820e58dbc9ee9bbd": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6b28ee37c9eb48969a85e3edf96a2c2d", "max": 1080667, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_08f70325b7ca49fdb79688c715b03aa8", "value": 1080667 } }, "4e2feb43eb7b4ea8926973c56eb39186": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_01311c9a0bcb438aab259bd027d5b9bd", "placeholder": "​", "style": "IPY_MODEL_da4d662aa62145a1b6ac7e29fcbc1308", "value": " 1.08M/1.08M [00:00<00:00, 12.8MB/s]" } }, "74e6fe7aa7484d5886b2f0d8b9ff9de3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "aba3fdcf58eb47908f416e6fe5702dbb": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b6f963f5c4d343019266c2c5f9f3f8c0": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "6b28ee37c9eb48969a85e3edf96a2c2d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "08f70325b7ca49fdb79688c715b03aa8": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "01311c9a0bcb438aab259bd027d5b9bd": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "da4d662aa62145a1b6ac7e29fcbc1308": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "2317893fe74f4fc4bfc217ada67676fd": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_4e80f244dd0b409ea46f47095ac83c07", "IPY_MODEL_c8e42b4023cb42719d88f5f63c97d1b8", "IPY_MODEL_dc78c19250e54bd9948cba6dfa3ea117" ], "layout": "IPY_MODEL_feac6f8975674f8db5d95a685f0a72a9" } }, "4e80f244dd0b409ea46f47095ac83c07": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f8acfd3328274382b2ad2e6471afa9f5", "placeholder": "​", "style": "IPY_MODEL_3ce012e407f04776ad59baf1b2bf91c6", "value": "Downloading (…)/main/tokenizer.json: 100%" } }, "c8e42b4023cb42719d88f5f63c97d1b8": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_ca0d5613b3584c1f8cfa0b21ba5a5953", "max": 1741842, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_2dbd9281e5ab4d689566517853e19c80", "value": 1741842 } }, "dc78c19250e54bd9948cba6dfa3ea117": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c6483eb694854303891a0b86d39ce957", "placeholder": "​", "style": "IPY_MODEL_260b98660dfa4274b1a93da4500ad582", "value": " 1.74M/1.74M [00:00<00:00, 33.4MB/s]" } }, "feac6f8975674f8db5d95a685f0a72a9": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f8acfd3328274382b2ad2e6471afa9f5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3ce012e407f04776ad59baf1b2bf91c6": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ca0d5613b3584c1f8cfa0b21ba5a5953": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2dbd9281e5ab4d689566517853e19c80": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "c6483eb694854303891a0b86d39ce957": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "260b98660dfa4274b1a93da4500ad582": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "c158f0c3abf94caabb09d2738b770f46": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_e6d9976c28a7437ebdd64c9c9fdaef2e", "IPY_MODEL_075871b1d7f14cd9a67d585bd3f5d3df", "IPY_MODEL_199d754496814e408dc8ff51da285ef1" ], "layout": "IPY_MODEL_cc4628a9e6a740608b7b4ecff510d656" } }, "e6d9976c28a7437ebdd64c9c9fdaef2e": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a86d299047cf4270a4bee4ce69c9c99c", "placeholder": "​", "style": "IPY_MODEL_0d9e3940cdde4830a70b0f3f6d5ee5dd", "value": "Downloading (…)cial_tokens_map.json: 100%" } }, "075871b1d7f14cd9a67d585bd3f5d3df": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_38197716b32b491090ca10e6a43bb658", "max": 112, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_ad6db74452b241c1b82168d24e68dddd", "value": 112 } }, "199d754496814e408dc8ff51da285ef1": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f9a25f20611e479fb0fcffd7703d0974", "placeholder": "​", "style": "IPY_MODEL_6c574b7fa22742bb86a25a1a9ff390c1", "value": " 112/112 [00:00<00:00, 3.06kB/s]" } }, "cc4628a9e6a740608b7b4ecff510d656": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a86d299047cf4270a4bee4ce69c9c99c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0d9e3940cdde4830a70b0f3f6d5ee5dd": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "38197716b32b491090ca10e6a43bb658": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ad6db74452b241c1b82168d24e68dddd": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "f9a25f20611e479fb0fcffd7703d0974": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6c574b7fa22742bb86a25a1a9ff390c1": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "d9f824e3f8b546bebbfae41189180815": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_2df0d75f6ed8426bbfba1a7c83ec56f1", "IPY_MODEL_4295fe81d1194614abc17ae998cd4b19", "IPY_MODEL_4864984f6d4f4c6f9f75d86a8c5c0a41" ], "layout": "IPY_MODEL_48d3f5c0a6914642a8dcd1017b678ea3" } }, "2df0d75f6ed8426bbfba1a7c83ec56f1": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_25045072dde34a2b8d20fbea961b5dca", "placeholder": "​", "style": "IPY_MODEL_b037e76fb16b4d239cc3a619d97d1f24", "value": "Downloading (…)lve/main/config.json: 100%" } }, "4295fe81d1194614abc17ae998cd4b19": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_7e7c2c2121d644de9163a47ebf9476cf", "max": 693, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_8a5a7cf6528d4e7ab414e48e449d08cc", "value": 693 } }, "4864984f6d4f4c6f9f75d86a8c5c0a41": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_48b7d904baf849bfa465c98eb469636c", "placeholder": "​", "style": "IPY_MODEL_b23acfe1f248463897b2bdf6fbef37af", "value": " 693/693 [00:00<00:00, 32.1kB/s]" } }, "48d3f5c0a6914642a8dcd1017b678ea3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "25045072dde34a2b8d20fbea961b5dca": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b037e76fb16b4d239cc3a619d97d1f24": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "7e7c2c2121d644de9163a47ebf9476cf": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8a5a7cf6528d4e7ab414e48e449d08cc": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "48b7d904baf849bfa465c98eb469636c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b23acfe1f248463897b2bdf6fbef37af": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "7590f17564dc463481c18e4f67cf8497": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_f24d68528d0542f9b62a22e398bf39b3", "IPY_MODEL_65f82163fa514acd818e0157558e394d", "IPY_MODEL_2f36340cc4a649998a4304e187448803" ], "layout": "IPY_MODEL_8e594e946da64734b229f286ff43d45d" } }, "f24d68528d0542f9b62a22e398bf39b3": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_471e72546f6b4d84a90c85b03b0bdfb1", "placeholder": "​", "style": "IPY_MODEL_c0ea62c43e7e441090769cc925bbb699", "value": "Downloading model.safetensors: 100%" } }, "65f82163fa514acd818e0157558e394d": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_52d1799889d54ae8bac5782528995516", "max": 117529600, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_c2fdfb28e8e14fa88a957fd53df83cf2", "value": 117529600 } }, "2f36340cc4a649998a4304e187448803": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_efd7b0d174434e5b86f9cfe1f7f77239", "placeholder": "​", "style": "IPY_MODEL_3b598da65e2644d388933787519db6b8", "value": " 118M/118M [00:00<00:00, 151MB/s]" } }, "8e594e946da64734b229f286ff43d45d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "471e72546f6b4d84a90c85b03b0bdfb1": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c0ea62c43e7e441090769cc925bbb699": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "52d1799889d54ae8bac5782528995516": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c2fdfb28e8e14fa88a957fd53df83cf2": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "efd7b0d174434e5b86f9cfe1f7f77239": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3b598da65e2644d388933787519db6b8": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "id": "b-6SobDpiU4c" }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import requests\n", "from bs4 import BeautifulSoup\n", "import pandas as pd" ] }, { "cell_type": "code", "source": [ "pip install transformers" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "j7UIuaeBih3H", "outputId": "afc4f1cf-53af-4cf0-8cda-98176e759c0b" }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting transformers\n", " Downloading transformers-4.35.0-py3-none-any.whl (7.9 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.9/7.9 MB\u001b[0m \u001b[31m42.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.13.1)\n", "Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)\n", " Downloading huggingface_hub-0.19.0-py3-none-any.whl (311 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m311.2/311.2 kB\u001b[0m \u001b[31m22.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.23.5)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.2)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.6.3)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n", "Collecting tokenizers<0.15,>=0.14 (from transformers)\n", " Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m36.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting safetensors>=0.3.1 (from transformers)\n", " Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m34.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.1)\n", "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->transformers) (2023.6.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->transformers) (4.5.0)\n", "Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)\n", " Downloading huggingface_hub-0.17.3-py3-none-any.whl (295 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m295.0/295.0 kB\u001b[0m \u001b[31m31.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.7.22)\n", "Installing collected packages: safetensors, huggingface-hub, tokenizers, transformers\n", "Successfully installed huggingface-hub-0.17.3 safetensors-0.4.0 tokenizers-0.14.1 transformers-4.35.0\n" ] } ] }, { "cell_type": "code", "source": [ "pip install transformers sentencepiece" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "cCf8haNYiqki", "outputId": "f2e6362a-0619-49f5-f842-a215c2ff87e2" }, "execution_count": 3, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.35.0)\n", "Collecting sentencepiece\n", " Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.13.1)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.17.3)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.23.5)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.2)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.6.3)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n", "Requirement already satisfied: tokenizers<0.15,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.14.1)\n", "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.0)\n", "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.1)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->transformers) (2023.6.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->transformers) (4.5.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.7.22)\n", "Installing collected packages: sentencepiece\n", "Successfully installed sentencepiece-0.1.99\n" ] } ] }, { "cell_type": "markdown", "source": [ "Парсинг библиоглобус" ], "metadata": { "id": "GEIJILnxlAoj" } }, { "cell_type": "code", "source": [ "#Создаем пустые списки для сохранения данных\n", "data = []\n", "cur_item = []\n", "\n", "#Проходим циклом по всем страницам раздела на сайте Библио-глобуса\n", "for i in range(1, 137):\n", " print(i)\n", "\n", " url1 = f'https://www.biblio-globus.ru/catalog/category?id=226&page={i}&sort=0&instock=&isdiscount='\n", "\n", " #Берем страницу раздела по url\n", " r1 = requests.get(url1)\n", "\n", " #Превращаем страницу в объект супа\n", " soup1 = BeautifulSoup(r1.text, 'lxml')\n", "\n", " #Находим все книги на странице\n", " s1 = soup1.find_all(class_=\"product\")\n", "\n", " #В цикле пробегаем по всем найденным книгам\n", " for item in s1:\n", "\n", " #Создаем пустой список для сохранения данных о книге\n", " cur_item = []\n", "\n", " #Из объекта супа извлекаем ссылку на текущую книгу\n", " s2 = item.find('h3').find('a').get('href')\n", "\n", " #Соединяем ссылку и url сайта\n", " url2 = 'https://www.biblio-globus.ru' + s2\n", "\n", " #Сохраняем url книги\n", " cur_item.append(url2)\n", "\n", " #Получае страницу книги\n", " r2 = requests.get(url2)\n", "\n", " #Превращаем страницу в объект супа\n", " soup2 = BeautifulSoup(r2.text, 'lxml')\n", "\n", " #Сохраняем url обложки\n", " t4 = soup2.find('meta', attrs={'property': \"og:image\"})\n", " cur_item.append(t4['content'])\n", "\n", " #Сохраняем данные об авторе\n", " t2 = soup2.find('meta', attrs={'property': \"og:book:author\"})\n", " cur_item.append(t2['content'])\n", "\n", " #Сохраняем название книги\n", " t1 = soup2.find('meta', attrs={'property': 'og:title'})\n", " cur_item.append(t1['content'])\n", "\n", " #Сохраняем аннотацию к книге\n", " t3 = soup2.find(id=\"module\").find(id=\"collapseExample\").contents[0].strip()\n", " cur_item.append(t3)\n", "\n", " #Добавляем все данные в список списков питона\n", " data.append(cur_item)\n", "\n", "#Создаем из списка списков датафрейм с заданными названиями колонок\n", "columns = ['page_url', 'image_url', 'author', 'title', 'annotation']\n", "df = pd.DataFrame(data, columns=columns)\n", "\n", "#Сохраняем датафрейм в csv файл\n", "df.to_csv('classic.csv', index=False)" ], "metadata": { "id": "ZO0xMGwVjyZD" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "bookurl = requests.get(f'https://www.biblio-globus.ru/catalog/category?id=611b2&page={i}&sort=0&instock=&isdiscount=')" ], "metadata": { "id": "mqhqeAfgj-WF" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "url_list = []\n", "for id in range(1,122):\n", " i = id\n", " bookurl = requests.get(f'https://www.biblio-globus.ru/catalog/category?id=6112&page={i}&sort=0&instock=&isdiscount=')\n", " a = BeautifulSoup(bookurl.text, 'html.parser')\n", " all_page_songs = a.find_all('a', attrs = {'class' : 'img_link'})\n", " for item in all_page_songs:\n", " item_href = 'https://www.biblio-globus.ru' + item.get('href')\n", "\n", " url_list.append(item_href)" ], "metadata": { "id": "spctUaapj_XJ" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "title_names = []\n", "images_urls = []\n", "anotation_list = []\n", "authors = []\n", "url_detect = []\n", "for url in url_list:\n", " get_url = requests.get(url)\n", " a = BeautifulSoup(get_url.text, 'html.parser')\n", " try:\n", " titles = a.find('h1',).text\n", " images = a.find('img', attrs = {'class' : 'product_photo zoomIn img-fluid'}).get('src')\n", " author = a.find('a', attrs = {'class' : 'scroll-to'}).text.strip()\n", " description = a.find('div', {'class': 'collapse', 'id': 'collapseExample'}).text.strip()\n", " characteristics = a.find('h3', text='Характеристики')\n", " if characteristics:\n", " next_sibling = characteristics.find_next_sibling()\n", " if next_sibling:\n", " description = description.split(characteristics.text, 1)[0].strip()\n", " if author == '' or titles == '' or description == '' or images == '':\n", " print(f'{url}')\n", " url_detect.append(url)\n", " else:\n", "\n", " images_urls.append(images)\n", " authors.append(author)\n", " anotation_list.append(description)\n", " title_names.append(titles)\n", " except (requests.RequestException, ValueError, AttributeError) as e:\n", " print(f\"Ошибка {url}\")\n", "\n", "for url in url_detect:\n", " url_list.remove(url)" ], "metadata": { "id": "eAYvZRRQkFpX" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "url_list.remove('https://www.biblio-globus.ru/product/10631772')" ], "metadata": { "id": "RVTpSrc6kIol" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "df = pd.DataFrame({\n", " 'page_url': url_list,\n", " 'image_url': images_urls,\n", " 'author': authors,\n", " 'title': title_names,\n", " 'annotation': anotation_list\n", "})" ], "metadata": { "id": "IEUkEYdmkJFC" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "import torch\n", "from transformers import AutoTokenizer, AutoModel\n", "tokenizer = AutoTokenizer.from_pretrained(\"cointegrated/rubert-tiny2\")\n", "model = AutoModel.from_pretrained(\"cointegrated/rubert-tiny2\")\n", "# model.cuda() # uncomment it if you have a GPU\n", "\n", "def embed_bert_cls(text, model, tokenizer):\n", " t = tokenizer(text, padding=True, truncation=True, return_tensors='pt')\n", " with torch.no_grad():\n", " model_output = model(**{k: v.to(model.device) for k, v in t.items()})\n", " embeddings = model_output.last_hidden_state[:, 0, :]\n", " embeddings = torch.nn.functional.normalize(embeddings)\n", " return embeddings[0].cpu().numpy()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 209, "referenced_widgets": [ "2fd77a529c20442ba1a39f16f675e93e", "3e72d9a66cd34f5283f7bcfef1bc6d13", "8d9ad47531bf4fb5bbc82d9586aff850", "4872697db9a44af2a005748529cb7e2d", "6fec737ad5f14c76b07ab307b7d697c9", "e4dd53be797549b2b3e6be8369fa6c66", "cf54986f5f5d4ad8a7276fa483ef149c", "f50487faf7a0474eb2c63f77862addfe", "06256cff6b5e4c79afd3c8bb25763d45", "6491f8f9cf004b8bba9c3675aa393e5a", "7716fc73b71d4de98ac6a0705adf7d58", "cf7756937a184a828cedda1e13e02f71", "478dd6db26604417abed6836f2051493", "224a3b7985e84010820e58dbc9ee9bbd", "4e2feb43eb7b4ea8926973c56eb39186", "74e6fe7aa7484d5886b2f0d8b9ff9de3", "aba3fdcf58eb47908f416e6fe5702dbb", "b6f963f5c4d343019266c2c5f9f3f8c0", "6b28ee37c9eb48969a85e3edf96a2c2d", "08f70325b7ca49fdb79688c715b03aa8", "01311c9a0bcb438aab259bd027d5b9bd", "da4d662aa62145a1b6ac7e29fcbc1308", "2317893fe74f4fc4bfc217ada67676fd", "4e80f244dd0b409ea46f47095ac83c07", "c8e42b4023cb42719d88f5f63c97d1b8", "dc78c19250e54bd9948cba6dfa3ea117", "feac6f8975674f8db5d95a685f0a72a9", "f8acfd3328274382b2ad2e6471afa9f5", "3ce012e407f04776ad59baf1b2bf91c6", "ca0d5613b3584c1f8cfa0b21ba5a5953", "2dbd9281e5ab4d689566517853e19c80", "c6483eb694854303891a0b86d39ce957", "260b98660dfa4274b1a93da4500ad582", "c158f0c3abf94caabb09d2738b770f46", "e6d9976c28a7437ebdd64c9c9fdaef2e", "075871b1d7f14cd9a67d585bd3f5d3df", "199d754496814e408dc8ff51da285ef1", "cc4628a9e6a740608b7b4ecff510d656", "a86d299047cf4270a4bee4ce69c9c99c", "0d9e3940cdde4830a70b0f3f6d5ee5dd", "38197716b32b491090ca10e6a43bb658", "ad6db74452b241c1b82168d24e68dddd", "f9a25f20611e479fb0fcffd7703d0974", "6c574b7fa22742bb86a25a1a9ff390c1", "d9f824e3f8b546bebbfae41189180815", "2df0d75f6ed8426bbfba1a7c83ec56f1", "4295fe81d1194614abc17ae998cd4b19", "4864984f6d4f4c6f9f75d86a8c5c0a41", "48d3f5c0a6914642a8dcd1017b678ea3", "25045072dde34a2b8d20fbea961b5dca", "b037e76fb16b4d239cc3a619d97d1f24", "7e7c2c2121d644de9163a47ebf9476cf", "8a5a7cf6528d4e7ab414e48e449d08cc", "48b7d904baf849bfa465c98eb469636c", "b23acfe1f248463897b2bdf6fbef37af", "7590f17564dc463481c18e4f67cf8497", "f24d68528d0542f9b62a22e398bf39b3", "65f82163fa514acd818e0157558e394d", "2f36340cc4a649998a4304e187448803", "8e594e946da64734b229f286ff43d45d", "471e72546f6b4d84a90c85b03b0bdfb1", "c0ea62c43e7e441090769cc925bbb699", "52d1799889d54ae8bac5782528995516", "c2fdfb28e8e14fa88a957fd53df83cf2", "efd7b0d174434e5b86f9cfe1f7f77239", "3b598da65e2644d388933787519db6b8" ] }, "id": "Du37mn6NivtR", "outputId": "444a9c53-1c4a-4dd0-e9c5-2d08d32bd507" }, "execution_count": 4, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Downloading (…)okenizer_config.json: 0%| | 0.00/401 [00:00\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mfaiss\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'faiss'", "", "\u001b[0;31m---------------------------------------------------------------------------\u001b[0;32m\nNOTE: If your import is failing due to a missing package, you can\nmanually install dependencies using either !pip or !apt.\n\nTo view examples of installing some common dependencies, click the\n\"Open Examples\" button below.\n\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n" ], "errorDetails": { "actions": [ { "action": "open_url", "actionText": "Open Examples", "url": "/notebooks/snippets/importing_libraries.ipynb" } ] } } ] }, { "cell_type": "code", "source": [ "index = faiss.IndexFlatL2(d)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 182 }, "id": "5qFKN5YvUwH6", "outputId": "f0bc5e86-f903-4c6b-c2e2-f1a8b43ac182" }, "execution_count": 14, "outputs": [ { "output_type": "error", "ename": "NameError", "evalue": "ignored", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfaiss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mIndexFlatL2\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mNameError\u001b[0m: name 'faiss' is not defined" ] } ] }, { "cell_type": "code", "source": [ "index.add(embeddings_list_np)" ], "metadata": { "id": "yWvfky-Ek3dl" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "faiss.write_index(index, \"faiss.index\")" ], "metadata": { "id": "S1CNWPZnk5pQ" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Объедение разных спаршенных разделов в один датафрейм" ], "metadata": { "id": "Tpq5oL-JlQrU" } }, { "cell_type": "code", "source": [ " a = pd.read_csv('/content/ezo_classic_sovr_proza_merged_200.csv')" ], "metadata": { "id": "VF212DH7g8sy" }, "execution_count": 19, "outputs": [] }, { "cell_type": "code", "source": [ "c = pd.read_csv('/content/data (2).csv')" ], "metadata": { "id": "lTHX8TKXhAXT" }, "execution_count": 21, "outputs": [] }, { "cell_type": "code", "source": [ "df_new = pd.concat([a, c], ignore_index=True)" ], "metadata": { "id": "-dqUPRTWhUDP" }, "execution_count": 25, "outputs": [] }, { "cell_type": "code", "source": [ "df_final = df_new[df_new['annotation'].apply(lambda x: len(str(x)) >= 200)]" ], "metadata": { "id": "tVA9CwYXigM9" }, "execution_count": 29, "outputs": [] }, { "cell_type": "code", "source": [ "df_final['embeddings'] = df_final['annotation'].apply(lambda x: embed_bert_cls(x, model, tokenizer))\n", "\n", "# Сохраняем результат в файл формата txt\n", "with open('embeddings.txt', 'w') as file:\n", " for emb in df_final['embeddings']:\n", " file.write(' '.join(map(str, emb)) + '\\n')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "XqPKfysQh6Xq", "outputId": "3b804b98-8338-4dbb-f2c3-b0d0b39e2ebf" }, "execution_count": 33, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ ":1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_final['embeddings'] = df_final['annotation'].apply(lambda x: embed_bert_cls(x, model, tokenizer))\n" ] } ] } ] }