Spaces:

romnatall
/

film_recomendations

Sleeping

App Files Files Community

romnatall committed on Apr 19, 2024

Commit

0514b29

1 Parent(s): e633090

прогнозирование по трем моделям

Browse files

Files changed (5) hide show

app.py +50 -23
data/books_model (2).ipynb +620 -0
data/data.csv +2 -2
data/{embeddings.npy → embeddings_bert.npy} +2 -2
data/tf_idf_vectorizer.pkl +3 -0

app.py CHANGED Viewed

@@ -7,17 +7,27 @@ import torch
 from transformers import AutoTokenizer, AutoModel
 import numpy as np
 from sklearn.metrics.pairwise import cosine_similarity
-movies = pd.read_csv('data/data.csv')
 toggle_state = st.sidebar.checkbox("режим разметки")
 input_search = st.text_input('Search')
-data = np.load('data/embeddings.npy')
 @st.cache_resource
 def get_embeddings():
@@ -36,27 +46,50 @@ def embed_bert_cls(text, ):
     embeddings = torch.nn.functional.normalize(embeddings)
     return embeddings[0].cpu().numpy()
 @st.cache_data
 def predict_rating(input_search):
     emb = embed_bert_cls(input_search)
     X=np.column_stack((data, np.tile(emb, (data.shape[0], 1))))
-    st.session_state["X"]=X
-    # from catboost import CatBoostRanker
-    # cb= CatBoostRanker()
-    # cb.load_model('model.cbm')
-    # y = cb.predict(X)
-    # import pickle
-    # with open('logreg.pkl', 'rb') as f:
-    #     logreg = pickle.load(f)
-    # y = logreg.predict(X)
-    y= cosine_similarity(data, emb.reshape(1, -1)).reshape(-1)
-    return top_indices(y, 10)
 def saverank(index, new_X,new_y):
@@ -174,19 +207,13 @@ def getnums(df,size=0,text=''):
         return reqs[text]
-def top_indices(array, n):
-    # Получаем индексы элементов, отсортированных по убыванию
-    st.session_state["pred"] = array
-    sorted_indices = np.argsort(array)[::-1]
-    # Выбираем первые n индексов
-    top_n_indices = sorted_indices[:n]
-    return top_n_indices
-for i in predict_rating(input_search):
-    display_movie_card(movies, i )

 from transformers import AutoTokenizer, AutoModel
 import numpy as np
 from sklearn.metrics.pairwise import cosine_similarity
+from sklearn.metrics.pairwise import pairwise_distances
+import faiss
+from sklearn.feature_extraction.text import TfidfVectorizer
+import pickle
# Movie catalogue; predict_rating reads its 'description' and 'name' columns.
movies = pd.read_csv('data/data.csv')
# Sidebar checkbox; its label is Russian for "labelling mode".
toggle_state = st.sidebar.checkbox("режим разметки")
# Free-text search query typed by the user.
input_search = st.text_input('Search')
# Precomputed embedding matrix — presumably one row per row of `movies`; TODO confirm alignment.
data = np.load('data/embeddings_bert.npy')
def top_indices(array, n, upsc=False):
    """Pick the positions of the ``n`` best-ranked entries of ``array``.

    Args:
        array: Scores to rank (anything ``np.argsort`` accepts; expected
            to be one-dimensional).
        n: Maximum number of indices to return.
        upsc: When true, rank from the smallest score upwards; by default
            the largest scores come first.

    Returns:
        The first ``n`` indices of the requested ordering.

    Side effects:
        Stores ``array`` under ``st.session_state["pred"]``.
    """
    st.session_state["pred"] = array
    order = np.argsort(array)
    if not upsc:
        # argsort is ascending; flip it so the highest scores lead.
        order = order[::-1]
    return order[:n]
 @st.cache_resource
 def get_embeddings():
     embeddings = torch.nn.functional.normalize(embeddings)
     return embeddings[0].cpu().numpy()
@st.cache_resource
def getmodels():
    """Load the two pickled models the ranking code relies on.

    Returns:
        A ``(logreg, vectorizer)`` tuple unpickled from ``data/logreg.pkl``
        and ``data/tf_idf_vectorizer.pkl`` respectively.

    The function is wrapped in ``st.cache_resource``, so the files are not
    expected to be re-read on every script rerun.
    """
    def _unpickle(path):
        # NOTE: pickle executes code on load; only ever point this at
        # model files shipped with the app.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    return _unpickle('data/logreg.pkl'), _unpickle('data/tf_idf_vectorizer.pkl')
 @st.cache_data
 def predict_rating(input_search):
+    logreg, vectorizer=getmodels()
     emb = embed_bert_cls(input_search)
     X=np.column_stack((data, np.tile(emb, (data.shape[0], 1))))
+    user_tfidf = vectorizer.transform([input_search])
+    tfidf_matrix = vectorizer.transform(movies['description'])
+    tfidf_matrix2 = vectorizer.transform(movies['name'])
+    similarity_scores_desc = cosine_similarity(user_tfidf, tfidf_matrix)
+    similarity_scores_name = cosine_similarity(user_tfidf, tfidf_matrix2)
+    y_log = logreg.predict(X)
+    y_emb = cosine_similarity(data, emb.reshape(1, -1)).reshape(-1)
+    y=(similarity_scores_desc*0.9+similarity_scores_name*0.035+y_emb*.4+y_log*0.4).reshape(-1)
+    st.session_state["pred"]=y
+    return top_indices(y, 10,upsc=False)
 def saverank(index, new_X,new_y):
         return reqs[text]
# Nothing is ranked or drawn until the user has typed a query.
if input_search:
    ranked = predict_rating(input_search)
    for i in ranked:
        display_movie_card(movies, i)

data/books_model (2).ipynb ADDED Viewed

	@@ -0,0 +1,620 @@

+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "metadata": {
+        "id": "IlvYwT4VD8Bd"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Collecting sentence_transformers\n",
+            "  Downloading sentence_transformers-2.7.0-py3-none-any.whl.metadata (11 kB)\n",
+            "Requirement already satisfied: transformers<5.0.0,>=4.34.0 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from sentence_transformers) (4.39.3)\n",
+            "Requirement already satisfied: tqdm in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from sentence_transformers) (4.66.2)\n",
+            "Requirement already satisfied: torch>=1.11.0 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from sentence_transformers) (2.2.2)\n",
+            "Requirement already satisfied: numpy in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from sentence_transformers) (1.26.4)\n",
+            "Requirement already satisfied: scikit-learn in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from sentence_transformers) (1.4.1.post1)\n",
+            "Requirement already satisfied: scipy in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from sentence_transformers) (1.13.0)\n",
+            "Requirement already satisfied: huggingface-hub>=0.15.1 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from sentence_transformers) (0.22.2)\n",
+            "Requirement already satisfied: Pillow in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from sentence_transformers) (10.3.0)\n",
+            "Requirement already satisfied: filelock in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from huggingface-hub>=0.15.1->sentence_transformers) (3.13.3)\n",
+            "Requirement already satisfied: fsspec>=2023.5.0 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from huggingface-hub>=0.15.1->sentence_transformers) (2024.3.1)\n",
+            "Requirement already satisfied: packaging>=20.9 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from huggingface-hub>=0.15.1->sentence_transformers) (24.0)\n",
+            "Requirement already satisfied: pyyaml>=5.1 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from huggingface-hub>=0.15.1->sentence_transformers) (6.0.1)\n",
+            "Requirement already satisfied: requests in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from huggingface-hub>=0.15.1->sentence_transformers) (2.31.0)\n",
+            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from huggingface-hub>=0.15.1->sentence_transformers) (4.10.0)\n",
+            "Requirement already satisfied: sympy in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from torch>=1.11.0->sentence_transformers) (1.12)\n",
+            "Requirement already satisfied: networkx in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from torch>=1.11.0->sentence_transformers) (3.2.1)\n",
+            "Requirement already satisfied: jinja2 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from torch>=1.11.0->sentence_transformers) (3.1.3)\n",
+            "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from torch>=1.11.0->sentence_transformers) (12.1.105)\n",
+            "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from torch>=1.11.0->sentence_transformers) (12.1.105)\n",
+            "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from torch>=1.11.0->sentence_transformers) (12.1.105)\n",
+            "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from torch>=1.11.0->sentence_transformers) (8.9.2.26)\n",
+            "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from torch>=1.11.0->sentence_transformers) (12.1.3.1)\n",
+            "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from torch>=1.11.0->sentence_transformers) (11.0.2.54)\n",
+            "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from torch>=1.11.0->sentence_transformers) (10.3.2.106)\n",
+            "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from torch>=1.11.0->sentence_transformers) (11.4.5.107)\n",
+            "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from torch>=1.11.0->sentence_transformers) (12.1.0.106)\n",
+            "Requirement already satisfied: nvidia-nccl-cu12==2.19.3 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from torch>=1.11.0->sentence_transformers) (2.19.3)\n",
+            "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from torch>=1.11.0->sentence_transformers) (12.1.105)\n",
+            "Requirement already satisfied: nvidia-nvjitlink-cu12 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.11.0->sentence_transformers) (12.4.127)\n",
+            "Requirement already satisfied: regex!=2019.12.17 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from transformers<5.0.0,>=4.34.0->sentence_transformers) (2023.12.25)\n",
+            "Requirement already satisfied: tokenizers<0.19,>=0.14 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from transformers<5.0.0,>=4.34.0->sentence_transformers) (0.15.2)\n",
+            "Requirement already satisfied: safetensors>=0.4.1 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from transformers<5.0.0,>=4.34.0->sentence_transformers) (0.4.2)\n",
+            "Requirement already satisfied: joblib>=1.2.0 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from scikit-learn->sentence_transformers) (1.3.2)\n",
+            "Requirement already satisfied: threadpoolctl>=2.0.0 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from scikit-learn->sentence_transformers) (3.4.0)\n",
+            "Requirement already satisfied: MarkupSafe>=2.0 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from jinja2->torch>=1.11.0->sentence_transformers) (2.1.5)\n",
+            "Requirement already satisfied: charset-normalizer<4,>=2 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from requests->huggingface-hub>=0.15.1->sentence_transformers) (3.3.2)\n",
+            "Requirement already satisfied: idna<4,>=2.5 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from requests->huggingface-hub>=0.15.1->sentence_transformers) (3.6)\n",
+            "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from requests->huggingface-hub>=0.15.1->sentence_transformers) (1.26.18)\n",
+            "Requirement already satisfied: certifi>=2017.4.17 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from requests->huggingface-hub>=0.15.1->sentence_transformers) (2024.2.2)\n",
+            "Requirement already satisfied: mpmath>=0.19 in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from sympy->torch>=1.11.0->sentence_transformers) (1.3.0)\n",
+            "Downloading sentence_transformers-2.7.0-py3-none-any.whl (171 kB)\n",
+            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m171.5/171.5 kB\u001b[0m \u001b[31m566.5 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
+            "\u001b[?25hInstalling collected packages: sentence_transformers\n",
+            "Successfully installed sentence_transformers-2.7.0\n"
+          ]
+        }
+      ],
+      "source": [
+        "!pip install sentence_transformers"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 6,
+      "metadata": {
+        "id": "sdvkA3cwEVoZ"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Collecting faiss-cpu\n",
+            "  Downloading faiss_cpu-1.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)\n",
+            "Requirement already satisfied: numpy in /home/roma/anaconda3/envs/cv/lib/python3.12/site-packages (from faiss-cpu) (1.26.4)\n",
+            "Downloading faiss_cpu-1.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)\n",
+            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.0/27.0 MB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
+            "\u001b[?25hInstalling collected packages: faiss-cpu\n",
+            "Successfully installed faiss-cpu-1.8.0\n"
+          ]
+        }
+      ],
+      "source": [
+        "!pip install faiss-cpu\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 7,
+      "metadata": {
+        "id": "Hv0VlA_ZAtjH"
+      },
+      "outputs": [],
+      "source": [
+        "import pandas as pd\n",
+        "from sentence_transformers import SentenceTransformer\n",
+        "import faiss\n",
+        "import torch\n",
+        "import numpy as np"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 701
+        },
+        "id": "ENfI_teQBvxa",
+        "outputId": "a23427df-f5aa-40f0-ac35-2974803c66b4"
+      },
+      "outputs": [],
+      "source": [
+        "data = pd.read_csv('data.csv')\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 26,
+      "metadata": {
+        "id": "uLOmQB8VP_rH"
+      },
+      "outputs": [],
+      "source": [
+        "data = data.sample(frac=1)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "6eRXlMBBOyjr"
+      },
+      "outputs": [],
+      "source": [
+        "model = SentenceTransformer('distiluse-base-multilingual-cased')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 48,
+      "metadata": {
+        "id": "HkhTwHXtO5xk"
+      },
+      "outputs": [],
+      "source": [
+        "data['annotation_len'] = data['description'].apply(lambda x: len(str(x).split()) if pd.notnull(x) else 0)\n",
+        "\n",
+        "embedings =np.load('embeddings.npy')[data['annotation_len'] > 10]\n",
+        "data = data[data['annotation_len'] > 10] # Отсечение слишком коротких аннотаций\n",
+        "\n",
+        "\n",
+        "data.to_csv('data.csv')\n",
+        "np.save('embeddings.npy',embedings)\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 45,
+      "metadata": {},
+      "outputs": [
+        {
+          "ename": "AttributeError",
+          "evalue": "'numpy.ndarray' object has no attribute 'to_csv'",
+          "output_type": "error",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
+            "Cell \u001b[0;32mIn[45], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdata\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_csv\u001b[49m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata.csv\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m      2\u001b[0m np\u001b[38;5;241m.\u001b[39msave(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124membeddings.npy\u001b[39m\u001b[38;5;124m'\u001b[39m,embedings)\n",
+            "\u001b[0;31mAttributeError\u001b[0m: 'numpy.ndarray' object has no attribute 'to_csv'"
+          ]
+        }
+      ],
+      "source": [
+        "data.to_csv('data.csv')\n",
+        "np.save('embeddings.npy',embedings)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 50,
+      "metadata": {},
+      "outputs": [
+        {
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>name</th>\n",
+              "      <th>description</th>\n",
+              "      <th>link</th>\n",
+              "      <th>year</th>\n",
+              "      <th>imdb</th>\n",
+              "      <th>kp</th>\n",
+              "      <th>country</th>\n",
+              "      <th>age</th>\n",
+              "      <th>actors</th>\n",
+              "      <th>genres</th>\n",
+              "      <th>poster</th>\n",
+              "      <th>annotation_len</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>Уэнсдэй</td>\n",
+              "      <td>В американской хоррор-комедии показана детект...</td>\n",
+              "      <td>https://www.lordfilm.bot/48211-ujensdjej-2022....</td>\n",
+              "      <td>2022.0</td>\n",
+              "      <td>8.1</td>\n",
+              "      <td>8.0</td>\n",
+              "      <td>США</td>\n",
+              "      <td>0+</td>\n",
+              "      <td>Дженна Ортега, Гвендолин Кристи, Рики Линдхоум...</td>\n",
+              "      <td>Сериалы, Фильмы про подростков</td>\n",
+              "      <td>https://www.lordfilm.bot/uploads/posts/2022-12...</td>\n",
+              "      <td>157</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>Слово пацана. Кровь на асфальте</td>\n",
+              "      <td>Перестройка уже шагнула с кремлевских трибун ...</td>\n",
+              "      <td>https://www.lordfilm.bot/50219-slovo-pacana-kr...</td>\n",
+              "      <td>2023.0</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>Россия</td>\n",
+              "      <td>18+</td>\n",
+              "      <td>Иван Янковский, Елизавета Базыкина, Ольга Лапш...</td>\n",
+              "      <td>Сериалы, ru</td>\n",
+              "      <td>https://www.lordfilm.bot/uploads/posts/2023-11...</td>\n",
+              "      <td>150</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>Элементарно</td>\n",
+              "      <td>В Городе Стихий обитатели огня, воды, земли и...</td>\n",
+              "      <td>https://www.lordfilm.bot/48863-jelementarno-20...</td>\n",
+              "      <td>2023.0</td>\n",
+              "      <td>7.0</td>\n",
+              "      <td>7.7</td>\n",
+              "      <td>США</td>\n",
+              "      <td>6+</td>\n",
+              "      <td>Леа Льюис, Мамуду Ати, Роналдо Дель Кармен, Ши...</td>\n",
+              "      <td>Мультфильмы</td>\n",
+              "      <td>https://www.lordfilm.bot/uploads/posts/2023-06...</td>\n",
+              "      <td>34</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>Лука</td>\n",
+              "      <td>Свои незабываемые каникулы, в которых есть ме...</td>\n",
+              "      <td>https://www.lordfilm.bot/27172-luka-11-12-2021...</td>\n",
+              "      <td>2021.0</td>\n",
+              "      <td>7.4</td>\n",
+              "      <td>7.8</td>\n",
+              "      <td>США</td>\n",
+              "      <td>6+</td>\n",
+              "      <td>Джейкоб Тремблей, Джек Дилан Грейзер, Саша Бар...</td>\n",
+              "      <td>Мультфильмы</td>\n",
+              "      <td>https://www.lordfilm.bot/uploads/posts/2021-06...</td>\n",
+              "      <td>68</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>Локи</td>\n",
+              "      <td>Увлекательные приключения скандинавского бога...</td>\n",
+              "      <td>https://www.lordfilm.bot/27119-loki-g1.html</td>\n",
+              "      <td>2021.0</td>\n",
+              "      <td>8.2</td>\n",
+              "      <td>7.7</td>\n",
+              "      <td>США</td>\n",
+              "      <td>0+</td>\n",
+              "      <td>Том Хиддлстон, Софи Ди Мартино, Ричард Э. Гран...</td>\n",
+              "      <td>Сериалы, Фильмы Marvel</td>\n",
+              "      <td>https://www.lordfilm.bot/uploads/posts/2023-10...</td>\n",
+              "      <td>162</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "                              name  \\\n",
+              "0                          Уэнсдэй   \n",
+              "1  Слово пацана. Кровь на асфальте   \n",
+              "2                      Элементарно   \n",
+              "3                             Лука   \n",
+              "4                             Локи   \n",
+              "\n",
+              "                                         description  \\\n",
+              "0   В американской хоррор-комедии показана детект...   \n",
+              "1   Перестройка уже шагнула с кремлевских трибун ...   \n",
+              "2   В Городе Стихий обитатели огня, воды, земли и...   \n",
+              "3   Свои незабываемые каникулы, в которых есть ме...   \n",
+              "4   Увлекательные приключения скандинавского бога...   \n",
+              "\n",
+              "                                                link    year  imdb   kp  \\\n",
+              "0  https://www.lordfilm.bot/48211-ujensdjej-2022....  2022.0   8.1  8.0   \n",
+              "1  https://www.lordfilm.bot/50219-slovo-pacana-kr...  2023.0   NaN  NaN   \n",
+              "2  https://www.lordfilm.bot/48863-jelementarno-20...  2023.0   7.0  7.7   \n",
+              "3  https://www.lordfilm.bot/27172-luka-11-12-2021...  2021.0   7.4  7.8   \n",
+              "4        https://www.lordfilm.bot/27119-loki-g1.html  2021.0   8.2  7.7   \n",
+              "\n",
+              "  country  age                                             actors  \\\n",
+              "0     США   0+  Дженна Ортега, Гвендолин Кристи, Рики Линдхоум...   \n",
+              "1  Россия  18+  Иван Янковский, Елизавета Базыкина, Ольга Лапш...   \n",
+              "2     США   6+  Леа Льюис, Мамуду Ати, Роналдо Дель Кармен, Ши...   \n",
+              "3     США   6+  Джейкоб Тремблей, Джек Дилан Грейзер, Саша Бар...   \n",
+              "4     США   0+  Том Хиддлстон, Софи Ди Мартино, Ричард Э. Гран...   \n",
+              "\n",
+              "                           genres  \\\n",
+              "0  Сериалы, Фильмы про подростков   \n",
+              "1                     Сериалы, ru   \n",
+              "2                     Мультфильмы   \n",
+              "3                     Мультфильмы   \n",
+              "4          Сериалы, Фильмы Marvel   \n",
+              "\n",
+              "                                              poster  annotation_len  \n",
+              "0  https://www.lordfilm.bot/uploads/posts/2022-12...             157  \n",
+              "1  https://www.lordfilm.bot/uploads/posts/2023-11...             150  \n",
+              "2  https://www.lordfilm.bot/uploads/posts/2023-06...              34  \n",
+              "3  https://www.lordfilm.bot/uploads/posts/2021-06...              68  \n",
+              "4  https://www.lordfilm.bot/uploads/posts/2023-10...             162  "
+            ]
+          },
+          "execution_count": 50,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "data.head()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 3,
+      "metadata": {},
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Recommended Movies:\n",
+            "Интерстеллар\n",
+            "Летящие сквозь ночь\n",
+            "Спящая сквозь время\n",
+            "Любовь сквозь время\n",
+            "Лагерь Холодный Ручей\n",
+            "Парадокс Кловерфилда\n",
+            "Путешествие сквозь ночь\n",
+            "Сквозь огонь\n",
+            "Живое\n",
+            "Моцзинь: Долина червя\n"
+          ]
+        }
+      ],
+      "source": [
+        "import pandas as pd\n",
+        "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+        "from sklearn.metrics.pairwise import cosine_similarity\n",
+        "\n",
+        "\n",
+        "\n",
+        "# Vectorize the movie descriptions using TF-IDF\n",
+        "vectorizer = TfidfVectorizer()\n",
+        "tfidf_matrix = vectorizer.fit_transform(data['description'])\n",
+        "tfidf_matrix2 = vectorizer.transform(data['name'])\n",
+        "\n",
+        "# Function to recommend movies based on user input\n",
+        "def recommend_movies(user_input, df, vectorizer, tfidf_matrix, top_n=10):\n",
+        "    # Vectorize the user input\n",
+        "    user_tfidf = vectorizer.transform([user_input])\n",
+        "\n",
+        "    # Calculate cosine similarity between user input and movie descriptions\n",
+        "    similarity_scores_desc = cosine_similarity(user_tfidf, tfidf_matrix)\n",
+        "    similarity_scores_name = cosine_similarity(user_tfidf, tfidf_matrix2)\n",
+        "    similarity_scores=0.7*similarity_scores_desc+0.3*similarity_scores_name\n",
+        "\n",
+        "    # Get indices of top N most similar movies\n",
+        "    top_indices = similarity_scores.argsort(axis=1)[:, ::-1][:, :top_n]\n",
+        "\n",
+        "    # Get movie names based on indices\n",
+        "    recommended_movies = df.iloc[top_indices.ravel()]['name'].values\n",
+        "\n",
+        "    return recommended_movies\n",
+        "\n",
+        "# Example usage\n",
+        "user_input = \"коллектив исследователей и учёных отправляется сквозь червоточину\" #input(\"Enter some words to get movie recommendations: \")\n",
+        "recommended_movies = recommend_movies(user_input, data, vectorizer, tfidf_matrix)\n",
+        "print(\"Recommended Movies:\")\n",
+        "for movie in recommended_movies:\n",
+        "    print(movie)\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 56,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import pickle\n",
+        "with open('vectorizer.pkl', 'wb') as f:\n",
+        "    pickle.dump(vectorizer, f)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 49,
+      "metadata": {},
+      "outputs": [
+        {
+          "ename": "KeyboardInterrupt",
+          "evalue": "",
+          "output_type": "error",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+            "Cell \u001b[0;32mIn[49], line 3\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;66;03m# Создание эмбеддингов текстовых аннотаций\u001b[39;00m\n\u001b[1;32m      2\u001b[0m annotations \u001b[38;5;241m=\u001b[39m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdescription\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mtolist()\n\u001b[0;32m----> 3\u001b[0m annotation_embeddings \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mannotations\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      5\u001b[0m \u001b[38;5;66;03m# Инициализация поискового индекса\u001b[39;00m\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/sentence_transformers/SentenceTransformer.py:371\u001b[0m, in \u001b[0;36mSentenceTransformer.encode\u001b[0;34m(self, sentences, prompt_name, prompt, batch_size, show_progress_bar, output_value, precision, convert_to_numpy, convert_to_tensor, device, normalize_embeddings)\u001b[0m\n\u001b[1;32m    368\u001b[0m features\u001b[38;5;241m.\u001b[39mupdate(extra_features)\n\u001b[1;32m    370\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mno_grad():\n\u001b[0;32m--> 371\u001b[0m     out_features \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfeatures\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    372\u001b[0m     out_features[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msentence_embedding\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m truncate_embeddings(\n\u001b[1;32m    373\u001b[0m         out_features[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msentence_embedding\u001b[39m\u001b[38;5;124m\"\u001b[39m], \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtruncate_dim\n\u001b[1;32m    374\u001b[0m     )\n\u001b[1;32m    376\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m output_value \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtoken_embeddings\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/torch/nn/modules/container.py:217\u001b[0m, in \u001b[0;36mSequential.forward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m    215\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m):\n\u001b[1;32m    216\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m:\n\u001b[0;32m--> 217\u001b[0m         \u001b[38;5;28minput\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[43mmodule\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m    218\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28minput\u001b[39m\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1509\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m   1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1518\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1519\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1523\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/sentence_transformers/models/Transformer.py:98\u001b[0m, in \u001b[0;36mTransformer.forward\u001b[0;34m(self, features)\u001b[0m\n\u001b[1;32m     95\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtoken_type_ids\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m features:\n\u001b[1;32m     96\u001b[0m     trans_features[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtoken_type_ids\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m features[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtoken_type_ids\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m---> 98\u001b[0m output_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mauto_model\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mtrans_features\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m     99\u001b[0m output_tokens \u001b[38;5;241m=\u001b[39m output_states[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m    101\u001b[0m features\u001b[38;5;241m.\u001b[39mupdate({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtoken_embeddings\u001b[39m\u001b[38;5;124m\"\u001b[39m: output_tokens, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mattention_mask\u001b[39m\u001b[38;5;124m\"\u001b[39m: features[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mattention_mask\u001b[39m\u001b[38;5;124m\"\u001b[39m]})\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1509\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m   1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1518\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1519\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1523\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/transformers/models/distilbert/modeling_distilbert.py:822\u001b[0m, in \u001b[0;36mDistilBertModel.forward\u001b[0;34m(self, input_ids, attention_mask, head_mask, inputs_embeds, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[1;32m    819\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m attention_mask \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    820\u001b[0m         attention_mask \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mones(input_shape, device\u001b[38;5;241m=\u001b[39mdevice)  \u001b[38;5;66;03m# (bs, seq_length)\u001b[39;00m\n\u001b[0;32m--> 822\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtransformer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    823\u001b[0m \u001b[43m    \u001b[49m\u001b[43mx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43membeddings\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    824\u001b[0m \u001b[43m    \u001b[49m\u001b[43mattn_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattention_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    825\u001b[0m \u001b[43m    \u001b[49m\u001b[43mhead_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhead_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    826\u001b[0m \u001b[43m    \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    827\u001b[0m \u001b[43m    \u001b[49m\u001b[43moutput_hidden_states\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_hidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    828\u001b[0m \u001b[43m    \u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    829\u001b[0m 
\u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1509\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m   1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1518\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1519\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1523\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/transformers/models/distilbert/modeling_distilbert.py:587\u001b[0m, in \u001b[0;36mTransformer.forward\u001b[0;34m(self, x, attn_mask, head_mask, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[1;32m    579\u001b[0m     layer_outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_gradient_checkpointing_func(\n\u001b[1;32m    580\u001b[0m         layer_module\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__call__\u001b[39m,\n\u001b[1;32m    581\u001b[0m         hidden_state,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    584\u001b[0m         output_attentions,\n\u001b[1;32m    585\u001b[0m     )\n\u001b[1;32m    586\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 587\u001b[0m     layer_outputs \u001b[38;5;241m=\u001b[39m \u001b[43mlayer_module\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    588\u001b[0m \u001b[43m        \u001b[49m\u001b[43mhidden_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    589\u001b[0m \u001b[43m        \u001b[49m\u001b[43mattn_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    590\u001b[0m \u001b[43m        \u001b[49m\u001b[43mhead_mask\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    591\u001b[0m \u001b[43m        \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    592\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    594\u001b[0m hidden_state \u001b[38;5;241m=\u001b[39m layer_outputs[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n\u001b[1;32m    596\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m output_attentions:\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1509\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m   1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1518\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1519\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1523\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/transformers/models/distilbert/modeling_distilbert.py:531\u001b[0m, in \u001b[0;36mTransformerBlock.forward\u001b[0;34m(self, x, attn_mask, head_mask, output_attentions)\u001b[0m\n\u001b[1;32m    528\u001b[0m sa_output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msa_layer_norm(sa_output \u001b[38;5;241m+\u001b[39m x)  \u001b[38;5;66;03m# (bs, seq_length, dim)\u001b[39;00m\n\u001b[1;32m    530\u001b[0m \u001b[38;5;66;03m# Feed Forward Network\u001b[39;00m\n\u001b[0;32m--> 531\u001b[0m ffn_output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mffn\u001b[49m\u001b[43m(\u001b[49m\u001b[43msa_output\u001b[49m\u001b[43m)\u001b[49m  \u001b[38;5;66;03m# (bs, seq_length, dim)\u001b[39;00m\n\u001b[1;32m    532\u001b[0m ffn_output: torch\u001b[38;5;241m.\u001b[39mTensor \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moutput_layer_norm(ffn_output \u001b[38;5;241m+\u001b[39m sa_output)  \u001b[38;5;66;03m# (bs, seq_length, dim)\u001b[39;00m\n\u001b[1;32m    534\u001b[0m output \u001b[38;5;241m=\u001b[39m (ffn_output,)\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1509\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m   1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1518\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1519\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1523\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/transformers/models/distilbert/modeling_distilbert.py:466\u001b[0m, in \u001b[0;36mFFN.forward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m    465\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m: torch\u001b[38;5;241m.\u001b[39mTensor) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m torch\u001b[38;5;241m.\u001b[39mTensor:\n\u001b[0;32m--> 466\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mapply_chunking_to_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mff_chunk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchunk_size_feed_forward\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mseq_len_dim\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m)\u001b[49m\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/transformers/pytorch_utils.py:237\u001b[0m, in \u001b[0;36mapply_chunking_to_forward\u001b[0;34m(forward_fn, chunk_size, chunk_dim, *input_tensors)\u001b[0m\n\u001b[1;32m    234\u001b[0m     \u001b[38;5;66;03m# concatenate output at same dimension\u001b[39;00m\n\u001b[1;32m    235\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mcat(output_chunks, dim\u001b[38;5;241m=\u001b[39mchunk_dim)\n\u001b[0;32m--> 237\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_fn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minput_tensors\u001b[49m\u001b[43m)\u001b[49m\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/transformers/models/distilbert/modeling_distilbert.py:471\u001b[0m, in \u001b[0;36mFFN.ff_chunk\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m    469\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlin1(\u001b[38;5;28minput\u001b[39m)\n\u001b[1;32m    470\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mactivation(x)\n\u001b[0;32m--> 471\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlin2\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    472\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdropout(x)\n\u001b[1;32m    473\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m x\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1509\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m   1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1518\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1519\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1523\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+            "File \u001b[0;32m~/anaconda3/envs/cv/lib/python3.12/site-packages/torch/nn/modules/linear.py:116\u001b[0m, in \u001b[0;36mLinear.forward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m    115\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m: Tensor) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Tensor:\n\u001b[0;32m--> 116\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlinear\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbias\u001b[49m\u001b[43m)\u001b[49m\n",
+            "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+          ]
+        }
+      ],
+      "source": [
+        "# Создание эмбеддингов текстовых аннотаций\n",
+        "annotations = data['description'].tolist()\n",
+        "annotation_embeddings = model.encode(annotations)\n",
+        "\n",
+        "# Инициализация поискового индекса\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 35,
+      "metadata": {
+        "id": "MOjSw4ahPMmh"
+      },
+      "outputs": [],
+      "source": [
+        "index = faiss.IndexFlatL2(embedings.shape[1])\n",
+        "index.add(np.array(embedings))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 40,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "\n",
+        "from transformers import AutoTokenizer, AutoModel\n",
+        "import numpy as np\n",
+        "from sklearn.metrics.pairwise import cosine_similarity\n",
+        "import torch\n",
+        "import pandas as pd\n",
+        "\n",
+        "\n",
+        "data = np.load('embeddings.npy')\n",
+        "movies = pd.read_csv('data.csv')\n",
+        "\n",
+        "def get_embeddings():\n",
+        "    tokenizer = AutoTokenizer.from_pretrained(\"cointegrated/rubert-tiny2\")\n",
+        "    model = AutoModel.from_pretrained(\"cointegrated/rubert-tiny2\")\n",
+        "    # model.cuda()  \n",
+        "    return model, tokenizer\n",
+        "\n",
+        "def embed_bert_cls(text ):\n",
+        "    model, tokenizer = get_embeddings()\n",
+        "    t = tokenizer(text, padding=True, truncation=True, return_tensors='pt')\n",
+        "    with torch.no_grad():\n",
+        "        model_output = model(**{k: v.to(model.device) for k, v in t.items()})\n",
+        "    embeddings = model_output.last_hidden_state[:, 0, :]\n",
+        "    embeddings = torch.nn.functional.normalize(embeddings)\n",
+        "    return embeddings[0].cpu().numpy()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 43,
+      "metadata": {
+        "id": "CMruSPejPUBu"
+      },
+      "outputs": [],
+      "source": [
+        "def search_books(query, k=5):\n",
+        "    query_embedding = embed_bert_cls(query)\n",
+        "\n",
+        "    # Поиск ближайших соседей в индексе\n",
+        "    D, I = index.search(np.array([query_embedding]), k)\n",
+        "\n",
+        "    return D,I\n",
+        "    # for i, idx in enumerate(I[0]):\n",
+        "    #     book = data.iloc[idx]\n",
+        "    #     print(f'Рекомендуемая книга {i + 1}: {book[\"title\"]} by {book[\"author\"]}')\n",
+        "    #     print(f'Жанр: {book[\"genre\"]}')\n",
+        "    #     print(f'URL страницы книги: {book[\"page_url\"]}')\n",
+        "    #     print(f'Описание: {book[\"annotation\"]}')\n",
+        "    #     print(f'Мера подходящести под запрос: {1/(D[0][i]+1):.2f}\\n')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 42,
+      "metadata": {},
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "(312,)"
+            ]
+          },
+          "execution_count": 42,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "embed_bert_cls(\"query\").shape"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 44,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "krz9AxlqPZBl",
+        "outputId": "d4428884-f616-4963-b82d-1907c11f2304"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "(array([[0.8288138 , 0.84766877, 0.8478927 ]], dtype=float32),\n",
+              " array([[ 2967,  1486, 19329]]))"
+            ]
+          },
+          "execution_count": 44,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "query = \"страшные заклинания\"\n",
+        "search_books(query, k=3)"
+      ]
+    }
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "gpuType": "T4",
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.12.2"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}

data/data.csv CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3880998a33fa7f246482272f6c0e8270c6d759ee594a94030cf9d722373f604
-size 34515511

 version https://git-lfs.github.com/spec/v1
+oid sha256:4aaeb836325f8966ed7b0ed5e18ea4a29ec24300ea8afa615f201d571843e358
+size 34361210

data/{embeddings.npy → embeddings_bert.npy} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f6ebe9af14012e5d2572f995ef84a2f43f07f0235a09e79312ade95b02179d0
-size 36520352

 version https://git-lfs.github.com/spec/v1
+oid sha256:1a2243510b8892ac2f353478d28fc4d9707f2a3e0aec4fb4c17639f4a861ec1c
+size 35503232

data/tf_idf_vectorizer.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a7ce22b069723ecfaecb88e16129a29ab56074106d076679703666a8648240dc
+size 5236615