Norgan97 committed on
Commit
ebdb067
1 Parent(s): b8d0a69
Dataset/embeddingsrecipes.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a5202b41888fd390fe421bdfcac1b57867260d58426834cbd71900f2d385cba
3
+ size 98568532
Dataset/{faiss.index → faissbooks.index} RENAMED
File without changes
Dataset/faissrecipes.index ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16751ffdb3319faf7cb5b01b726af9612598354d1e6783263e49f66429df0454
3
+ size 32326989
Dataset/recipesdataset.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b13aa75d0ad9b9e9d168fce0f36d67cd5734ffd090ca09a6f5c8643f71caa95
3
+ size 14171628
app.py CHANGED
@@ -24,7 +24,7 @@ def load_data():
24
  df = pd.read_csv('Dataset/parcedbooks.csv')
25
  with open('Dataset/embeddingsbooks.txt', 'r') as file:
26
  embeddings_list = [list(map(float, line.split())) for line in file.readlines()]
27
- index = faiss.read_index('Dataset/faiss.index')
28
  return df, embeddings_list, index
29
 
30
  df, embeddings_list, index = load_data()
 
24
  df = pd.read_csv('Dataset/parcedbooks.csv')
25
  with open('Dataset/embeddingsbooks.txt', 'r') as file:
26
  embeddings_list = [list(map(float, line.split())) for line in file.readlines()]
27
+ index = faiss.read_index('Dataset/faissbooks.index')
28
  return df, embeddings_list, index
29
 
30
  df, embeddings_list, index = load_data()
pages/recipes.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import torch
4
+ from PIL import Image
5
+ from io import BytesIO
6
+ import requests
7
+ import faiss
8
+
9
+
10
+ from transformers import AutoTokenizer, AutoModel
11
+ import numpy as np
12
+ st.set_page_config(layout="wide")
13
+
14
@st.cache_resource()
def load_model():
    """Load the rubert-tiny2 encoder and its tokenizer (cached by Streamlit).

    Returns:
        tuple: (model, tokenizer) for the "cointegrated/rubert-tiny2" checkpoint.
    """
    checkpoint = "cointegrated/rubert-tiny2"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    encoder = AutoModel.from_pretrained(checkpoint)
    return encoder, tokenizer

model, tokenizer = load_model()
21
+
22
@st.cache_data()
def load_data():
    """Read the recipes dataset, its precomputed text embeddings, and the FAISS index.

    Returns:
        tuple: (DataFrame of recipes, list of embedding vectors, FAISS index).
    """
    frame = pd.read_csv('Dataset/recipesdataset.csv')
    # One whitespace-separated embedding vector per line.
    with open('Dataset/embeddingsrecipes.txt', 'r') as fh:
        vectors = [[float(tok) for tok in row.split()] for row in fh]
    search_index = faiss.read_index('Dataset/faissrecipes.index')
    return frame, vectors, search_index

df, embeddings_list, index = load_data()
31
+
32
def embed_bert_cls(text, model, tokenizer):
    """Encode *text* into a single L2-normalised CLS embedding.

    Args:
        text: input string (or batch) to embed.
        model: a Hugging Face encoder model.
        tokenizer: the matching tokenizer.

    Returns:
        numpy array: the normalised [CLS] vector of the first sequence.
    """
    batch = tokenizer(text, padding=True, truncation=True, return_tensors='pt')
    device_batch = {name: tensor.to(model.device) for name, tensor in batch.items()}
    with torch.no_grad():
        output = model(**device_batch)
    # Take the [CLS] token (position 0) and normalise to unit length.
    cls_vectors = output.last_hidden_state[:, 0, :]
    cls_vectors = torch.nn.functional.normalize(cls_vectors)
    return cls_vectors[0].cpu().numpy()
39
+
40
+
41
# --- Streamlit UI: free-text recipe recommendation ---
text = st.text_input('Введите ваше предпочтение для рекомендации')

button = st.button('Отправить запрос')
num = st.number_input('Укажите количество блюд для рекомендации', step=1, value=1)


if text and button:
    # Embed the query and retrieve the `num` nearest recipes from the FAISS index.
    query_vector = embed_bert_cls(text, model, tokenizer)
    distances, neighbours = index.search(query_vector.reshape(1, -1), num)

    hit_ids = neighbours[0]
    annotations = [df['annotation'].iloc[i] for i in hit_ids]
    image_urls = [df['image_url'].iloc[i] for i in hit_ids]
    images = [Image.open(BytesIO(requests.get(url).content)) for url in image_urls]
    titles = [df['title'].iloc[i] for i in hit_ids]
    page_urls = [df['page_url'].iloc[i] for i in hit_ids]
    # Squared-L2 distance between unit vectors maps to cosine similarity as 1 - d/2.
    similarities = [1 - d / 2 for d in distances[0]]

    # Render each hit: image on the left, metadata on the right.
    for similarity, image, annotation, title, url in zip(similarities, images, annotations, titles, page_urls):
        left, right = st.columns([3, 4])
        with left:
            st.image(image, width=300)
        with right:
            st.write(f"***Название:*** {title}")
            st.write(f"***Аннотация:*** {annotation}")
            similarity = float(similarity)
            st.write(f"***Cosine Similarity : {round(similarity, 3)}***")
            st.write(f"***Ссылка на блюдо : {url}***")

        st.markdown(
            "<hr style='border: 2px solid #000; margin-top: 10px; margin-bottom: 10px;'>",
            unsafe_allow_html=True
        )