qsaheeb
committed on
Commit
·
6d67ef6
1
Parent(s):
58c5774
Add some changes 2
Browse files
- app.py +1 -1
- embeddings.py +2 -2
- preprocess.py +1 -4
- recommender.py +1 -1
app.py
CHANGED
@@ -5,7 +5,7 @@ import torch
|
|
5 |
from sentence_transformers import SentenceTransformer, util, CrossEncoder
|
6 |
from recommender import BookRecommender
|
7 |
# Load book dataset
|
8 |
-
df = pd.read_csv("
|
9 |
|
10 |
# Load precomputed SBERT embeddings
|
11 |
with open("model/sbert_embeddings2.pkl", "rb") as f:
|
|
|
5 |
from sentence_transformers import SentenceTransformer, util, CrossEncoder
|
6 |
from recommender import BookRecommender
|
7 |
# Load book dataset
|
8 |
+
df = pd.read_csv("data/books_summary_cleaned.csv")
|
9 |
|
10 |
# Load precomputed SBERT embeddings
|
11 |
with open("model/sbert_embeddings2.pkl", "rb") as f:
|
embeddings.py
CHANGED
@@ -3,7 +3,7 @@ from preprocess import preprocess_books
|
|
3 |
import pickle
|
4 |
import numpy as np
|
5 |
|
6 |
-
def extract_sbert_embeddings(df, save_path="
|
7 |
"""Extracts SBERT embeddings from book summaries."""
|
8 |
model = SentenceTransformer('all-mpnet-base-v2') # Small, fast, high-performance
|
9 |
|
@@ -15,7 +15,7 @@ def extract_sbert_embeddings(df, save_path="/model/sbert_embeddings2.pkl"):
|
|
15 |
|
16 |
return embeddings
|
17 |
|
18 |
-
def load_book_data(filepath="
|
19 |
"""Loads book dataset and ensures necessary columns exist."""
|
20 |
df = pd.read_csv(filepath)
|
21 |
|
|
|
3 |
import pickle
|
4 |
import numpy as np
|
5 |
|
6 |
+
def extract_sbert_embeddings(df, save_path="model/sbert_embeddings2.pkl"):
|
7 |
"""Extracts SBERT embeddings from book summaries."""
|
8 |
model = SentenceTransformer('all-mpnet-base-v2') # Small, fast, high-performance
|
9 |
|
|
|
15 |
|
16 |
return embeddings
|
17 |
|
18 |
+
def load_book_data(filepath="data/books_summary_cleaned.csv"):
|
19 |
"""Loads book dataset and ensures necessary columns exist."""
|
20 |
df = pd.read_csv(filepath)
|
21 |
|
preprocess.py
CHANGED
@@ -1,5 +1,3 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
|
3 |
import pandas as pd
|
4 |
import re
|
5 |
|
@@ -10,9 +8,8 @@ def clean_text(text):
|
|
10 |
text = re.sub(r"\s+", " ", text) # Remove extra spaces
|
11 |
text = re.sub(r"[^a-zA-Z0-9.,!?;:()'\" ]", "", text) # Keep only relevant characters
|
12 |
return text.strip()
|
13 |
-
import pandas as pd
|
14 |
|
15 |
-
def preprocess_books(input_path="
|
16 |
"""Preprocesses book dataset by handling duplicates, missing values, and text cleaning."""
|
17 |
|
18 |
# Load dataset
|
|
|
|
|
|
|
1 |
import pandas as pd
|
2 |
import re
|
3 |
|
|
|
8 |
text = re.sub(r"\s+", " ", text) # Remove extra spaces
|
9 |
text = re.sub(r"[^a-zA-Z0-9.,!?;:()'\" ]", "", text) # Keep only relevant characters
|
10 |
return text.strip()
|
|
|
11 |
|
12 |
+
def preprocess_books(input_path="data/books_summary.csv", output_path="data/books_summary_cleaned.csv"):
|
13 |
"""Preprocesses book dataset by handling duplicates, missing values, and text cleaning."""
|
14 |
|
15 |
# Load dataset
|
recommender.py
CHANGED
@@ -5,7 +5,7 @@ from sentence_transformers import SentenceTransformer, util
|
|
5 |
from embeddings import load_book_data
|
6 |
|
7 |
class BookRecommender:
|
8 |
-
def __init__(self, data_path="
|
9 |
"""Loads book dataset and precomputed embeddings."""
|
10 |
# from data_loader import load_book_data
|
11 |
self.df = load_book_data(data_path)
|
|
|
5 |
from embeddings import load_book_data
|
6 |
|
7 |
class BookRecommender:
|
8 |
+
def __init__(self, data_path="data/books_summary.csv", emb_path="/model/sbert_embeddings2.pkl"):
|
9 |
"""Loads book dataset and precomputed embeddings."""
|
10 |
# from data_loader import load_book_data
|
11 |
self.df = load_book_data(data_path)
|