"""Generate and persist SBERT sentence embeddings for the book dataset."""

import pickle
from pathlib import Path

import pandas as pd
from sentence_transformers import SentenceTransformer

from preprocess import preprocess_books


def extract_sbert_embeddings(df: pd.DataFrame, save_path: str = "model/sbert_embeddings2.pkl"):
    """Encode the ``combined_text`` column of *df* with SBERT and pickle the result.

    Args:
        df: DataFrame containing a ``combined_text`` column. Missing values
            are encoded as empty strings.
        save_path: Destination file for the pickled embeddings. Missing
            parent directories are created.

    Returns:
        Whatever ``SentenceTransformer.encode`` returns for the texts (the
        same object that is pickled to *save_path*).

    Raises:
        KeyError: If *df* has no ``combined_text`` column.
    """
    # Read the column before loading the model so a missing column fails
    # fast instead of after an expensive model load.
    # encode() expects a str or a list of str; a Series with a non-default
    # index would be looked up by label rather than by position, so hand it
    # a plain list of strings.
    texts = df["combined_text"].fillna("").astype(str).tolist()

    model = SentenceTransformer("all-mpnet-base-v2")

    # Generate embeddings for book summaries
    embeddings = model.encode(texts, show_progress_bar=True)

    # Make sure the output directory exists so the (slow) encoding work is
    # not lost to a FileNotFoundError at save time.
    target = Path(save_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("wb") as f:
        pickle.dump(embeddings, f)

    return embeddings


def load_book_data(filepath: str = "data/books_summary_cleaned.csv") -> pd.DataFrame:
    """Load the book dataset from a CSV file.

    No column validation is performed here; callers that need specific
    columns (e.g. ``combined_text``) must check for them.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    return pd.read_csv(filepath)


# Example pipeline (left disabled, as in the original file):
# preprocess_books()
# df = load_book_data()
# embeddings = extract_sbert_embeddings(df)