Monto-Solutions / create_and_store_embeddings.py
ujalaarshad17's picture
Added application files
5ded781
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore
import os
from dotenv import load_dotenv
import logging
# Ingestion script: load the text file Data/monto-solutions.txt, split it into
# overlapping chunks, embed each chunk with Google Generative AI embeddings,
# and store the result in the 'customer-support' Pinecone index.

# Load variables from a local .env file into the process environment before
# any client looks up its credentials.
load_dotenv()
logging.basicConfig(level=logging.INFO, format='[%(asctime)s]: %(message)s')

# SECURITY: the API key is read from the environment instead of being
# hard-coded. A key was previously committed in this file; it should be
# treated as compromised and revoked.
google_api_key = os.getenv("GOOGLE_API_KEY")
if not google_api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; add it to the environment or the .env file."
    )

embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=google_api_key,
)

# Read the source document from disk.
loader = TextLoader("Data/monto-solutions.txt")
docs = loader.load()
logging.info("Documents created successfully")

# 500-character chunks with a 100-character overlap between neighbours.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
chunks = splitter.split_documents(docs)
logging.info("Chunks created successfully")

logging.info("Initializing pinecone database")
try:
    # Embeds every chunk and writes it to the named index.
    # NOTE(review): presumably the Pinecone credentials are picked up from the
    # environment loaded above -- confirm against the deployment's .env.
    doc_search = PineconeVectorStore.from_documents(
        documents=chunks,
        index_name='customer-support',
        embedding=embeddings,
    )
except Exception as e:
    # Top-level boundary: record the failure at ERROR level with the full
    # traceback instead of hiding it among INFO messages.
    logging.exception("Failed to create the embeddings, Error occured: %s", e)
else:
    logging.info("Chunks and embeddings stored successfully")