import os
import time

from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_community.embeddings import HuggingFaceEmbeddings

load_dotenv()


def download_hugging_face_embeddings():
    """Download the sentence-transformers embedding model."""
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings


# Load the Groq API key from the environment (.env file); never hardcode secrets in source.
groq_api_key = os.getenv("GROQ_API_KEY")

# Initialize the LLM
llm = ChatGroq(groq_api_key=groq_api_key, model_name="Llama3-8b-8192")

prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {input}

Only return the helpful answer below and nothing else.
Helpful answer:"""

prompt = ChatPromptTemplate.from_template(prompt_template)


def vector_embedding():
    """Embeds the documents and stores them in a FAISS vector store."""
    embeddings = download_hugging_face_embeddings()
    loader = PyPDFDirectoryLoader("/kaggle/input/book-pdf-1")  # Data ingestion
    docs = loader.load()  # Document loading
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)  # Chunk creation
    final_documents = text_splitter.split_documents(docs[:20])  # Split only the first 20 pages
    vectors = FAISS.from_documents(final_documents, embeddings)  # Build FAISS index from HuggingFace embeddings
    return vectors


# Get user input
prompt1 = input("Enter Your Question From Documents: ")

# Embed the documents
vectors = vector_embedding()
print("Vector Store DB Is Ready")

if prompt1:
    document_chain = create_stuff_documents_chain(llm, prompt)
    retriever = vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    start = time.process_time()
    response = retrieval_chain.invoke({'input': prompt1})
    print("Response time :", time.process_time() - start)
    print(response['answer'])

    # Print the retrieved source documents
    print("\nDocument Similarity Search:")
    for i, doc in enumerate(response["context"]):
        print(doc.page_content)
        print("--------------------------------")
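
# Optional: a minimal sketch (not part of the original pipeline) of persisting the FAISS
# index so the PDFs do not have to be re-embedded on every run. The folder name
# "faiss_index" is an arbitrary choice; uncomment to use.
#
# vectors.save_local("faiss_index")
# reloaded = FAISS.load_local(
#     "faiss_index",
#     download_hugging_face_embeddings(),
#     allow_dangerous_deserialization=True,  # required by recent langchain_community releases
# )
# retrieval_chain = create_retrieval_chain(reloaded.as_retriever(), document_chain)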