File size: 5,476 Bytes
0317d24
ffc4ebe
0317d24
 
ffc4ebe
0317d24
 
 
 
 
 
 
 
 
ffc4ebe
0317d24
 
ffc4ebe
0317d24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c15a5f1
0317d24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c15a5f1
0317d24
 
c15a5f1
0317d24
 
 
 
 
 
 
 
c15a5f1
0317d24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c15a5f1
0317d24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import google.generativeai as genai
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from htmlTemplates import css, bot_template, user_template
import logging
import faiss

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s')

# Load environment variables from a local .env file, then configure the
# Gemini SDK with the key. Requires GOOGLE_API_KEY to be set in the
# environment (or .env); genai.configure does not validate the key here.
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))


def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page of every PDF.

    Args:
        pdf_docs: Iterable of file-like objects (e.g. Streamlit uploads)
            readable by PyPDF2's ``PdfReader``.

    Returns:
        str: All extracted page text concatenated in order. A file that
        fails to parse is logged and skipped rather than aborting the batch.
    """
    parts = []
    for pdf in pdf_docs:
        try:
            pdf_reader = PdfReader(pdf)
            for page in pdf_reader.pages:
                # extract_text() returns None for pages with no extractable
                # text (e.g. scanned images); guard to avoid a TypeError
                # when concatenating.
                parts.append(page.extract_text() or "")
        except Exception as e:
            logging.error(f"Error processing PDF file: {e}")
    # join() instead of repeated += avoids quadratic string building.
    return "".join(parts)

def get_text_chunks(text):
    """Split raw document text into overlapping chunks for embedding.

    Args:
        text: The full concatenated document text.

    Returns:
        list[str]: Chunks of up to 10,000 characters, with a
        1,000-character overlap between consecutive chunks so context
        isn't lost at chunk boundaries.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
    return splitter.split_text(text)

def get_vector_store(text_chunks):
    """Embed the text chunks and persist them as a FAISS index on disk.

    The full vector store (index plus docstore and id mapping) is written
    to a "faiss_index" directory alongside this file, created if missing.

    Args:
        text_chunks: List of text strings to embed and index.
    """
    logging.info("Starting vector store creation")
    embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    logging.info("Embeddings created")

    # Build the in-memory FAISS store from the chunks.
    store = FAISS.from_texts(texts=text_chunks, embedding=embedder)
    logging.info("FAISS vector store created")

    # Persist next to this file so user_input() can reload it later.
    index_dir = os.path.join(os.path.dirname(__file__), "faiss_index")
    os.makedirs(index_dir, exist_ok=True)
    store.save_local(index_dir)
    logging.info("FAISS vector store saved successfully.")

def get_conversation_chain():
    """Build the question-answering chain: Gemini model plus a strict prompt.

    Returns:
        A "stuff"-type QA chain that answers only from the supplied
        context and declines to guess when the answer isn't present.
    """
    template = """
        Answer the question clear and precise. If not provided the context return the result as
        "Sorry I dont know the answer", don't provide the wrong answer.
        Context:\n {context}?\n
        Question:\n{question}\n
        Answer:
    """
    qa_prompt = PromptTemplate(template=template, input_variables=["context", "question"])
    # Low temperature keeps answers close to the retrieved context.
    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    return load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)

def user_input(user_question):
    """Answer a question against the persisted FAISS index via Streamlit.

    Reloads the saved vector store, retrieves the chunks most similar to
    the question, runs the QA chain over them, and renders the result.
    Failures are logged and shown to the user instead of being raised.

    Args:
        user_question: The question string typed by the user.
    """
    logging.info("Processing user input")

    index_dir = os.path.join(os.path.dirname(__file__), "faiss_index")

    # Guard clause: nothing to search until PDFs have been processed.
    if not os.path.exists(index_dir):
        st.warning("Please upload and process PDF files before asking questions.")
        return

    try:
        # The index is written by this same app, so deserialization is
        # trusted; hence allow_dangerous_deserialization=True.
        embedder = GoogleGenerativeAIEmbeddings(model='models/embedding-001')
        store = FAISS.load_local(index_dir, embedder, allow_dangerous_deserialization=True)
        logging.info("FAISS vector store loaded successfully")

        matches = store.similarity_search(user_question)
        qa_chain = get_conversation_chain()
        result = qa_chain({"input_documents": matches, "question": user_question}, return_only_outputs=True)
        st.write(user_template.replace("{{MSG}}", result["output_text"]), unsafe_allow_html=True)
    except Exception as e:
        logging.error(f"Error processing user input: {e}")
        st.write(bot_template.replace("{{MSG}}", f"Sorry, there was an error processing your request: {str(e)}. Please try again later."), unsafe_allow_html=True)

def main():
    """Streamlit entry point: page setup, sidebar upload flow, and Q&A box."""
    st.set_page_config(page_title="Chat with multiple PDFs",
                       page_icon=":books:")
    st.write(css, unsafe_allow_html=True)

    # Initialize session state slots on the first run.
    for key in ("conversation", "chat_history"):
        if key not in st.session_state:
            st.session_state[key] = None

    st.header("Chat with multiple PDFs with Gemini Pro :books:")

    # Sidebar: upload PDFs and build the vector store on demand.
    with st.sidebar:
        pdf_docs = st.file_uploader(
            "Upload your PDF Files and Click on Process",
            accept_multiple_files=True
        )
        if st.button("Process"):
            with st.spinner("Processing..."):
                try:
                    chunks = get_text_chunks(get_pdf_text(pdf_docs))
                    get_vector_store(chunks)
                    st.session_state.conversation = get_conversation_chain()
                    st.success("PDFs processed successfully. You can now ask questions.")
                except Exception as e:
                    logging.error(f"Error processing PDF files: {e}")
                    st.error("There was an error processing the PDF files. Please try again later.")

    # Main pane: only answer once an index exists on disk.
    question = st.text_input("Ask a Question from the PDF Files")
    if question:
        index_file = os.path.join(os.path.dirname(__file__), "faiss_index", "index.faiss")
        if os.path.exists(index_file):
            user_input(question)
        else:
            st.warning("Please upload and process PDF files before asking questions.")

# Entry point when executed directly (e.g. `streamlit run <this file>`).
if __name__ == "__main__":
    main()