import os
import logging

import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader
import google.generativeai as genai
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI

from htmlTemplates import css, bot_template, user_template

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s')

load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
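
# The API key is read from the environment via load_dotenv(); a minimal .env
# file in the working directory would contain one line (hypothetical value):
#   GOOGLE_API_KEY=your-api-key-here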

def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page in the uploaded PDFs."""
    text = ""
    for pdf in pdf_docs:
        try:
            pdf_reader = PdfReader(pdf)
            for page in pdf_reader.pages:
                # extract_text() can return None for pages with no text layer
                text += page.extract_text() or ""
        except Exception as e:
            logging.error(f"Error processing PDF file: {e}")
    return text
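
# Note: st.file_uploader returns file-like UploadedFile objects, which
# PdfReader accepts directly, so no temporary files are needed here.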

def get_text_chunks(text):
    # chunk_size and chunk_overlap are measured in characters (the splitter's
    # default length function is len), so this yields ~10k-character chunks
    # with 10% overlap
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=10000,
        chunk_overlap=1000
    )
    chunks = text_splitter.split_text(text)
    return chunks

def get_vector_store(text_chunks):
    logging.info("Starting vector store creation")
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    logging.info("Embeddings created")
    # Create the FAISS vector store
    vector_store = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    logging.info("FAISS vector store created")
    # Define the directory where the FAISS index will be saved
    faiss_index_dir = os.path.join(os.path.dirname(__file__), "faiss_index")
    os.makedirs(faiss_index_dir, exist_ok=True)
    # Save the entire FAISS vector store, including the docstore and index_to_docstore_id
    vector_store.save_local(faiss_index_dir)
    logging.info("FAISS vector store saved successfully.")

def get_conversation_chain():
    prompt_template = """
    Answer the question clearly and precisely. If the answer is not in the
    provided context, respond with "Sorry, I don't know the answer"; do not
    make up a wrong answer.

    Context:\n{context}\n
    Question:\n{question}\n

    Answer:
    """
    model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
    chain = load_qa_chain(model, chain_type='stuff', prompt=prompt)
    return chain

def user_input(user_question):
    logging.info("Processing user input")
    # Reload the FAISS vector store from the saved directory
    faiss_index_dir = os.path.join(os.path.dirname(__file__), "faiss_index")
    if not os.path.exists(faiss_index_dir):
        st.warning("Please upload and process PDF files before asking questions.")
        return
    try:
        # Load the entire FAISS vector store, enabling dangerous deserialization since we trust the source
        new_db = FAISS.load_local(
            faiss_index_dir,
            GoogleGenerativeAIEmbeddings(model='models/embedding-001'),
            allow_dangerous_deserialization=True
        )
        logging.info("FAISS vector store loaded successfully")
        # Perform similarity search and generate response
        docs = new_db.similarity_search(user_question)
        chain = get_conversation_chain()
        response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
        st.write(user_template.replace("{{MSG}}", response["output_text"]), unsafe_allow_html=True)
    except Exception as e:
        logging.error(f"Error processing user input: {e}")
        st.write(bot_template.replace("{{MSG}}", f"Sorry, there was an error processing your request: {str(e)}. Please try again later."), unsafe_allow_html=True)

def main():
    st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
    st.write(css, unsafe_allow_html=True)

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Chat with multiple PDFs with Gemini Pro :books:")

    with st.sidebar:
        pdf_docs = st.file_uploader(
            "Upload your PDF Files and Click on Process",
            accept_multiple_files=True
        )
        if st.button("Process"):
            with st.spinner("Processing..."):
                try:
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    get_vector_store(text_chunks)
                    st.session_state.conversation = get_conversation_chain()
                    st.success("PDFs processed successfully. You can now ask questions.")
                except Exception as e:
                    logging.error(f"Error processing PDF files: {e}")
                    st.error("There was an error processing the PDF files. Please try again later.")

    user_question = st.text_input("Ask a Question from the PDF Files")
    if user_question:
        if not os.path.exists(os.path.join(os.path.dirname(__file__), "faiss_index", "index.faiss")):
            st.warning("Please upload and process PDF files before asking questions.")
        else:
            user_input(user_question)


if __name__ == "__main__":
    main()
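
# A minimal sketch of how this app is launched, assuming the file is saved as
# app.py and the dependencies implied by the imports (streamlit, PyPDF2,
# python-dotenv, langchain, langchain-community, langchain-google-genai,
# google-generativeai, faiss-cpu) are installed:
#
#   streamlit run app.py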