Spaces:
Runtime error
Runtime error
import streamlit as st | |
from dotenv import load_dotenv | |
from PyPDF2 import PdfReader | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
import os | |
from langchain_google_genai import GoogleGenerativeAIEmbeddings | |
import google.generativeai as genai | |
from langchain_community.vectorstores import FAISS | |
from langchain_google_genai import ChatGoogleGenerativeAI | |
from langchain.chains.question_answering import load_qa_chain | |
from langchain.prompts import PromptTemplate | |
from htmlTemplates import css, bot_template, user_template | |
import logging | |
import faiss | |
# Set up logging | |
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s') | |
load_dotenv() | |
genai.configure(api_key=os.getenv("GOOGLE_API_KEY")) | |
def get_pdf_text(pdf_docs): | |
text = "" | |
for pdf in pdf_docs: | |
try: | |
pdf_reader = PdfReader(pdf) | |
for page in pdf_reader.pages: | |
text += page.extract_text() | |
except Exception as e: | |
logging.error(f"Error processing PDF file: {e}") | |
return text | |
def get_text_chunks(text): | |
text_splitter = RecursiveCharacterTextSplitter( | |
chunk_size=10000, | |
chunk_overlap=1000 | |
) | |
chunks = text_splitter.split_text(text) | |
return chunks | |
def get_vector_store(text_chunks): | |
logging.info("Starting vector store creation") | |
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001") | |
logging.info("Embeddings created") | |
# Create the FAISS vector store | |
vector_store = FAISS.from_texts(texts=text_chunks, embedding=embeddings) | |
logging.info("FAISS vector store created") | |
# Define the directory where the FAISS index will be saved | |
faiss_index_dir = os.path.join(os.path.dirname(__file__), "faiss_index") | |
os.makedirs(faiss_index_dir, exist_ok=True) | |
# Save the entire FAISS vector store, including the docstore and index_to_docstore_id | |
vector_store.save_local(faiss_index_dir) | |
logging.info("FAISS vector store saved successfully.") | |
def get_conversation_chain(): | |
prompt_template = """ | |
Answer the question clear and precise. If not provided the context return the result as | |
"Sorry I dont know the answer", don't provide the wrong answer. | |
Context:\n {context}?\n | |
Question:\n{question}\n | |
Answer: | |
""" | |
model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3) | |
prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question']) | |
chain = load_qa_chain(model, chain_type='stuff', prompt=prompt) | |
return chain | |
def user_input(user_question): | |
logging.info("Processing user input") | |
# Reload the FAISS vector store from the saved directory | |
faiss_index_dir = os.path.join(os.path.dirname(__file__), "faiss_index") | |
if not os.path.exists(faiss_index_dir): | |
st.warning("Please upload and process PDF files before asking questions.") | |
return | |
try: | |
# Load the entire FAISS vector store, enabling dangerous deserialization since we trust the source | |
new_db = FAISS.load_local(faiss_index_dir, GoogleGenerativeAIEmbeddings(model='models/embedding-001'), allow_dangerous_deserialization=True) | |
logging.info("FAISS vector store loaded successfully") | |
# Perform similarity search and generate response | |
docs = new_db.similarity_search(user_question) | |
chain = get_conversation_chain() | |
response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True) | |
st.write(user_template.replace("{{MSG}}", response["output_text"]), unsafe_allow_html=True) | |
except Exception as e: | |
logging.error(f"Error processing user input: {e}") | |
st.write(bot_template.replace("{{MSG}}", f"Sorry, there was an error processing your request: {str(e)}. Please try again later."), unsafe_allow_html=True) | |
def main(): | |
st.set_page_config(page_title="Chat with multiple PDFs", | |
page_icon=":books:") | |
st.write(css, unsafe_allow_html=True) | |
if "conversation" not in st.session_state: | |
st.session_state.conversation = None | |
if "chat_history" not in st.session_state: | |
st.session_state.chat_history = None | |
st.header("Chat with multiple PDFs with Gemini Pro :books:") | |
with st.sidebar: | |
pdf_docs = st.file_uploader( | |
"Upload your PDF Files and Click on Process", | |
accept_multiple_files=True | |
) | |
if st.button("Process"): | |
with st.spinner("Processing..."): | |
try: | |
raw_text = get_pdf_text(pdf_docs) | |
text_chunks = get_text_chunks(raw_text) | |
get_vector_store(text_chunks) | |
st.session_state.conversation = get_conversation_chain() | |
st.success("PDFs processed successfully. You can now ask questions.") | |
except Exception as e: | |
logging.error(f"Error processing PDF files: {e}") | |
st.error("There was an error processing the PDF files. Please try again later.") | |
user_question = st.text_input("Ask a Question from the PDF Files") | |
if user_question: | |
if not os.path.exists(os.path.join(os.path.dirname(__file__), "faiss_index", "index.faiss")): | |
st.warning("Please upload and process PDF files before asking questions.") | |
else: | |
user_input(user_question) | |
if __name__ == "__main__": | |
main() |