File size: 4,417 Bytes
42416c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc83d0f
42416c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import logging
import os

from dotenv import load_dotenv
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
import google.generativeai as genai
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from PyPDF2 import PdfReader

# FastAPI application for chatting with uploaded PDF documents.
app = FastAPI()

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s')

# Load environment variables from .env, then configure the Gemini client.
load_dotenv()
# BUG FIX: the original code passed a literal API key as the *name* of the
# environment variable, so os.getenv() always returned None (and the secret
# was leaked in source). Read the key from the GOOGLE_API_KEY variable instead.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF.

    Args:
        pdf_docs: iterable of uploaded file objects; each must expose a
            ``.file`` binary stream (FastAPI ``UploadFile``).

    Returns:
        str: all page text joined together; "" when nothing could be read.
        Unreadable files are logged and skipped rather than aborting the batch.
    """
    parts = []
    for pdf in pdf_docs:
        try:
            pdf_reader = PdfReader(pdf.file)
            for page in pdf_reader.pages:
                # extract_text() returns None for pages with no extractable
                # text (e.g. scanned images); the original `text +=` crashed
                # with a TypeError there.
                parts.append(page.extract_text() or "")
        except Exception as e:
            logging.error(f"Error processing PDF file: {e}")
    # join() instead of repeated += avoids quadratic string building.
    return "".join(parts)

def get_text_chunks(text):
    """Split *text* into overlapping chunks suitable for embedding.

    Uses 10,000-character chunks with 1,000 characters of overlap so that
    sentences spanning a chunk boundary are not lost.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
    return splitter.split_text(text)

def get_vector_store(text_chunks):
    """Embed *text_chunks* and persist a FAISS index next to this module.

    The index is written to the ``faiss_index`` directory (created if
    missing) so that later questions can be answered without re-embedding.
    """
    logging.info("Starting vector store creation")

    embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    index = FAISS.from_texts(texts=text_chunks, embedding=embedder)
    logging.info("FAISS vector store created")

    target_dir = os.path.join(os.path.dirname(__file__), "faiss_index")
    os.makedirs(target_dir, exist_ok=True)
    index.save_local(target_dir)
    logging.info("FAISS vector store saved successfully.")

def get_conversation_chain():
    """Return a 'stuff'-type question-answering chain backed by Gemini Pro.

    The prompt instructs the model to refuse ("Sorry I dont know the answer")
    rather than hallucinate when the answer is not in the supplied context.
    """
    prompt_template = """
        Answer the question clear and precise. If not provided the context return the result as
        "Sorry I dont know the answer", don't provide the wrong answer.
        Context:\n {context}?\n
        Question:\n{question}\n
        Answer:
    """
    qa_prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    return load_qa_chain(llm, chain_type='stuff', prompt=qa_prompt)

def user_input(user_question):
    """Answer *user_question* using the previously saved FAISS index.

    Returns a plain-text answer, a notice asking the user to upload PDFs
    first when no index exists, or a human-readable error message when the
    lookup fails.
    """
    logging.info("Processing user input")

    index_dir = os.path.join(os.path.dirname(__file__), "faiss_index")
    if not os.path.exists(index_dir):
        # No index yet: the user must upload and process PDFs first.
        return "Please upload and process PDF files before asking questions."

    try:
        embedder = GoogleGenerativeAIEmbeddings(model='models/embedding-001')
        store = FAISS.load_local(index_dir, embedder, allow_dangerous_deserialization=True)
        logging.info("FAISS vector store loaded successfully")

        matching_docs = store.similarity_search(user_question)
        qa_chain = get_conversation_chain()
        result = qa_chain({"input_documents": matching_docs, "question": user_question}, return_only_outputs=True)
        return result["output_text"]
    except Exception as e:
        logging.error(f"Error processing user input: {e}")
        return f"Sorry, there was an error processing your request: {str(e)}. Please try again later."

@app.post("/upload_pdf/")
async def upload_pdf(pdf_docs: list[UploadFile] = File(...)):
    """Extract text from the uploaded PDFs, chunk it, and build the FAISS index."""
    extracted_text = get_pdf_text(pdf_docs)
    chunks = get_text_chunks(extracted_text)
    get_vector_store(chunks)
    return {"message": "PDFs processed successfully. You can now ask questions."}

@app.get("/ask_question/")
async def ask_question(user_question: str):
    """Answer *user_question* against the previously indexed PDFs."""
    answer = user_input(user_question)
    return {"response": answer}

@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve a minimal HTML landing page describing the API endpoints."""
    landing_page = """
        <html>
            <head>
                <title>Chat with PDFs</title>
            </head>
            <body>
                <h1>Welcome to Chat with PDFs API</h1>
                <p>Use POST /upload_pdf/ to upload PDF files.</p>
                <p>Use GET /ask_question/ to ask questions from the PDFs you uploaded.</p>
            </body>
        </html>
    """
    return landing_page