File size: 4,417 Bytes
42416c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc83d0f
42416c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import logging
import os

from dotenv import load_dotenv
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
import google.generativeai as genai
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from PyPDF2 import PdfReader

# FastAPI application for chatting with uploaded PDF documents.
app = FastAPI()

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s')

# Load environment variables from .env, then configure the Gemini client.
load_dotenv()
# BUG FIX: the original code passed a literal API key as the *name* of the
# environment variable, so os.getenv() always returned None (and the secret
# was leaked in source). Read the key from the GOOGLE_API_KEY variable instead.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF.

    Args:
        pdf_docs: iterable of uploaded file objects; each must expose a
            ``.file`` binary stream (FastAPI ``UploadFile``).

    Returns:
        str: all page text joined together; "" when nothing could be read.
        Unreadable files are logged and skipped rather than aborting the batch.
    """
    parts = []
    for pdf in pdf_docs:
        try:
            pdf_reader = PdfReader(pdf.file)
            for page in pdf_reader.pages:
                # extract_text() returns None for pages with no extractable
                # text (e.g. scanned images); the original `text +=` crashed
                # with a TypeError there.
                parts.append(page.extract_text() or "")
        except Exception as e:
            logging.error(f"Error processing PDF file: {e}")
    # join() instead of repeated += avoids quadratic string building.
    return "".join(parts)

def get_text_chunks(text):
    """Split *text* into overlapping chunks suitable for embedding.

    Uses 10,000-character chunks with 1,000 characters of overlap so that
    sentences spanning a chunk boundary are not lost.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
    return splitter.split_text(text)

def get_vector_store(text_chunks):
    """Embed *text_chunks* and persist a FAISS index next to this module.

    The index is written to the ``faiss_index`` directory (created if
    missing) so that later questions can be answered without re-embedding.
    """
    logging.info("Starting vector store creation")

    embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    index = FAISS.from_texts(texts=text_chunks, embedding=embedder)
    logging.info("FAISS vector store created")

    target_dir = os.path.join(os.path.dirname(__file__), "faiss_index")
    os.makedirs(target_dir, exist_ok=True)
    index.save_local(target_dir)
    logging.info("FAISS vector store saved successfully.")

def get_conversation_chain():
    """Return a 'stuff'-type question-answering chain backed by Gemini Pro.

    The prompt instructs the model to refuse ("Sorry I dont know the answer")
    rather than hallucinate when the answer is not in the supplied context.
    """
    prompt_template = """
        Answer the question clear and precise. If not provided the context return the result as
        "Sorry I dont know the answer", don't provide the wrong answer.
        Context:\n {context}?\n
        Question:\n{question}\n
        Answer:
    """
    qa_prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    return load_qa_chain(llm, chain_type='stuff', prompt=qa_prompt)

def user_input(user_question):
    """Answer *user_question* using the previously saved FAISS index.

    Returns a plain-text answer, a notice asking the user to upload PDFs
    first when no index exists, or a human-readable error message when the
    lookup fails.
    """
    logging.info("Processing user input")

    index_dir = os.path.join(os.path.dirname(__file__), "faiss_index")
    if not os.path.exists(index_dir):
        # No index yet: the user must upload and process PDFs first.
        return "Please upload and process PDF files before asking questions."

    try:
        embedder = GoogleGenerativeAIEmbeddings(model='models/embedding-001')
        store = FAISS.load_local(index_dir, embedder, allow_dangerous_deserialization=True)
        logging.info("FAISS vector store loaded successfully")

        matching_docs = store.similarity_search(user_question)
        qa_chain = get_conversation_chain()
        result = qa_chain({"input_documents": matching_docs, "question": user_question}, return_only_outputs=True)
        return result["output_text"]
    except Exception as e:
        logging.error(f"Error processing user input: {e}")
        return f"Sorry, there was an error processing your request: {str(e)}. Please try again later."

@app.post("/upload_pdf/")
async def upload_pdf(pdf_docs: list[UploadFile] = File(...)):
    """Extract text from the uploaded PDFs, chunk it, and build the FAISS index."""
    extracted_text = get_pdf_text(pdf_docs)
    chunks = get_text_chunks(extracted_text)
    get_vector_store(chunks)
    return {"message": "PDFs processed successfully. You can now ask questions."}

@app.get("/ask_question/")
async def ask_question(user_question: str):
    """Answer *user_question* against the previously indexed PDFs."""
    answer = user_input(user_question)
    return {"response": answer}

@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve a minimal HTML landing page describing the API endpoints."""
    landing_page = """
        <html>
            <head>
                <title>Chat with PDFs</title>
            </head>
            <body>
                <h1>Welcome to Chat with PDFs API</h1>
                <p>Use POST /upload_pdf/ to upload PDF files.</p>
                <p>Use GET /ask_question/ to ask questions from the PDFs you uploaded.</p>
            </body>
        </html>
    """
    return landing_page