File size: 5,235 Bytes
2057158
 
 
 
 
 
 
 
 
 
 
547cbaa
2057158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
547cbaa
2057158
560feb8
c87e7b9
2057158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
547cbaa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import html
import os

import openpyxl
import streamlit as st
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import GooglePalmEmbeddings
from langchain.llms import GooglePalm
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader

# Only fall back to the placeholder when no key is configured; an
# unconditional assignment would overwrite a real GOOGLE_API_KEY that the
# deployment environment already provides.
os.environ.setdefault('GOOGLE_API_KEY', 'your_google_api_key_here')

def get_pdf_text(pdf_docs):
    """Return the text of every page of every PDF, concatenated.

    Args:
        pdf_docs: Iterable of PDF sources, each passed to ``PdfReader``.

    Returns:
        A single string made of each page's ``extract_text()`` result, in
        document order then page order, with no separator in between.
        An empty iterable yields ``""``.
    """
    return "".join(
        page.extract_text()
        for document in pdf_docs
        for page in PdfReader(document).pages
    )

def get_excel_text(excel_docs):
    """Return the cell contents of every sheet of every workbook as one string.

    Args:
        excel_docs: Iterable of workbook sources, each passed to
            ``openpyxl.load_workbook`` as ``filename``.

    Returns:
        The string form of each non-empty cell value, in workbook, sheet,
        row, then cell order, separated by single spaces and stripped of
        leading/trailing whitespace. An empty iterable yields ``""``.
    """
    values = []
    for excel_doc in excel_docs:
        workbook = openpyxl.load_workbook(filename=excel_doc)
        for sheet in workbook:
            for row in sheet:
                # Empty cells have value None; stringifying them would inject
                # the literal word "None" into the text that gets embedded.
                values.extend(
                    str(cell.value) for cell in row if cell.value is not None
                )
    # Join once instead of growing a string cell by cell.
    return " ".join(values).strip()

def get_text_chunks(text):
    """Split *text* into chunks for embedding.

    Uses a ``RecursiveCharacterTextSplitter`` configured with
    ``chunk_size=1000`` and ``chunk_overlap=20``.

    Args:
        text: The full document text to split.

    Returns:
        Whatever ``split_text`` returns for *text*.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_overlap=20, chunk_size=1000)
    return splitter.split_text(text)

def get_vector_store(text_chunks):
    """Build a FAISS vector store from *text_chunks*.

    Args:
        text_chunks: The texts to index.

    Returns:
        The store produced by ``FAISS.from_texts`` using a freshly created
        ``GooglePalmEmbeddings`` instance as the embedding.
    """
    return FAISS.from_texts(text_chunks, embedding=GooglePalmEmbeddings())

def get_conversational_chain(vector_store):
    """Create a conversational retrieval chain backed by *vector_store*.

    Args:
        vector_store: Object whose ``as_retriever()`` supplies the retriever.

    Returns:
        The chain built by ``ConversationalRetrievalChain.from_llm`` with a
        ``GooglePalm`` LLM and a ``ConversationBufferMemory`` stored under
        the ``"chat_history"`` key with ``return_messages=True``.
    """
    palm_llm = GooglePalm()
    history_memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )
    doc_retriever = vector_store.as_retriever()
    return ConversationalRetrievalChain.from_llm(
        llm=palm_llm,
        retriever=doc_retriever,
        memory=history_memory,
    )

def get_user_input(user_question):
    """Send *user_question* to the active conversation chain and render the chat.

    The chain stored in ``st.session_state.conversation`` is called with the
    question, the returned ``chat_history`` is saved to
    ``st.session_state.chatHistory``, and every message is rendered in the
    left (wider) column: even-indexed messages as "User:" and odd-indexed
    messages as "Bot:".

    Args:
        user_question: The question text typed by the user.

    Returns:
        None. If no conversation chain exists yet, a warning is shown and
        nothing else happens.
    """
    conversation = st.session_state.get("conversation")
    if conversation is None:
        # No document has been processed yet; calling None would raise
        # TypeError and crash the page.
        st.warning("Please upload and process a document before asking a question.")
        return

    with st.container():
        response = conversation({'question': user_question})
        st.session_state.chatHistory = response['chat_history']
        # Keep the 2:1 layout. The right column stays empty.
        # NOTE(review): the previous code assembled a "Chat_History.txt"
        # string here but never exposed it; wire it to a download widget if
        # chat export is wanted.
        left, _right = st.columns((2, 1))
        with left:
            for i, message in enumerate(st.session_state.chatHistory):
                # The message text is inserted into raw HTML
                # (unsafe_allow_html=True), so escape it to stop user or
                # model output from injecting markup.
                safe_content = html.escape(str(message.content))
                if i % 2 == 0:
                    st.markdown(f'<div style="background-color: rgb(30 24 17 / 77%); border-radius: 10px; padding: 10px; margin-bottom: 5px; text-align: end;"><span style="text-align: end;">User:</span> {safe_content}</div>', unsafe_allow_html=True)
                else:
                    st.markdown(f'<div style="background-color: rgb(145 74 1 / 25%); border-radius: 10px; padding: 10px; margin-bottom: 5px; ">Bot: {safe_content}</div>', unsafe_allow_html=True)

def _build_conversation(raw_text):
    """Turn extracted document text into a conversation chain, or None if there is nothing to index."""
    if not raw_text or not raw_text.strip():
        return None
    text_chunks = get_text_chunks(raw_text)
    if not text_chunks:
        return None
    return get_conversational_chain(get_vector_store(text_chunks))

def main():
    """Render the DocChat Streamlit page.

    The sidebar offers one uploader and one "Process" button each for PDF
    and Excel files; processing builds a conversation chain and stores it in
    ``st.session_state.conversation``. The main area shows a question box
    and forwards any question to ``get_user_input``.
    """
    st.set_page_config("DocChat")
    # Define Streamlit app layout
    st.markdown("<style>body { background-color: black; color: white; }</style>", unsafe_allow_html=True)
    st.markdown("<h3 style='color: orange;'>🧾 DocChat - Chat with multiple documents</h3>", unsafe_allow_html=True)
    st.caption("🚀 Chat bot developed By :- [Dinesh Abeysinghe](https://www.linkedin.com/in/dinesh-abeysinghe-bb773293) | [GitHub Source Code](https://github.com/dineshabey/AI-Chat_with_document)")
    st.markdown("<div style= 'text-align: center;'>First need to upload PDF file or Excel file. Then click PROCESS PDF file / PROCESS EXCEL file and next you can start chat with document related things <span style='color: orange;'>Please click like button</span>❤️ and support me and enjoy it.</div>", unsafe_allow_html=True)
    st.write("---")
    with st.container():
        with st.sidebar:
            st.title("Settings")
            st.subheader("Upload Documents")
            st.markdown("**PDF files:**")
            # Restrict the picker to PDFs so other files never reach PdfReader.
            pdf_docs = st.file_uploader("Upload PDF Files", type=["pdf"], accept_multiple_files=True)
            if st.button("Process PDF file"):
                if not pdf_docs:
                    # Without this guard an empty upload reaches the vector
                    # store with no texts and raises.
                    st.warning("Please upload at least one PDF file before processing.")
                else:
                    with st.spinner("Processing PDFs..."):
                        conversation = _build_conversation(get_pdf_text(pdf_docs))
                    if conversation is None:
                        st.error("No readable text was found in the uploaded PDF files.")
                    else:
                        st.session_state.conversation = conversation
                        st.success("PDF processed successfully!")

            st.markdown("**Excel files:**")
            # Restrict the picker to workbook extensions.
            excel_docs = st.file_uploader("Upload Excel Files", type=["xlsx", "xlsm"], accept_multiple_files=True)
            if st.button("Process Excel file"):
                if not excel_docs:
                    st.warning("Please upload at least one Excel file before processing.")
                else:
                    with st.spinner("Processing Excel files..."):
                        conversation = _build_conversation(get_excel_text(excel_docs))
                    if conversation is None:
                        st.error("No readable text was found in the uploaded Excel files.")
                    else:
                        st.session_state.conversation = conversation
                        st.success("Excel file processed successfully!")

    with st.container():
        st.subheader("Document Q&A")
        user_question = st.text_input("Ask a Question from the document")
        if "conversation" not in st.session_state:
            st.session_state.conversation = None
        if "chatHistory" not in st.session_state:
            st.session_state.chatHistory = None
        if user_question:
            if st.session_state.conversation is None:
                # A question asked before any document was processed would
                # otherwise try to call None.
                st.info("Please upload and process a document before asking a question.")
            else:
                get_user_input(user_question)

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()