File size: 4,682 Bytes
589e932
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import streamlit as st
import datetime
import logging
from langchain_community.embeddings import HuggingFaceEmbeddings
from sentence_transformers import CrossEncoder

from functions import (
    build_vectorstore,
    get_answer,
    sanitize_input
)

# Page-level settings for the app: browser tab title, wide layout, an expanded
# sidebar, and the Markdown text used for the "About" menu entry.
_about_text = "### πŸ“š PDF Chatbot\nA Streamlit app that answers your questions based on the content of your PDF documents."
_page_options = dict(
    page_title="πŸ“„ PDF Chatbot",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={"About": _about_text},
)
st.set_page_config(**_page_options)

@st.cache_resource(show_spinner=False)
def _load_embeddings() -> HuggingFaceEmbeddings:
    """Return the sentence-embedding model, constructed once per server process.

    Cached with ``st.cache_resource`` so that new browser sessions reuse the
    already-loaded model instead of loading another copy.
    """
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")


@st.cache_resource(show_spinner=False)
def _load_reranker() -> CrossEncoder:
    """Return the cross-encoder reranker, constructed once per server process."""
    return CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")


# Lightweight per-session defaults. A key is only written when it is absent,
# so values stored during earlier reruns of this script are left untouched.
# The dict is rebuilt on every rerun, so each new session gets its own list.
_session_defaults = {
    "vectorstore": None,
    "messages": [],
    "pdf_uploaded": False,
    "current_pdf": None,
}
for _key, _default in _session_defaults.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default

# The models stay reachable under the same session-state keys as before; only
# their construction is now shared across sessions through the cached loaders.
if "embeddings" not in st.session_state:
    st.session_state.embeddings = _load_embeddings()
if "reranker" not in st.session_state:
    st.session_state.reranker = _load_reranker()


st.title("πŸ“„ **PDF Chatbot** πŸ€–")  # Title

# Sidebar section: the PDF is uploaded and the knowledge base is built here.
with st.sidebar:
    st.markdown("### πŸ“š **Knowledge Base Management**")
    st.markdown("---")
    if not st.session_state.pdf_uploaded:
        pdf_file = st.file_uploader("πŸ“₯ Upload a PDF to build the database", type="pdf", accept_multiple_files=False)
        if pdf_file:
            # Keep the upload in session state: once pdf_uploaded is True the
            # uploader widget is no longer rendered on later reruns.
            st.session_state.pdf_uploaded = True
            st.session_state.current_pdf = pdf_file
            st.success("βœ… PDF uploaded successfully!. Click below to build the knowledge base.")
    else:
        uploaded_pdf = st.session_state.current_pdf
        st.subheader("πŸ“„ **Uploaded Document**")
        st.write(f"**πŸ“ File Name:** {uploaded_pdf.name}")
        # The size is converted to MB (1024 * 1024) for display.
        st.write(f"**πŸ“ File Size:** {uploaded_pdf.size / (1024 * 1024):.2f} MB")
        st.info("πŸ”„ To upload a different PDF, please restart the application.")

    build_button = st.button("πŸ› οΈ Build Knowledge Base")
    st.markdown("---")

    if build_button:
        if not st.session_state.pdf_uploaded:
            # Previously a click without an upload was ignored silently.
            st.warning("Please upload a PDF before building the knowledge base.")
        else:
            status_placeholder = st.empty()
            try:
                # Rewind the upload so a repeated build reads it from the
                # start (Streamlit's UploadedFile is a BytesIO subclass).
                st.session_state.current_pdf.seek(0)
                with st.spinner("⏳ Processing PDF..."):
                    num_chunks, _metadata = build_vectorstore(st.session_state.current_pdf)
            except Exception as e:
                # Top-level UI boundary: show a generic message and keep the
                # full traceback in the log.
                status_placeholder.error("❌ An error occurred during the build process.")
                logging.exception("Error during knowledge base build: %s", e)
            else:
                if num_chunks == 0:
                    status_placeholder.error("❌ Failed to build the knowledge base.")
                else:
                    status_placeholder.success(f"βœ… Successfully built the knowledge base with {num_chunks} chunks.")

    # Clear Chat History Button
    clear_button = st.button("πŸ—‘οΈ Clear Chat History")
    if clear_button:
        st.session_state.messages = []
        st.success("πŸ—‘οΈ Chat history cleared.")
    st.markdown("---")

# Main chat: replays every stored (role, content, timestamp) entry each time
# the script reruns. Any role other than "user" is rendered as the assistant,
# followed by an extra "..." line.
st.header("πŸ’¬ **Ask Your Questions**")
for speaker, text, stamp in st.session_state.messages:
    from_user = speaker == "user"
    with st.chat_message("user" if from_user else "assistant"):
        st.write(text)
        st.markdown(f"*Time: {stamp}*")
        if not from_user:
            st.markdown("...")


# User input section: takes a question, renders it, asks get_answer for a
# reply, and appends both sides of the exchange to the stored history. The
# input box is only shown once a vector store exists in session state.
if st.session_state.vectorstore is not None:
    user_input = st.chat_input("❓ Ask a question about the uploaded PDF...")
    # NOTE(review): input rejected by sanitize_input is dropped without any
    # feedback here - confirm that sanitize_input reports the reason itself.
    if user_input and sanitize_input(user_input):
        asked_at = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        st.session_state.messages.append(("user", user_input, asked_at))
        with st.chat_message("user"):
            st.write(user_input)
            st.markdown(f"*Time: {asked_at}*")

        with st.chat_message("assistant"):
            try:
                with st.spinner("πŸ” Generating answer..."):
                    answer = get_answer(user_input)
            except Exception as e:
                # Top-level UI boundary, handled like the knowledge-base build:
                # generic message for the user, full traceback in the log. No
                # assistant entry is stored for a failed request.
                st.error("An error occurred while generating the answer. Please try again.")
                logging.exception("Error while generating answer: %s", e)
            else:
                st.write(answer)
                # Stamp the reply when it is ready, not when the question was asked.
                answered_at = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                st.markdown(f"*Time: {answered_at}*")
                st.session_state.messages.append(("assistant", answer, answered_at))
else:
    st.info("πŸ“„ Please upload a PDF and build the knowledge base before asking questions.")