Spaces:
Sleeping
Sleeping
File size: 4,682 Bytes
589e932 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
import streamlit as st
import datetime
import logging
from langchain_community.embeddings import HuggingFaceEmbeddings
from sentence_transformers import CrossEncoder
from functions import (
build_vectorstore,
get_answer,
sanitize_input
)
# Page-level settings: tab title, wide layout, expanded sidebar, and the
# text shown under the "About" menu entry. This is the first Streamlit
# command the script issues.
about_markdown = (
    "### π PDF Chatbot\n"
    "A Streamlit app that answers your questions based on the content of your PDF documents."
)
page_settings = {
    "page_title": "π PDF Chatbot",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
    "menu_items": {"About": about_markdown},
}
st.set_page_config(**page_settings)
# Seed the per-session state. Every entry is created only when its key is
# absent, so values already stored in st.session_state are left untouched.
# Factories (rather than plain values) keep the embedding model and the
# re-ranker from being constructed unless they are actually missing.
session_defaults = {
    "vectorstore": lambda: None,
    "messages": list,
    "pdf_uploaded": lambda: False,
    "current_pdf": lambda: None,
    "embeddings": lambda: HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2"),
    "reranker": lambda: CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2"),
}
for state_key, make_default in session_defaults.items():
    if state_key not in st.session_state:
        st.session_state[state_key] = make_default()
st.title("π **PDF Chatbot** π€")  # Title

# Sidebar: PDF upload, knowledge-base build, and chat-history reset.
# NOTE(review): the nesting below was reconstructed from a listing that had
# lost its indentation — confirm against the original file.
with st.sidebar:
    st.markdown("### π **Knowledge Base Management**")
    st.markdown("---")

    if not st.session_state.pdf_uploaded:
        # No document yet: offer the uploader and remember the file on success.
        pdf_file = st.file_uploader("π₯ Upload a PDF to build the database", type="pdf", accept_multiple_files=False)
        if pdf_file:
            st.session_state.pdf_uploaded = True
            st.session_state.current_pdf = pdf_file
            st.success("✅ PDF uploaded successfully!. Click below to build the knowledge base.")
    else:
        # A document is already stored for this session: show its details.
        st.subheader("π **Uploaded Document**")
        st.write(f"**π File Name:** {st.session_state.current_pdf.name}")
        st.write(f"**π File Size:** {st.session_state.current_pdf.size / (1024 * 1024):.2f} MB")
        st.info("π To upload a different PDF, please restart the application.")

    # The button is always rendered; a click only has an effect once a PDF
    # has been uploaded.
    build_button = st.button("π οΈ Build Knowledge Base")
    st.markdown("---")

    if st.session_state.pdf_uploaded and build_button:
        status_placeholder = st.empty()
        try:
            with st.spinner("β³ Processing PDF..."):
                # Only the chunk count is used here; the second returned value
                # is ignored. Presumably build_vectorstore also stores the
                # index in st.session_state.vectorstore (nothing in this
                # section assigns it) — verify in functions.py.
                num_chunks, _ = build_vectorstore(st.session_state.current_pdf)
            if num_chunks == 0:
                status_placeholder.error("β Failed to build the knowledge base.")
            else:
                status_placeholder.success(f"✅ Successfully built the knowledge base with {num_chunks} chunks.")
        except Exception as exc:
            # UI boundary: show a generic message and keep the traceback in the log.
            status_placeholder.error("β An error occurred during the build process.")
            logging.error("Error during knowledge base build: %s", exc, exc_info=True)

    # Clear Chat History button: empties the stored conversation.
    clear_button = st.button("ποΈ Clear Chat History")
    if clear_button:
        st.session_state.messages = []
        st.success("ποΈ Chat history cleared.")
    st.markdown("---")
# Main chat area: replay every stored (role, content, timestamp) message
# each time the script renders.
st.header("π¬ **Ask Your Questions**")
for sender, text, sent_at in st.session_state.messages:
    # Any role other than "user" is rendered as the assistant.
    speaker = "user" if sender == "user" else "assistant"
    with st.chat_message(speaker):
        st.write(text)
        st.markdown(f"*Time: {sent_at}*")
        if speaker == "assistant":
            # NOTE(review): the listing lost its indentation; this trailing
            # "..." line is assumed to belong to the assistant branch — confirm.
            st.markdown("...")
# Question box: shown only once a vector store exists; otherwise the user is
# told to upload a PDF and build the knowledge base first.
if st.session_state.vectorstore is None:
    st.info("π Please upload a PDF and build the knowledge base before asking questions.")
else:
    question = st.chat_input("β Ask a question about the uploaded PDF...")
    # Nothing happens for an empty submission or when sanitize_input returns
    # a falsy value for the question.
    if question and sanitize_input(question):
        stamp_format = "%Y-%m-%d %H:%M:%S"

        # Record and echo the user's turn.
        asked_at = datetime.datetime.now().strftime(stamp_format)
        st.session_state.messages.append(("user", question, asked_at))
        with st.chat_message("user"):
            st.write(question)
            st.markdown(f"*Time: {asked_at}*")

        # Produce, display, and record the assistant's turn; its timestamp is
        # taken after the answer has been generated.
        with st.chat_message("assistant"):
            with st.spinner("π Generating answer..."):
                reply = get_answer(question)
            st.write(reply)
            answered_at = datetime.datetime.now().strftime(stamp_format)
            st.markdown(f"*Time: {answered_at}*")
            st.session_state.messages.append(("assistant", reply, answered_at))
|