# Spaces: Sleeping
# (Page-status residue captured with the listing; not part of the program.)
import datetime
import logging

import streamlit as st
from langchain_community.embeddings import HuggingFaceEmbeddings
from sentence_transformers import CrossEncoder

from functions import (
    build_vectorstore,
    get_answer,
    sanitize_input,
)
# Page-level configuration: browser tab title, wide layout, expanded sidebar,
# and the text shown under the app menu's "About" entry. This runs before any
# other Streamlit call in this script.
# NOTE(review): the leading glyphs in the labels look like mis-decoded emoji;
# they are left untouched here -- confirm against the deployed app.
about_text = "### π PDF Chatbot\nA Streamlit app that answers your questions based on the content of your PDF documents."

st.set_page_config(
    page_title="π PDF Chatbot",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={
        'About': about_text,
    },
)
# Seed the per-session state. Each key is written only when it is missing, so
# values stored by earlier reruns of the script are kept.
for state_key, initial_value in (
    ("vectorstore", None),
    ("messages", []),  # list of (role, content, timestamp) tuples
    ("pdf_uploaded", False),
    ("current_pdf", None),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value

# The two models stay behind explicit guards so they are constructed at most
# once per session instead of on every rerun.
if "embeddings" not in st.session_state:
    st.session_state.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
if "reranker" not in st.session_state:
    st.session_state.reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

st.title("π **PDF Chatbot** π€")  # Title
# Sidebar section: PDF upload, knowledge-base build and chat-history reset.
# NOTE(review): the original indentation was lost, so the nesting below is
# reconstructed from the control flow -- confirm against the deployed app.
with st.sidebar:
    st.markdown("### π **Knowledge Base Management**")
    st.markdown("---")

    if not st.session_state.pdf_uploaded:
        pdf_file = st.file_uploader("π₯ Upload a PDF to build the database", type="pdf", accept_multiple_files=False)
        if pdf_file:
            # Remember the upload in session state; on later reruns the
            # uploader is replaced by the summary in the else-branch.
            st.session_state.pdf_uploaded = True
            st.session_state.current_pdf = pdf_file
            st.success("β PDF uploaded successfully!. Click below to build the knowledge base.")
    else:
        uploaded_pdf = st.session_state.current_pdf
        st.subheader("π **Uploaded Document**")
        st.write(f"**π File Name:** {uploaded_pdf.name}")
        # size is divided by 1024 * 1024 to display it in MB.
        st.write(f"**π File Size:** {uploaded_pdf.size / (1024 * 1024):.2f} MB")
        st.info("π To upload a different PDF, please restart the application.")

    # The button is created on every run so that `build_button` is always
    # defined, including on the run in which the PDF has just been uploaded.
    build_button = st.button("π οΈ Build Knowledge Base")
    st.markdown("---")

    if build_button and not st.session_state.pdf_uploaded:
        # Previously this click was ignored without any feedback.
        st.warning("Please upload a PDF before building the knowledge base.")
    elif build_button:
        status_placeholder = st.empty()
        try:
            with st.spinner("β³ Processing PDF..."):
                # Only the chunk count is used here; the second return value
                # is ignored.
                num_chunks, _metadata = build_vectorstore(st.session_state.current_pdf)
        except Exception as e:
            # UI boundary: show a generic message and keep the traceback in
            # the log.
            status_placeholder.error("β An error occurred during the build process.")
            logging.exception("Error during knowledge base build: %s", e)
        else:
            if num_chunks == 0:
                status_placeholder.error("β Failed to build the knowledge base.")
            else:
                status_placeholder.success(f"β Successfully built the knowledge base with {num_chunks} chunks.")

    # Clear Chat History Button
    clear_button = st.button("ποΈ Clear Chat History")
    if clear_button:
        st.session_state.messages = []
        st.success("ποΈ Chat history cleared.")
    st.markdown("---")
# Main Chat - replays the stored messages, each with its timestamp, every time
# the UI is rendered.
st.header("π¬ **Ask Your Questions**")
for role, content, timestamp in st.session_state.messages:
    # Any role other than "user" is rendered as an assistant message.
    is_user = role == "user"
    with st.chat_message("user" if is_user else "assistant"):
        st.write(content)
        st.markdown(f"*Time: {timestamp}*")
        if not is_user:
            # NOTE(review): the original indentation was lost; this "..."
            # line is assumed to belong to the assistant branch -- confirm.
            st.markdown("...")
# User Input Section - takes the user's question, shows it, and renders the
# answer. The chat box is only offered once a vector store exists in session
# state.
# NOTE(review): nothing in this script assigns st.session_state.vectorstore;
# presumably build_vectorstore does so -- confirm in functions.py.
timestamp_format = "%Y-%m-%d %H:%M:%S"

if st.session_state.vectorstore is not None:
    user_input = st.chat_input("β Ask a question about the uploaded PDF...")
    # Input is processed only when non-empty and sanitize_input returns a
    # truthy value; otherwise this run does nothing further here.
    if user_input and sanitize_input(user_input):
        current_time = datetime.datetime.now().strftime(timestamp_format)
        st.session_state.messages.append(("user", user_input, current_time))
        with st.chat_message("user"):
            st.write(user_input)
            st.markdown(f"*Time: {current_time}*")

        with st.chat_message("assistant"):
            with st.spinner("π Generating answer..."):
                answer = get_answer(user_input)
            st.write(answer)
            # The assistant timestamp is taken after the answer is produced.
            current_time = datetime.datetime.now().strftime(timestamp_format)
            st.markdown(f"*Time: {current_time}*")
            st.session_state.messages.append(
                ("assistant", answer, current_time))
else:
    st.info("π Please upload a PDF and build the knowledge base before asking questions.")