import os

import streamlit as st

# These import paths target the classic monolithic "langchain" package; newer
# releases (>= 0.1) moved these classes into langchain_community / langchain_openai.
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.document_loaders import PyPDFLoader

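# Streamlit reruns this script top-to-bottom on every interaction, so anything
# that must survive across turns (the chat transcript, the retrieval chain)
# lives in st.session_state.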
if "messages" not in st.session_state: |
|
st.session_state.messages = [] |
|
if "chain" not in st.session_state: |
|
st.session_state.chain = None |
|
|
|
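# The sidebar collects the OpenAI API key and shows usage notes; it returns
# the key so main() can gate the chat UI on it.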
def create_sidebar():
    with st.sidebar:
        st.title("🤖 PDF Chat")

        api_key = st.text_input("OpenAI API Key:", type="password", help="Get your API key from the OpenAI website")

        st.markdown("""
### What is this?
A simple app that lets you chat with your PDF files using GPT and RAG.

### How to use
1. Paste your OpenAI API key
2. Upload PDF file(s)
3. Click 'Process PDFs'
4. Start asking questions!

### Built using
- LangChain
- OpenAI
- FAISS
- Streamlit

Made with ❤️
""")

    return api_key

def process_pdfs(papers, api_key):
    """Process PDFs and return whether processing was successful"""
    if not papers:
        return False
    if not api_key:
        st.error("Please enter your OpenAI API key before processing.")
        return False

    with st.spinner("Processing PDFs..."):
        try:
            embeddings = OpenAIEmbeddings(openai_api_key=api_key)
            all_texts = []

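            # Each upload is written to disk temporarily because PyPDFLoader
            # reads from a file path, then deleted once its pages are loaded.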
            for paper in papers:
                file_path = os.path.join('./uploads', paper.name)
                os.makedirs('./uploads', exist_ok=True)
                with open(file_path, "wb") as f:
                    f.write(paper.getbuffer())

                loader = PyPDFLoader(file_path)
                documents = loader.load()

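                # Split pages into overlapping chunks: ~1000 characters with a
                # 200-character overlap, so an answer that spans a chunk
                # boundary still retrieves enough surrounding context.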
                text_splitter = RecursiveCharacterTextSplitter(
                    chunk_size=1000,
                    chunk_overlap=200,
                )
                texts = text_splitter.split_documents(documents)
                all_texts.extend(texts)
                os.remove(file_path)

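            # Build an in-memory FAISS index over all chunks. It is rebuilt on
            # every processing run, which is fine for a demo; a larger corpus
            # would want persistence (e.g. FAISS.save_local / load_local).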
            vectorstore = FAISS.from_documents(all_texts, embeddings)

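            # output_key="answer" tells the memory which chain output to
            # record, since ConversationalRetrievalChain can emit several keys.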
            memory = ConversationBufferMemory(
                memory_key="chat_history",
                return_messages=True,
                output_key="answer"
            )

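            # chain_type="stuff" concatenates the retrieved chunks into a
            # single prompt; temperature=0 keeps answers deterministic.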
            st.session_state.chain = ConversationalRetrievalChain.from_llm(
                llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_key=api_key),
                retriever=vectorstore.as_retriever(),
                memory=memory,
                return_source_documents=False,
                chain_type="stuff"
            )

            st.success(f"Processed {len(papers)} PDF(s) successfully!")
            return True

        except Exception as e:
            st.error(f"Error processing PDFs: {str(e)}")
            return False

def main():
    st.set_page_config(page_title="PDF Chat")

    api_key = create_sidebar()

    st.title("💬 Chat with your PDFs")
    st.markdown("""
### 👋 Hey there!
This is a simple demo showing how to chat with your PDF documents using GPT and RAG (Retrieval-Augmented Generation).

#### Try it out:
- Upload one or more PDFs
- Ask questions about their content
- The app will use RAG to find relevant info and answer your questions
""")

    st.divider()

st.markdown("### π Upload your documents") |
|
papers = st.file_uploader("Choose PDF files", type=["pdf"], accept_multiple_files=True) |
|
|
|
if papers: |
|
st.markdown(f"*{len(papers)} files uploaded*") |
|
if st.button("Process PDFs"): |
|
process_pdfs(papers, api_key) |
|
|
|
st.divider() |
|
|
|
    if not api_key:
        st.warning("🔑 Please enter your OpenAI API key in the sidebar to start")
        return

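    # Replay the stored transcript: the script reruns on every interaction,
    # so previous turns must be re-rendered from session state each time.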
st.markdown("### π Chat") |
|
for message in st.session_state.messages: |
|
with st.chat_message(message["role"]): |
|
st.markdown(message["content"]) |
|
|
|
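    # st.chat_input returns None until the user submits, so the walrus
    # assignment only enters the block on an actual message.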
    if prompt := st.chat_input("Ask about your PDFs..."):
        st.session_state.messages.append({"role": "user", "content": prompt})

        with st.chat_message("user"):
            st.markdown(prompt)

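        # The chain's ConversationBufferMemory supplies chat_history
        # automatically, so only the new question needs to be passed in.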
        with st.chat_message("assistant"):
            if st.session_state.chain is None:
                response = "Please upload and process a PDF first! 📄"
            else:
                with st.spinner("Thinking..."):
                    result = st.session_state.chain({"question": prompt})
                    response = result["answer"]

            st.markdown(response)
            st.session_state.messages.append({"role": "assistant", "content": response})

if __name__ == "__main__":
    main()