at2507 committed on
Commit
88e2a57
·
verified ·
1 Parent(s): ba4a966

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -0
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from langchain_community.document_loaders import PyPDFLoader, UnstructuredPDFLoader
4
+ from langchain_community.embeddings import HuggingFaceEmbeddings
5
+ from langchain_community.vectorstores import Chroma
6
+ from langchain.chains import ConversationalRetrievalChain
7
+ from langchain.memory import ConversationBufferMemory
8
+ from langchain_community.chat_models import ChatOpenAI
9
+
10
def process_pdf(file_path):
    """Load a PDF and index its contents in a Chroma vector store.

    The file is first read with ``PyPDFLoader``. If that loader raises, or
    returns no page with any text, the file is re-read with
    ``UnstructuredPDFLoader`` using the ``ocr_only`` strategy.

    Args:
        file_path: Path of the PDF file on disk.

    Returns:
        A ``Chroma`` vector store built from the loaded documents with the
        ``all-MiniLM-L6-v2`` embedding model.

    Raises:
        gr.Error: If the PDF cannot be loaded, yields no documents, or the
            embedding/indexing step fails.
    """
    try:
        try:
            documents = PyPDFLoader(file_path).load()
        except Exception:
            # Deliberate best-effort: any loader failure falls through to the
            # OCR loader below. ``Exception`` (not a bare ``except``) so that
            # KeyboardInterrupt/SystemExit still propagate.
            documents = []

        # A PDF without extractable text loads "successfully" with blank
        # pages, so also fall back when no page carries text.
        if not any(doc.page_content.strip() for doc in documents):
            loader = UnstructuredPDFLoader(file_path, strategy="ocr_only")
            documents = loader.load()

        if not documents:
            raise ValueError("no content could be extracted from the PDF")

        # Create embeddings and vector store
        embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        return Chroma.from_documents(documents, embeddings)
    except Exception as e:
        raise gr.Error(f"Error processing PDF: {str(e)}") from e
26
+
27
def setup_conversation_chain(vector_store, api_key):
    """Build a conversational retrieval chain with buffer memory.

    Args:
        vector_store: Vector store whose retriever supplies context; the
            retriever is configured to return the top 3 matches.
        api_key: OpenAI API key used by the chat model of this chain.

    Returns:
        A ``ConversationalRetrievalChain`` whose memory stores the dialogue
        under the ``chat_history`` key.

    Raises:
        gr.Error: If the model, retriever or chain cannot be created.
    """
    try:
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
        )
        # The key is handed to the model directly rather than written to
        # os.environ: the environment is process-wide, so one user's key
        # would otherwise be visible to (or overwritten by) other sessions.
        llm = ChatOpenAI(temperature=0.1, openai_api_key=api_key)
        return ConversationalRetrievalChain.from_llm(
            llm,
            vector_store.as_retriever(search_kwargs={"k": 3}),
            memory=memory,
        )
    except Exception as e:
        raise gr.Error(f"Error initializing chat: {str(e)}") from e
42
+
43
def upload_file(file, api_key, chat_history):
    """Handle a PDF upload and create the conversation chain for it.

    Args:
        file: The uploaded file, either a filesystem path string or an
            object exposing the path as ``.name``.
        api_key: OpenAI API key typed by the user; must start with ``sk-``.
        chat_history: Current chatbot history (not used; the history is
            replaced by a single status message).

    Returns:
        A ``(conversation_chain, chat_history)`` tuple, where the history
        holds one entry announcing that the PDF was processed.

    Raises:
        gr.Error: If the key format is invalid, no PDF was supplied, or
            processing the PDF / building the chain fails.
    """
    # Tolerate a missing value and stray whitespace from copy/paste.
    api_key = (api_key or "").strip()
    if not api_key.startswith("sk-"):
        raise gr.Error("Invalid OpenAI API key format")

    if file is None:
        raise gr.Error("Please upload a PDF first")

    # Accept both a plain path string and a file-like object with ``.name``.
    file_path = file if isinstance(file, str) else file.name

    # Case-insensitive so that e.g. "REPORT.PDF" is accepted.
    if not file_path.lower().endswith(".pdf"):
        raise gr.Error("Only PDF files are supported")

    vector_store = process_pdf(file_path)
    if not vector_store:
        raise gr.Error("Failed to process PDF")

    conversation_chain = setup_conversation_chain(vector_store, api_key)
    return conversation_chain, [("System", "PDF processed successfully! Ask me anything about the document.")]
57
+
58
def respond(query, chat_history, conversation_chain):
    """Answer a user question with the conversation chain.

    Args:
        query: The question typed by the user.
        chat_history: List of ``(question, answer)`` pairs shown in the
            chatbot; ``None`` is treated as an empty history.
        conversation_chain: Callable chain that takes ``{"question": ...}``
            and returns a mapping containing an ``"answer"`` entry.

    Returns:
        A ``("", chat_history)`` tuple: the empty string clears the input
        box and the history includes the new exchange. A blank query
        returns the history unchanged without calling the chain.

    Raises:
        gr.Error: If no chain exists yet or the chain call fails.
    """
    if not conversation_chain:
        raise gr.Error("Please upload a PDF first")

    # The history can be None (e.g. before any message); avoid crashing on
    # ``None.append``.
    if chat_history is None:
        chat_history = []

    # Nothing to ask: skip the model call for empty/whitespace-only input.
    if not query or not query.strip():
        return "", chat_history

    try:
        result = conversation_chain({"question": query})
        chat_history.append((query, result["answer"]))
        return "", chat_history
    except Exception as e:
        raise gr.Error(f"Error processing query: {str(e)}") from e
69
+
70
# UI definition: API key + upload button on top, chat area underneath.
with gr.Blocks(title="PDF Chatbot", theme=gr.themes.Soft()) as app:
    gr.Markdown(value="# 📄 DocuBuddy - Ask Me Questions About Your Document")

    # Per-session holder for the chain built after a successful upload.
    conversation_chain = gr.State(value=None)

    with gr.Row():
        with gr.Column(scale=1):
            api_key = gr.Textbox(
                label="OpenAI API Key",
                type="password",
                placeholder="Enter your OpenAI API key (sk-...)",
            )
            upload_btn = gr.UploadButton(
                label="📁 Upload PDF",
                file_types=[".pdf"],
                file_count="single",
            )

    chatbot = gr.Chatbot(label="Conversation", height=500)
    query = gr.Textbox(
        label="Your Question",
        placeholder="Type your question here...",
    )
    # Clears both the question box and the visible conversation.
    clear_btn = gr.ClearButton(components=[query, chatbot])

    # Uploading a file builds the chain and resets the chat to a status line.
    upload_btn.upload(
        fn=upload_file,
        inputs=[upload_btn, api_key, chatbot],
        outputs=[conversation_chain, chatbot],
    )

    # Pressing Enter in the question box asks the chain and updates the chat.
    query.submit(
        fn=respond,
        inputs=[query, chatbot, conversation_chain],
        outputs=[query, chatbot],
    )

if __name__ == "__main__":
    app.launch(share=True)