Spaces:

DrishtiSharma
/

chat-w-google-patents

Running

App Files Files Community

DrishtiSharma committed on Dec 20, 2024

Commit

4b3c99f

verified ·

1 Parent(s): 6a9fa88

Update test.py

Browse files

Files changed (1) hide show

test.py +21 -21

test.py CHANGED Viewed

@@ -5,18 +5,25 @@ import shutil
 import time
 import streamlit as st
 import nltk
-# Ensure NLTK 'punkt' resource is downloaded
-nltk_data_path = os.path.join(os.getcwd(), "nltk_data")
-os.makedirs(nltk_data_path, exist_ok=True)
-nltk.data.path.append(nltk_data_path)
-# Force download of the 'punkt' resource
 try:
-    print("Ensuring NLTK 'punkt' resource is downloaded...")
-    nltk.download("punkt", download_dir=nltk_data_path)
 except Exception as e:
-    print(f"Error downloading NLTK 'punkt': {e}")
 sys.path.append(os.path.abspath("."))
 from langchain.chains import ConversationalRetrievalChain
@@ -28,7 +35,7 @@ from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.text_splitter import NLTKTextSplitter
 from patent_downloader import PatentDownloader
-PERSISTED_DIRECTORY = "."
 # Fetch API key securely from the environment
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
@@ -50,7 +57,7 @@ def load_docs(document_path):
             document_path,
             mode="elements",
             strategy="fast",
-            ocr_languages=None  # Explicitly disable OCR
         )
         documents = loader.load()
         text_splitter = NLTKTextSplitter(chunk_size=1000)
@@ -106,8 +113,8 @@ def extract_patent_number(url):
 def download_pdf(patent_number):
     try:
         patent_downloader = PatentDownloader(verbose=True)
-        output_path = patent_downloader.download(patents=patent_number)
-        return output_path[0]  # Return the first file path
     except Exception as e:
         st.error(f"Failed to download patent PDF: {e}")
         st.stop()
@@ -121,7 +128,6 @@ if __name__ == "__main__":
     )
     st.header("📖 Patent Chat: Google Patents Chat Demo")
-    # Allow user to input the Google patent link
     patent_link = st.text_input("Enter Google Patent Link:", key="PATENT_LINK")
     if not patent_link:
@@ -135,8 +141,7 @@ if __name__ == "__main__":
     st.write(f"Patent number: **{patent_number}**")
-    # Download the PDF file
-    pdf_path = f"{patent_number}.pdf"
     if os.path.isfile(pdf_path):
         st.write("✅ File already downloaded.")
     else:
@@ -144,29 +149,24 @@ if __name__ == "__main__":
         pdf_path = download_pdf(patent_number)
         st.write(f"✅ File downloaded: {pdf_path}")
-    # Load the conversational chain
     st.write("🔄 Loading document into the system...")
     chain = load_chain(pdf_path)
     st.success("🚀 Document successfully loaded! You can now start asking questions.")
-    # Initialize the chat
     if "messages" not in st.session_state:
         st.session_state["messages"] = [
             {"role": "assistant", "content": "Hello! How can I assist you with this patent?"}
         ]
-    # Display chat history
     for message in st.session_state.messages:
         with st.chat_message(message["role"]):
             st.markdown(message["content"])
-    # User input
     if user_input := st.chat_input("What is your question?"):
         st.session_state.messages.append({"role": "user", "content": user_input})
         with st.chat_message("user"):
             st.markdown(user_input)
-        # Generate assistant response
         with st.chat_message("assistant"):
             message_placeholder = st.empty()
             full_response = ""
@@ -176,7 +176,7 @@ if __name__ == "__main__":
                 assistant_response = chain({"question": user_input})
                 for chunk in assistant_response["answer"].split():
                     full_response += chunk + " "
-                    time.sleep(0.05)  # Simulate typing effect
                     message_placeholder.markdown(full_response + "▌")
             except Exception as e:
                 full_response = f"An error occurred: {e}"

 import time
 import streamlit as st
 import nltk
+import tempfile
+import subprocess
+# Pin NLTK to version 3.9.1
+REQUIRED_NLTK_VERSION = "3.9.1"
+subprocess.run([sys.executable, "-m", "pip", "install", f"nltk=={REQUIRED_NLTK_VERSION}"])
+# Set up temporary directory for NLTK resources
+nltk_data_path = os.path.join(tempfile.gettempdir(), "nltk_data")
+os.makedirs(nltk_data_path, exist_ok=True)
+nltk.data.path.append(nltk_data_path)
+# Download 'punkt_tab' for compatibility
 try:
+    print("Ensuring NLTK 'punkt_tab' resource is downloaded...")
+    nltk.download("punkt_tab", download_dir=nltk_data_path)
 except Exception as e:
+    print(f"Error downloading NLTK 'punkt_tab': {e}")
+    raise e
 sys.path.append(os.path.abspath("."))
 from langchain.chains import ConversationalRetrievalChain
 from langchain.text_splitter import NLTKTextSplitter
 from patent_downloader import PatentDownloader
+PERSISTED_DIRECTORY = tempfile.mkdtemp()
 # Fetch API key securely from the environment
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
             document_path,
             mode="elements",
             strategy="fast",
+            ocr_languages=None
         )
         documents = loader.load()
         text_splitter = NLTKTextSplitter(chunk_size=1000)
 def download_pdf(patent_number):
     try:
         patent_downloader = PatentDownloader(verbose=True)
+        output_path = patent_downloader.download(patents=patent_number, output_path=tempfile.gettempdir())
+        return output_path[0]
     except Exception as e:
         st.error(f"Failed to download patent PDF: {e}")
         st.stop()
     )
     st.header("📖 Patent Chat: Google Patents Chat Demo")
     patent_link = st.text_input("Enter Google Patent Link:", key="PATENT_LINK")
     if not patent_link:
     st.write(f"Patent number: **{patent_number}**")
+    pdf_path = os.path.join(tempfile.gettempdir(), f"{patent_number}.pdf")
     if os.path.isfile(pdf_path):
         st.write("✅ File already downloaded.")
     else:
         pdf_path = download_pdf(patent_number)
         st.write(f"✅ File downloaded: {pdf_path}")
     st.write("🔄 Loading document into the system...")
     chain = load_chain(pdf_path)
     st.success("🚀 Document successfully loaded! You can now start asking questions.")
     if "messages" not in st.session_state:
         st.session_state["messages"] = [
             {"role": "assistant", "content": "Hello! How can I assist you with this patent?"}
         ]
     for message in st.session_state.messages:
         with st.chat_message(message["role"]):
             st.markdown(message["content"])
     if user_input := st.chat_input("What is your question?"):
         st.session_state.messages.append({"role": "user", "content": user_input})
         with st.chat_message("user"):
             st.markdown(user_input)
         with st.chat_message("assistant"):
             message_placeholder = st.empty()
             full_response = ""
                 assistant_response = chain({"question": user_input})
                 for chunk in assistant_response["answer"].split():
                     full_response += chunk + " "
+                    time.sleep(0.05)
                     message_placeholder.markdown(full_response + "▌")
             except Exception as e:
                 full_response = f"An error occurred: {e}"