DrishtiSharma committed on
Commit
4b3c99f
·
verified ·
1 Parent(s): 6a9fa88

Update test.py

Browse files
Files changed (1) hide show
  1. test.py +21 -21
test.py CHANGED
@@ -5,18 +5,25 @@ import shutil
5
  import time
6
  import streamlit as st
7
  import nltk
 
 
8
 
9
- # Ensure NLTK 'punkt' resource is downloaded
10
- nltk_data_path = os.path.join(os.getcwd(), "nltk_data")
11
- os.makedirs(nltk_data_path, exist_ok=True)
12
- nltk.data.path.append(nltk_data_path)
13
 
14
- # Force download of the 'punkt' resource
 
 
 
 
 
15
  try:
16
- print("Ensuring NLTK 'punkt' resource is downloaded...")
17
- nltk.download("punkt", download_dir=nltk_data_path)
18
  except Exception as e:
19
- print(f"Error downloading NLTK 'punkt': {e}")
 
20
 
21
  sys.path.append(os.path.abspath("."))
22
  from langchain.chains import ConversationalRetrievalChain
@@ -28,7 +35,7 @@ from langchain.embeddings import HuggingFaceEmbeddings
28
  from langchain.text_splitter import NLTKTextSplitter
29
  from patent_downloader import PatentDownloader
30
 
31
- PERSISTED_DIRECTORY = "."
32
 
33
  # Fetch API key securely from the environment
34
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
@@ -50,7 +57,7 @@ def load_docs(document_path):
50
  document_path,
51
  mode="elements",
52
  strategy="fast",
53
- ocr_languages=None # Explicitly disable OCR
54
  )
55
  documents = loader.load()
56
  text_splitter = NLTKTextSplitter(chunk_size=1000)
@@ -106,8 +113,8 @@ def extract_patent_number(url):
106
  def download_pdf(patent_number):
107
  try:
108
  patent_downloader = PatentDownloader(verbose=True)
109
- output_path = patent_downloader.download(patents=patent_number)
110
- return output_path[0] # Return the first file path
111
  except Exception as e:
112
  st.error(f"Failed to download patent PDF: {e}")
113
  st.stop()
@@ -121,7 +128,6 @@ if __name__ == "__main__":
121
  )
122
  st.header("📖 Patent Chat: Google Patents Chat Demo")
123
 
124
- # Allow user to input the Google patent link
125
  patent_link = st.text_input("Enter Google Patent Link:", key="PATENT_LINK")
126
 
127
  if not patent_link:
@@ -135,8 +141,7 @@ if __name__ == "__main__":
135
 
136
  st.write(f"Patent number: **{patent_number}**")
137
 
138
- # Download the PDF file
139
- pdf_path = f"{patent_number}.pdf"
140
  if os.path.isfile(pdf_path):
141
  st.write("✅ File already downloaded.")
142
  else:
@@ -144,29 +149,24 @@ if __name__ == "__main__":
144
  pdf_path = download_pdf(patent_number)
145
  st.write(f"✅ File downloaded: {pdf_path}")
146
 
147
- # Load the conversational chain
148
  st.write("🔄 Loading document into the system...")
149
  chain = load_chain(pdf_path)
150
  st.success("🚀 Document successfully loaded! You can now start asking questions.")
151
 
152
- # Initialize the chat
153
  if "messages" not in st.session_state:
154
  st.session_state["messages"] = [
155
  {"role": "assistant", "content": "Hello! How can I assist you with this patent?"}
156
  ]
157
 
158
- # Display chat history
159
  for message in st.session_state.messages:
160
  with st.chat_message(message["role"]):
161
  st.markdown(message["content"])
162
 
163
- # User input
164
  if user_input := st.chat_input("What is your question?"):
165
  st.session_state.messages.append({"role": "user", "content": user_input})
166
  with st.chat_message("user"):
167
  st.markdown(user_input)
168
 
169
- # Generate assistant response
170
  with st.chat_message("assistant"):
171
  message_placeholder = st.empty()
172
  full_response = ""
@@ -176,7 +176,7 @@ if __name__ == "__main__":
176
  assistant_response = chain({"question": user_input})
177
  for chunk in assistant_response["answer"].split():
178
  full_response += chunk + " "
179
- time.sleep(0.05) # Simulate typing effect
180
  message_placeholder.markdown(full_response + "▌")
181
  except Exception as e:
182
  full_response = f"An error occurred: {e}"
 
5
  import time
6
  import streamlit as st
7
  import nltk
8
+ import tempfile
9
+ import subprocess
10
 
11
+ # Pin NLTK to version 3.9.1
12
+ REQUIRED_NLTK_VERSION = "3.9.1"
13
+ subprocess.run([sys.executable, "-m", "pip", "install", f"nltk=={REQUIRED_NLTK_VERSION}"])
 
14
 
15
+ # Set up temporary directory for NLTK resources
16
+ nltk_data_path = os.path.join(tempfile.gettempdir(), "nltk_data")
17
+ os.makedirs(nltk_data_path, exist_ok=True)
18
+ nltk.data.path.append(nltk_data_path)
19
+
20
+ # Download 'punkt_tab' for compatibility
21
  try:
22
+ print("Ensuring NLTK 'punkt_tab' resource is downloaded...")
23
+ nltk.download("punkt_tab", download_dir=nltk_data_path)
24
  except Exception as e:
25
+ print(f"Error downloading NLTK 'punkt_tab': {e}")
26
+ raise e
27
 
28
  sys.path.append(os.path.abspath("."))
29
  from langchain.chains import ConversationalRetrievalChain
 
35
  from langchain.text_splitter import NLTKTextSplitter
36
  from patent_downloader import PatentDownloader
37
 
38
+ PERSISTED_DIRECTORY = tempfile.mkdtemp()
39
 
40
  # Fetch API key securely from the environment
41
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 
57
  document_path,
58
  mode="elements",
59
  strategy="fast",
60
+ ocr_languages=None
61
  )
62
  documents = loader.load()
63
  text_splitter = NLTKTextSplitter(chunk_size=1000)
 
113
  def download_pdf(patent_number):
114
  try:
115
  patent_downloader = PatentDownloader(verbose=True)
116
+ output_path = patent_downloader.download(patents=patent_number, output_path=tempfile.gettempdir())
117
+ return output_path[0]
118
  except Exception as e:
119
  st.error(f"Failed to download patent PDF: {e}")
120
  st.stop()
 
128
  )
129
  st.header("📖 Patent Chat: Google Patents Chat Demo")
130
 
 
131
  patent_link = st.text_input("Enter Google Patent Link:", key="PATENT_LINK")
132
 
133
  if not patent_link:
 
141
 
142
  st.write(f"Patent number: **{patent_number}**")
143
 
144
+ pdf_path = os.path.join(tempfile.gettempdir(), f"{patent_number}.pdf")
 
145
  if os.path.isfile(pdf_path):
146
  st.write("✅ File already downloaded.")
147
  else:
 
149
  pdf_path = download_pdf(patent_number)
150
  st.write(f"✅ File downloaded: {pdf_path}")
151
 
 
152
  st.write("🔄 Loading document into the system...")
153
  chain = load_chain(pdf_path)
154
  st.success("🚀 Document successfully loaded! You can now start asking questions.")
155
 
 
156
  if "messages" not in st.session_state:
157
  st.session_state["messages"] = [
158
  {"role": "assistant", "content": "Hello! How can I assist you with this patent?"}
159
  ]
160
 
 
161
  for message in st.session_state.messages:
162
  with st.chat_message(message["role"]):
163
  st.markdown(message["content"])
164
 
 
165
  if user_input := st.chat_input("What is your question?"):
166
  st.session_state.messages.append({"role": "user", "content": user_input})
167
  with st.chat_message("user"):
168
  st.markdown(user_input)
169
 
 
170
  with st.chat_message("assistant"):
171
  message_placeholder = st.empty()
172
  full_response = ""
 
176
  assistant_response = chain({"question": user_input})
177
  for chunk in assistant_response["answer"].split():
178
  full_response += chunk + " "
179
+ time.sleep(0.05)
180
  message_placeholder.markdown(full_response + "▌")
181
  except Exception as e:
182
  full_response = f"An error occurred: {e}"