DrishtiSharma committed on
Commit
fc4ce9d
·
verified ·
1 Parent(s): 869c7af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -0
app.py CHANGED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import re
4
+ import shutil
5
+ import time
6
+ import fitz
7
+ import streamlit as st
8
+ import nltk
9
+ import tempfile
10
+ import subprocess
11
+
12
# Pin NLTK to version 3.9.1.
#
# This script imports Streamlit, and Streamlit re-executes the whole script on
# each user interaction, so pip is only invoked when the installed NLTK
# distribution is not already the required one.
import importlib.metadata

REQUIRED_NLTK_VERSION = "3.9.1"

try:
    _installed_nltk_version = importlib.metadata.version("nltk")
except importlib.metadata.PackageNotFoundError:
    # No NLTK distribution metadata found; fall through to the install.
    _installed_nltk_version = None

if _installed_nltk_version != REQUIRED_NLTK_VERSION:
    _pip_result = subprocess.run(
        [sys.executable, "-m", "pip", "install", f"nltk=={REQUIRED_NLTK_VERSION}"]
    )
    if _pip_result.returncode != 0:
        # Keep startup best-effort, but do not hide the failure.
        print(
            f"Warning: 'pip install nltk=={REQUIRED_NLTK_VERSION}' exited with "
            f"code {_pip_result.returncode}; continuing with NLTK "
            f"{_installed_nltk_version}."
        )
    # NOTE: `nltk` is imported before this point, so a version change made
    # here only applies to the running process after it is restarted.
15
+
16
# Set up temporary directory for NLTK resources.
nltk_data_path = os.path.join(tempfile.gettempdir(), "nltk_data")
os.makedirs(nltk_data_path, exist_ok=True)
# The nltk module object outlives a single execution of this script, so guard
# against appending the same search path again on every run.
if nltk_data_path not in nltk.data.path:
    nltk.data.path.append(nltk_data_path)

# Download 'punkt_tab' for compatibility, but only when it cannot already be
# found on the NLTK search path (avoids a network round trip on each run).
try:
    nltk.data.find("tokenizers/punkt_tab")
    _punkt_tab_available = True
except LookupError:
    _punkt_tab_available = False

if not _punkt_tab_available:
    try:
        print("Ensuring NLTK 'punkt_tab' resource is downloaded...")
        # By default nltk.download() signals failure by returning False;
        # raise_on_error=True turns that into an exception so the handler
        # below actually sees download failures.
        nltk.download("punkt_tab", download_dir=nltk_data_path, raise_on_error=True)
    except Exception as e:
        print(f"Error downloading NLTK 'punkt_tab': {e}")
        raise
28
+
29
+ sys.path.append(os.path.abspath("."))
30
+ from langchain.chains import ConversationalRetrievalChain
31
+ from langchain.memory import ConversationBufferMemory
32
+ from langchain.llms import OpenAI
33
+ from langchain.document_loaders import UnstructuredPDFLoader
34
+ from langchain.vectorstores import Chroma
35
+ from langchain.embeddings import HuggingFaceEmbeddings
36
+ from langchain.text_splitter import NLTKTextSplitter
37
+ from patent_downloader import PatentDownloader
38
+ from langchain.document_loaders import PyMuPDFLoader
39
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
40
+
41
# Fresh scratch directory created through tempfile.
# NOTE(review): mkdtemp() makes a new directory each time this module-level
# code executes, and nothing in the visible code removes it — confirm this is
# intended.
PERSISTED_DIRECTORY = tempfile.mkdtemp()

# The OpenAI key is read from the environment only; when it is missing or
# empty, show an error in the Streamlit UI and halt the script.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if OPENAI_API_KEY is None or OPENAI_API_KEY == "":
    st.error("Critical Error: OpenAI API key not found in the environment variables. Please configure it.")
    st.stop()
48
+
49
def check_poppler_installed():
    """Verify that Poppler's ``pdfinfo`` executable can be located on PATH.

    Returns:
        None when ``pdfinfo`` is found.

    Raises:
        OSError: if ``shutil.which`` cannot locate ``pdfinfo``.
            (``EnvironmentError`` is an alias of ``OSError``.)
    """
    pdfinfo_path = shutil.which("pdfinfo")
    if pdfinfo_path:
        return

    message = (
        "Poppler is not installed or not in PATH. "
        "Install 'poppler-utils' for PDF processing."
    )
    raise OSError(message)
54
+
55
+ check_poppler_installed()