ryanrwatkins committed on
Commit
42cf399
·
1 Parent(s): b15e4ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -25
app.py CHANGED
@@ -96,32 +96,32 @@ def submit_message(prompt, prompt_template, temperature, max_tokens, context_len
96
  #vectordb = Chroma.from_documents(split_pages, embeddings, persist_directory=persist_directory)
97
  #vectordb.persist()
98
 
99
- #path = './files'
100
- #pdf_files = glob.glob(os.path.join(path, "*.pdf"))
101
 
102
- #merger = PdfWriter()
103
 
104
  # add all file in the list to the merger object
105
- #for pdf in pdf_files:
106
- # merger.append(pdf)
107
- #merger.write("merged-pdf.pdf")
108
- #merger.close()
109
 
110
- #reader = PdfReader("merged-pdf.pdf")
111
- #raw_text = ''
112
- #for i, page in enumerate(reader.pages):
113
- # text = page.extract_text()
114
- # if text:
115
- # raw_text += text
116
- #text_splitter = CharacterTextSplitter(
117
- # separator = "\n",
118
- # chunk_size = 1000,
119
- # chunk_overlap = 200,
120
- # length_function = len,
121
- #)
122
- #texts = text_splitter.split_text(raw_text)
123
- #len(texts)
124
- #embeddings = OpenAIEmbeddings()
125
 
126
 
127
  #with open("foo.pkl", 'wb') as f:
@@ -161,10 +161,10 @@ def submit_message(prompt, prompt_template, temperature, max_tokens, context_len
161
 
162
  # completion = completion({"question": query, "chat_history": history[-context_length*2:]})
163
 
164
- with open("foo.pkl", 'rb') as f:
165
- new_docsearch = pickle.load(f)
166
 
167
- docsearch = FAISS.from_texts(texts, new_docsearch)
168
  query = str(system_prompt + history[-context_length*2:] + [prompt_msg])
169
  docs = docsearch.similarity_search(query)
170
  #print(docs[0].page_content)
 
96
  #vectordb = Chroma.from_documents(split_pages, embeddings, persist_directory=persist_directory)
97
  #vectordb.persist()
98
 
99
+ path = './files'
100
+ pdf_files = glob.glob(os.path.join(path, "*.pdf"))
101
 
102
+ merger = PdfWriter()
103
 
104
  # add all file in the list to the merger object
105
+ for pdf in pdf_files:
106
+ merger.append(pdf)
107
+ merger.write("merged-pdf.pdf")
108
+ merger.close()
109
 
110
+ reader = PdfReader("merged-pdf.pdf")
111
+ raw_text = ''
112
+ for i, page in enumerate(reader.pages):
113
+ text = page.extract_text()
114
+ if text:
115
+ raw_text += text
116
+ text_splitter = CharacterTextSplitter(
117
+ separator = "\n",
118
+ chunk_size = 1000,
119
+ chunk_overlap = 200,
120
+ length_function = len,
121
+ )
122
+ texts = text_splitter.split_text(raw_text)
123
+ len(texts)
124
+ embeddings = OpenAIEmbeddings()
125
 
126
 
127
  #with open("foo.pkl", 'wb') as f:
 
161
 
162
  # completion = completion({"question": query, "chat_history": history[-context_length*2:]})
163
 
164
+ #with open("foo.pkl", 'rb') as f:
165
+ # new_docsearch = pickle.load(f)
166
 
167
+ docsearch = FAISS.from_texts(texts, embeddings)
168
  query = str(system_prompt + history[-context_length*2:] + [prompt_msg])
169
  docs = docsearch.similarity_search(query)
170
  #print(docs[0].page_content)