Spaces:
Runtime error
Runtime error
Commit
·
42cf399
1
Parent(s):
b15e4ae
Update app.py
Browse files
app.py
CHANGED
@@ -96,32 +96,32 @@ def submit_message(prompt, prompt_template, temperature, max_tokens, context_len
|
|
96 |
#vectordb = Chroma.from_documents(split_pages, embeddings, persist_directory=persist_directory)
|
97 |
#vectordb.persist()
|
98 |
|
99 |
-
|
100 |
-
|
101 |
|
102 |
-
|
103 |
|
104 |
# add all file in the list to the merger object
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
|
126 |
|
127 |
#with open("foo.pkl", 'wb') as f:
|
@@ -161,10 +161,10 @@ def submit_message(prompt, prompt_template, temperature, max_tokens, context_len
|
|
161 |
|
162 |
# completion = completion({"question": query, "chat_history": history[-context_length*2:]})
|
163 |
|
164 |
-
with open("foo.pkl", 'rb') as f:
|
165 |
-
|
166 |
|
167 |
-
docsearch = FAISS.from_texts(texts,
|
168 |
query = str(system_prompt + history[-context_length*2:] + [prompt_msg])
|
169 |
docs = docsearch.similarity_search(query)
|
170 |
#print(docs[0].page_content)
|
|
|
96 |
#vectordb = Chroma.from_documents(split_pages, embeddings, persist_directory=persist_directory)
|
97 |
#vectordb.persist()
|
98 |
|
99 |
+
path = './files'
|
100 |
+
pdf_files = glob.glob(os.path.join(path, "*.pdf"))
|
101 |
|
102 |
+
merger = PdfWriter()
|
103 |
|
104 |
# add all file in the list to the merger object
|
105 |
+
for pdf in pdf_files:
|
106 |
+
merger.append(pdf)
|
107 |
+
merger.write("merged-pdf.pdf")
|
108 |
+
merger.close()
|
109 |
|
110 |
+
reader = PdfReader("merged-pdf.pdf")
|
111 |
+
raw_text = ''
|
112 |
+
for i, page in enumerate(reader.pages):
|
113 |
+
text = page.extract_text()
|
114 |
+
if text:
|
115 |
+
raw_text += text
|
116 |
+
text_splitter = CharacterTextSplitter(
|
117 |
+
separator = "\n",
|
118 |
+
chunk_size = 1000,
|
119 |
+
chunk_overlap = 200,
|
120 |
+
length_function = len,
|
121 |
+
)
|
122 |
+
texts = text_splitter.split_text(raw_text)
|
123 |
+
len(texts)
|
124 |
+
embeddings = OpenAIEmbeddings()
|
125 |
|
126 |
|
127 |
#with open("foo.pkl", 'wb') as f:
|
|
|
161 |
|
162 |
# completion = completion({"question": query, "chat_history": history[-context_length*2:]})
|
163 |
|
164 |
+
#with open("foo.pkl", 'rb') as f:
|
165 |
+
# new_docsearch = pickle.load(f)
|
166 |
|
167 |
+
docsearch = FAISS.from_texts(texts, embeddings)
|
168 |
query = str(system_prompt + history[-context_length*2:] + [prompt_msg])
|
169 |
docs = docsearch.similarity_search(query)
|
170 |
#print(docs[0].page_content)
|