Spaces:
Sleeping
Sleeping
danicafisher
committed on
Commit
•
39a4e56
1
Parent(s):
3c72577
Update app.py
Browse files
app.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
from langchain_community.document_loaders import PyMuPDFLoader
|
2 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
3 |
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
|
@@ -10,10 +11,22 @@ from qdrant_client.http.models import Distance, VectorParams
|
|
10 |
from operator import itemgetter
|
11 |
import chainlit as cl
|
12 |
|
13 |
-
# Load the documents
|
14 |
-
pdf_loader_NIST = PyMuPDFLoader("data/NIST.AI.600-1.pdf").load()
|
15 |
-
pdf_loader_Blueprint = PyMuPDFLoader("data/Blueprint-for-an-AI-Bill-of-Rights.pdf").load()
|
16 |
-
documents = pdf_loader_NIST + pdf_loader_Blueprint
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
# Split the documents
|
19 |
text_splitter = RecursiveCharacterTextSplitter(
|
|
|
1 |
+
import os
|
2 |
from langchain_community.document_loaders import PyMuPDFLoader
|
3 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
4 |
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
|
|
|
11 |
from operator import itemgetter
|
12 |
import chainlit as cl
|
13 |
|
14 |
+
# # Load the documents
|
15 |
+
# pdf_loader_NIST = PyMuPDFLoader("data/NIST.AI.600-1.pdf").load()
|
16 |
+
# pdf_loader_Blueprint = PyMuPDFLoader("data/Blueprint-for-an-AI-Bill-of-Rights.pdf").load()
|
17 |
+
# documents = pdf_loader_NIST + pdf_loader_Blueprint
|
18 |
+
|
19 |
+
# List to store all the documents
|
20 |
+
documents = []
|
21 |
+
directory = "data/"
|
22 |
+
|
23 |
+
# Iterate through all the files in the directory
|
24 |
+
# Load every PDF found in `directory` and append what each loader returns
# to the module-level `documents` list.
for filename in os.listdir(directory):
    if not filename.endswith(".pdf"):  # skip anything that is not a PDF
        continue
    file_path = os.path.join(directory, filename)
    loader = PyMuPDFLoader(file_path)
    docs = loader.load()
    # Accumulate into `documents`, the list initialised before this loop.
    # The previous target, `all_docs`, was never defined, so the first PDF
    # raised NameError and `documents` stayed empty for the splitter.
    documents.extend(docs)
|
30 |
|
31 |
# Split the documents
|
32 |
text_splitter = RecursiveCharacterTextSplitter(
|