danicafisher committed on
Commit
61170c1
1 Parent(s): 8fddebd

Fixes documents

Browse files
Files changed (1) hide show
  1. app.py +8 -13
app.py CHANGED
@@ -15,26 +15,21 @@ import nest_asyncio
15
  nest_asyncio.apply()
16
  from langchain_community.document_loaders import PyMuPDFLoader
17
  from langchain_text_splitters import RecursiveCharacterTextSplitter
18
- # from langchain_openai import ChatOpenAI, OpenAIEmbeddings
19
- # from langchain_community.vectorstores import Qdrant
20
- # from langchain.prompts import ChatPromptTemplate
21
- # from langchain_core.runnables import RunnablePassthrough
22
 
23
 
24
  filepath_NIST = "data/NIST.AI.600-1.pdf"
25
  filepath_Blueprint = "data/Blueprint-for-an-AI-Bill-of-Rights.pdf"
26
 
27
- documents_NIST = PyMuPDFLoader(filepath_NIST).load()
28
- documents_Blueprint = PyMuPDFLoader(filepath_Blueprint).load()
29
- documents = documents_NIST + documents_Blueprint
30
-
31
-
32
  text_splitter = RecursiveCharacterTextSplitter(
33
  chunk_size = 500,
34
  chunk_overlap = 50
35
  )
36
 
37
- rag_documents = text_splitter.split_documents(documents)
 
 
 
 
38
 
39
  # embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
40
 
@@ -113,9 +108,9 @@ async def start_chat():
113
 
114
  # # Create a dict vector store
115
  vector_db = VectorDatabase()
116
- vector_db = await vector_db.abuild_from_list(rag_documents)
117
- # vector_db = await vector_db.abuild_from_list(split_documents_NIST)
118
- # vector_db = await vector_db.abuild_from_list(split_documents_Blueprint)
119
 
120
  # # chat_openai = ChatOpenAI()
121
  llm = ChatOpenAI(model="gpt-4o-mini", tags=["base_llm"])
 
15
  nest_asyncio.apply()
16
  from langchain_community.document_loaders import PyMuPDFLoader
17
  from langchain_text_splitters import RecursiveCharacterTextSplitter
 
 
 
 
18
 
19
 
20
  filepath_NIST = "data/NIST.AI.600-1.pdf"
21
  filepath_Blueprint = "data/Blueprint-for-an-AI-Bill-of-Rights.pdf"
22
 
 
 
 
 
 
23
  text_splitter = RecursiveCharacterTextSplitter(
24
  chunk_size = 500,
25
  chunk_overlap = 50
26
  )
27
 
28
+ documents_NIST = PyMuPDFLoader(filepath_NIST).load()
29
+ documents_Blueprint = PyMuPDFLoader(filepath_Blueprint).load()
30
+
31
+ split_NIST = text_splitter.split_documents(documents_NIST)
32
+ split_Blueprint = text_splitter.split_documents(documents_Blueprint)
33
 
34
  # embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
35
 
 
108
 
109
  # # Create a dict vector store
110
  vector_db = VectorDatabase()
111
+ # vector_db = await vector_db.abuild_from_list(rag_documents)
112
+ vector_db = await vector_db.abuild_from_list(split_NIST)
113
+ vector_db = await vector_db.abuild_from_list(split_Blueprint)
114
 
115
  # # chat_openai = ChatOpenAI()
116
  llm = ChatOpenAI(model="gpt-4o-mini", tags=["base_llm"])