danicafisher committed on
Commit
ff4614b
1 Parent(s): ee4e058

Tests langchain

Browse files
Files changed (2) hide show
  1. app.py +31 -15
  2. requirements.txt +4 -1
app.py CHANGED
@@ -1,30 +1,45 @@
1
- import os
2
- from typing import List
3
- from chainlit.types import AskFileResponse
4
  from aimakerspace.text_utils import CharacterTextSplitter, PDFFileLoader
5
  from aimakerspace.openai_utils.prompts import (
6
  UserRolePrompt,
7
  SystemRolePrompt,
8
- AssistantRolePrompt,
9
  )
10
- from aimakerspace.openai_utils.embedding import EmbeddingModel
11
  from aimakerspace.vectordatabase import VectorDatabase
12
  from aimakerspace.openai_utils.chatmodel import ChatOpenAI
13
  import chainlit as cl
14
- import asyncio
15
  import nest_asyncio
16
  nest_asyncio.apply()
 
 
 
17
 
18
 
19
- pdf_loader_NIST = PDFFileLoader("data/NIST.AI.600-1.pdf")
20
- pdf_loader_Blueprint = PDFFileLoader("data/Blueprint-for-an-AI-Bill-of-Rights.pdf")
21
- documents_NIST = pdf_loader_NIST.load_documents()
22
- documents_Blueprint = pdf_loader_Blueprint.load_documents()
23
 
24
- text_splitter = CharacterTextSplitter()
25
- split_documents_NIST = text_splitter.split_texts(documents_NIST)
26
- split_documents_Blueprint = text_splitter.split_texts(documents_Blueprint)
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  RAG_PROMPT_TEMPLATE = """ \
30
  Use the provided context to answer the user's query.
@@ -82,8 +97,9 @@ async def start_chat():
82
 
83
  # Create a dict vector store
84
  vector_db = VectorDatabase()
85
- vector_db = await vector_db.abuild_from_list(split_documents_NIST)
86
- vector_db = await vector_db.abuild_from_list(split_documents_Blueprint)
 
87
 
88
  chat_openai = ChatOpenAI()
89
 
 
1
+ # from typing import List
2
+ # from chainlit.types import AskFileResponse
 
3
  from aimakerspace.text_utils import CharacterTextSplitter, PDFFileLoader
4
  from aimakerspace.openai_utils.prompts import (
5
  UserRolePrompt,
6
  SystemRolePrompt,
 
7
  )
8
+ # from aimakerspace.openai_utils.embedding import EmbeddingModel
9
  from aimakerspace.vectordatabase import VectorDatabase
10
  from aimakerspace.openai_utils.chatmodel import ChatOpenAI
11
  import chainlit as cl
12
+ # import asyncio
13
  import nest_asyncio
14
  nest_asyncio.apply()
15
+ from langchain_community.document_loaders import PyMuPDFLoader
16
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
17
+ from langchain_community.embeddings import OpenAIEmbeddings
18
 
19
 
20
+ # pdf_loader_NIST = PDFFileLoader("data/NIST.AI.600-1.pdf")
21
+ # pdf_loader_Blueprint = PDFFileLoader("data/Blueprint-for-an-AI-Bill-of-Rights.pdf")
22
+ # documents_NIST = pdf_loader_NIST.load_documents()
23
+ # documents_Blueprint = pdf_loader_Blueprint.load_documents()
24
 
25
+ filepath_NIST = "data/NIST.AI.600-1.pdf"
26
+ filepath_Blueprint = "data/Blueprint-for-an-AI-Bill-of-Rights.pdf"
 
27
 
28
+ documents_NIST = PyMuPDFLoader(filepath_NIST).load()
29
+ documents_Blueprint = PyMuPDFLoader(filepath_Blueprint).load()
30
+ documents = documents_NIST + documents_Blueprint
31
+
32
+
33
+ # text_splitter = CharacterTextSplitter()
34
+ # split_documents_NIST = text_splitter.split_texts(documents_NIST)
35
+ # split_documents_Blueprint = text_splitter.split_texts(documents_Blueprint)
36
+
37
+ text_splitter = RecursiveCharacterTextSplitter(
38
+ chunk_size = 500,
39
+ chunk_overlap = 50
40
+ )
41
+
42
+ rag_documents = text_splitter.split_documents(documents)
43
 
44
  RAG_PROMPT_TEMPLATE = """ \
45
  Use the provided context to answer the user's query.
 
97
 
98
  # Create a dict vector store
99
  vector_db = VectorDatabase()
100
+ vector_db = await vector_db.abuild_from_list(rag_documents)
101
+ # vector_db = await vector_db.abuild_from_list(split_documents_NIST)
102
+ # vector_db = await vector_db.abuild_from_list(split_documents_Blueprint)
103
 
104
  chat_openai = ChatOpenAI()
105
 
requirements.txt CHANGED
@@ -1,4 +1,7 @@
1
  numpy
2
  chainlit==0.7.700
3
  openai
4
- PyPDF2
 
 
 
 
1
  numpy
2
  chainlit==0.7.700
3
  openai
4
+ PyPDF2
5
+ langchain
6
+ langchain-community
7
+ langchain-text-splitters