danicafisher committed on
Commit
354026c
1 Parent(s): 61170c1
Files changed (1) hide show
  1. app.py +23 -50
app.py CHANGED
@@ -1,60 +1,36 @@
1
- # from typing import List
2
- # from chainlit.types import AskFileResponse
 
3
  from aimakerspace.text_utils import CharacterTextSplitter, PDFFileLoader
4
  from aimakerspace.openai_utils.prompts import (
5
  UserRolePrompt,
6
  SystemRolePrompt,
 
7
  )
8
  from aimakerspace.openai_utils.embedding import EmbeddingModel
9
  from aimakerspace.vectordatabase import VectorDatabase
10
  from aimakerspace.openai_utils.chatmodel import ChatOpenAI
11
  import chainlit as cl
12
- # import asyncio
13
- # from operator import itemgetter
14
  import nest_asyncio
15
  nest_asyncio.apply()
16
- from langchain_community.document_loaders import PyMuPDFLoader
17
- from langchain_text_splitters import RecursiveCharacterTextSplitter
18
 
19
 
20
- filepath_NIST = "data/NIST.AI.600-1.pdf"
21
- filepath_Blueprint = "data/Blueprint-for-an-AI-Bill-of-Rights.pdf"
 
 
22
 
23
- text_splitter = RecursiveCharacterTextSplitter(
24
- chunk_size = 500,
25
- chunk_overlap = 50
26
- )
27
-
28
- documents_NIST = PyMuPDFLoader(filepath_NIST).load()
29
- documents_Blueprint = PyMuPDFLoader(filepath_Blueprint).load()
30
-
31
- split_NIST = text_splitter.split_documents(documents_NIST)
32
- split_Blueprint = text_splitter.split_documents(documents_Blueprint)
33
-
34
- # embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
35
-
36
- # vectorstore = Qdrant.from_documents(
37
- # documents=rag_documents,
38
- # embedding=embeddings,
39
- # location=":memory:",
40
- # collection_name="Implications of AI"
41
- # )
42
- # retriever = qdrant_vectorstore.as_retriever()
43
-
44
- RAG_PROMPT = """\
45
- Given a provided context and question, you must answer the question based only on context.
46
-
47
- If you cannot answer the question based on the context - you must say "I don't know".
48
 
49
- Context: {context}
50
- Question: {question}
51
- """
52
-
53
- # prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
54
 
55
  RAG_PROMPT_TEMPLATE = """ \
56
  Use the provided context to answer the user's query.
 
57
  You may not answer the user's query unless there is specific context in the following text.
 
58
  If you do not know the answer, or cannot answer, please respond with "I don't know".
59
  """
60
 
@@ -63,6 +39,7 @@ rag_prompt = SystemRolePrompt(RAG_PROMPT_TEMPLATE)
63
  USER_PROMPT_TEMPLATE = """ \
64
  Context:
65
  {context}
 
66
  User Query:
67
  {user_query}
68
  """
@@ -106,25 +83,21 @@ async def start_chat():
106
  # "presence_penalty": 0,
107
  # }
108
 
109
- # # Create a dict vector store
110
  vector_db = VectorDatabase()
111
- # vector_db = await vector_db.abuild_from_list(rag_documents)
112
- vector_db = await vector_db.abuild_from_list(split_NIST)
113
- vector_db = await vector_db.abuild_from_list(split_Blueprint)
114
 
115
- # # chat_openai = ChatOpenAI()
116
- llm = ChatOpenAI(model="gpt-4o-mini", tags=["base_llm"])
117
-
118
 
119
- # # Create a chain
120
- rag_chain = RetrievalAugmentedQAPipeline(
121
  vector_db_retriever=vector_db,
122
- llm=llm
123
  )
124
 
125
-
126
  # cl.user_session.set("settings", settings)
127
- cl.user_session.set("chain", rag_chain)
128
 
129
 
130
  @cl.on_message # marks a function that should be run each time the chatbot receives a message from a user
 
1
+ import os
2
+ from typing import List
3
+ from chainlit.types import AskFileResponse
4
  from aimakerspace.text_utils import CharacterTextSplitter, PDFFileLoader
5
  from aimakerspace.openai_utils.prompts import (
6
  UserRolePrompt,
7
  SystemRolePrompt,
8
+ AssistantRolePrompt,
9
  )
10
  from aimakerspace.openai_utils.embedding import EmbeddingModel
11
  from aimakerspace.vectordatabase import VectorDatabase
12
  from aimakerspace.openai_utils.chatmodel import ChatOpenAI
13
  import chainlit as cl
14
+ import asyncio
 
15
  import nest_asyncio
16
  nest_asyncio.apply()
 
 
17
 
18
 
19
+ pdf_loader_NIST = PDFFileLoader("data/NIST.AI.600-1.pdf")
20
+ pdf_loader_Blueprint = PDFFileLoader("data/Blueprint-for-an-AI-Bill-of-Rights.pdf")
21
+ documents_NIST = pdf_loader_NIST.load_documents()
22
+ documents_Blueprint = pdf_loader_Blueprint.load_documents()
23
 
24
+ text_splitter = CharacterTextSplitter()
25
+ split_documents_NIST = text_splitter.split_texts(documents_NIST)
26
+ split_documents_Blueprint = text_splitter.split_texts(documents_Blueprint)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
 
 
 
 
 
28
 
29
  RAG_PROMPT_TEMPLATE = """ \
30
  Use the provided context to answer the user's query.
31
+
32
  You may not answer the user's query unless there is specific context in the following text.
33
+
34
  If you do not know the answer, or cannot answer, please respond with "I don't know".
35
  """
36
 
 
39
  USER_PROMPT_TEMPLATE = """ \
40
  Context:
41
  {context}
42
+
43
  User Query:
44
  {user_query}
45
  """
 
83
  # "presence_penalty": 0,
84
  # }
85
 
86
+ # Create a dict vector store
87
  vector_db = VectorDatabase()
88
+ vector_db = await vector_db.abuild_from_list(split_documents_NIST)
89
+ vector_db = await vector_db.abuild_from_list(split_documents_Blueprint)
 
90
 
91
+ chat_openai = ChatOpenAI()
 
 
92
 
93
+ # Create a chain
94
+ retrieval_augmented_qa_pipeline = RetrievalAugmentedQAPipeline(
95
  vector_db_retriever=vector_db,
96
+ llm=chat_openai
97
  )
98
 
 
99
  # cl.user_session.set("settings", settings)
100
+ cl.user_session.set("chain", retrieval_augmented_qa_pipeline)
101
 
102
 
103
  @cl.on_message # marks a function that should be run each time the chatbot receives a message from a user