danicafisher committed on
Commit d643080 (1 parent: d78b065)
Files changed (2)
  1. app.py +51 -65
  2. requirements.txt +3 -2
app.py CHANGED
@@ -5,20 +5,20 @@ from aimakerspace.openai_utils.prompts import (
     UserRolePrompt,
     SystemRolePrompt,
 )
-# from aimakerspace.openai_utils.embedding import EmbeddingModel
+from aimakerspace.openai_utils.embedding import EmbeddingModel
 from aimakerspace.vectordatabase import VectorDatabase
-# from aimakerspace.openai_utils.chatmodel import ChatOpenAI
+from aimakerspace.openai_utils.chatmodel import ChatOpenAI
 import chainlit as cl
 # import asyncio
-from operator import itemgetter
+# from operator import itemgetter
 import nest_asyncio
 nest_asyncio.apply()
 from langchain_community.document_loaders import PyMuPDFLoader
 from langchain_text_splitters import RecursiveCharacterTextSplitter
-from langchain_openai import ChatOpenAI, OpenAIEmbeddings
-from langchain_community.vectorstores import Qdrant
-from langchain.prompts import ChatPromptTemplate
-from langchain_core.runnables import RunnablePassthrough
+# from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+# from langchain_community.vectorstores import Qdrant
+# from langchain.prompts import ChatPromptTemplate
+# from langchain_core.runnables import RunnablePassthrough


 filepath_NIST = "data/NIST.AI.600-1.pdf"
@@ -36,15 +36,15 @@ text_splitter = RecursiveCharacterTextSplitter(

 rag_documents = text_splitter.split_documents(documents)

-embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
+# embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

-vectorstore = Qdrant.from_documents(
-    documents=rag_documents,
-    embedding=embeddings,
-    location=":memory:",
-    collection_name="Implications of AI"
-)
-retriever = qdrant_vectorstore.as_retriever()
+# vectorstore = Qdrant.from_documents(
+#     documents=rag_documents,
+#     embedding=embeddings,
+#     location=":memory:",
+#     collection_name="Implications of AI"
+# )
+# retriever = qdrant_vectorstore.as_retriever()

 RAG_PROMPT = """\
 Given a provided context and question, you must answer the question based only on context.
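
The Qdrant retriever commented out above is superseded by the in-memory aimakerspace VectorDatabase that this commit builds inside start_chat() (final hunk below). A minimal sketch of that flow, assuming abuild_from_list accepts a list of strings and returns the populated database, and that search_by_text yields (text, score) pairs, which is what the pipeline code below implies; the sample chunks and query are illustrative:

import asyncio
from aimakerspace.vectordatabase import VectorDatabase

async def build_db(texts: list[str]) -> VectorDatabase:
    # Embed each chunk and store it in the in-memory database.
    vector_db = VectorDatabase()
    return await vector_db.abuild_from_list(texts)

db = asyncio.run(build_db(["chunk one", "chunk two"]))
for text, score in db.search_by_text("What does the NIST report cover?", k=2):
    print(round(score, 3), text)
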
@@ -55,46 +55,46 @@ Context: {context}
 Question: {question}
 """

-prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
+# prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

-# RAG_PROMPT_TEMPLATE = """ \
-# Use the provided context to answer the user's query.
-# You may not answer the user's query unless there is specific context in the following text.
-# If you do not know the answer, or cannot answer, please respond with "I don't know".
-# """
+RAG_PROMPT_TEMPLATE = """ \
+Use the provided context to answer the user's query.
+You may not answer the user's query unless there is specific context in the following text.
+If you do not know the answer, or cannot answer, please respond with "I don't know".
+"""

-# rag_prompt = SystemRolePrompt(RAG_PROMPT_TEMPLATE)
+rag_prompt = SystemRolePrompt(RAG_PROMPT_TEMPLATE)

-# USER_PROMPT_TEMPLATE = """ \
-# Context:
-# {context}
-# User Query:
-# {user_query}
-# """
+USER_PROMPT_TEMPLATE = """ \
+Context:
+{context}
+User Query:
+{user_query}
+"""

-# user_prompt = UserRolePrompt(USER_PROMPT_TEMPLATE)
+user_prompt = UserRolePrompt(USER_PROMPT_TEMPLATE)

-# class RetrievalAugmentedQAPipeline:
-#     def __init__(self, llm: ChatOpenAI(), vector_db_retriever: VectorDatabase) -> None:
-#         self.llm = llm
-#         self.vector_db_retriever = vector_db_retriever
+class RetrievalAugmentedQAPipeline:
+    def __init__(self, llm: ChatOpenAI, vector_db_retriever: VectorDatabase) -> None:
+        self.llm = llm
+        self.vector_db_retriever = vector_db_retriever

-#     async def arun_pipeline(self, user_query: str):
-#         context_list = self.vector_db_retriever.search_by_text(user_query, k=4)
+    async def arun_pipeline(self, user_query: str):
+        context_list = self.vector_db_retriever.search_by_text(user_query, k=4)

-#         context_prompt = ""
-#         for context in context_list:
-#             context_prompt += context[0] + "\n"
+        context_prompt = ""
+        for context in context_list:
+            context_prompt += context[0] + "\n"

-#         formatted_system_prompt = rag_prompt.create_message()
+        formatted_system_prompt = rag_prompt.create_message()

-#         formatted_user_prompt = user_prompt.create_message(user_query=user_query, context=context_prompt)
+        formatted_user_prompt = user_prompt.create_message(user_query=user_query, context=context_prompt)

-#         async def generate_response():
-#             async for chunk in self.llm.astream([formatted_system_prompt, formatted_user_prompt]):
-#                 yield chunk
+        async def generate_response():
+            async for chunk in self.llm.astream([formatted_system_prompt, formatted_user_prompt]):
+                yield chunk

-#         return {"response": generate_response(), "context": context_list}
+        return {"response": generate_response(), "context": context_list}


 # ------------------------------------------------------------
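
Only arun_pipeline and its return shape come from the class above; the demo driver and query string below are hypothetical, sketched to show how the streaming result is consumed outside Chainlit:

import asyncio

async def demo(llm: ChatOpenAI, vector_db: VectorDatabase) -> None:
    # Hypothetical driver for the pipeline defined above.
    pipeline = RetrievalAugmentedQAPipeline(llm=llm, vector_db_retriever=vector_db)
    result = await pipeline.arun_pipeline("What risks does the report highlight?")

    # "response" is an async generator; tokens print as the model streams them.
    async for chunk in result["response"]:
        print(chunk, end="", flush=True)
    print(f"\nRetrieved {len(result['context'])} context chunks.")

# e.g. asyncio.run(demo(llm, vector_db)) once the model and database exist
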
@@ -112,36 +112,22 @@ async def start_chat():
     #     }

     #     # Create a dict vector store
-    # vector_db = VectorDatabase()
-    # vector_db = await vector_db.abuild_from_list(rag_documents)
+    vector_db = VectorDatabase()
+    vector_db = await vector_db.abuild_from_list(rag_documents)
     # vector_db = await vector_db.abuild_from_list(split_documents_NIST)
     # vector_db = await vector_db.abuild_from_list(split_documents_Blueprint)

     # # chat_openai = ChatOpenAI()
-    # llm = ChatOpenAI(model="gpt-4o-mini", tags=["base_llm"])
+    llm = ChatOpenAI(model="gpt-4o-mini", tags=["base_llm"])


     # # Create a chain
-    # retrieval_augmented_qa_pipeline = RetrievalAugmentedQAPipeline(
-    #     vector_db_retriever=vector_db,
-    #     llm=llm
-    # )
-    primary_llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
-
-    rag_chain = (
-        # INVOKE CHAIN WITH: {"question" : "<<SOME USER QUESTION>>"}
-        # "question" : populated by getting the value of the "question" key
-        # "context"  : populated by getting the value of the "question" key and chaining it into the base_retriever
-        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
-        # "context"  : is assigned to a RunnablePassthrough object (will not be called or considered in the next step)
-        #              by getting the value of the "context" key from the previous step
-        | RunnablePassthrough.assign(context=itemgetter("context"))
-        # "response" : the "context" and "question" values are used to format our prompt object and then piped
-        #              into the LLM and stored in a key called "response"
-        # "context"  : populated by getting the value of the "context" key from the previous step
-        | {"response": prompt | primary_llm, "context": itemgetter("context")}
+    rag_chain = RetrievalAugmentedQAPipeline(
+        vector_db_retriever=vector_db,
+        llm=llm
     )

+
     # cl.user_session.set("settings", settings)
     cl.user_session.set("chain", rag_chain)
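
For orientation, a minimal sketch of the Chainlit handler that typically consumes the session entry set above; only cl.user_session.get("chain") and arun_pipeline come from this commit, while the handler name and the assumption that streamed chunks are plain strings are hypothetical:

@cl.on_message
async def main(message: cl.Message):
    # Retrieve the pipeline stored by start_chat() above.
    chain = cl.user_session.get("chain")

    msg = cl.Message(content="")
    result = await chain.arun_pipeline(message.content)

    # Stream tokens into the UI as the async generator yields them.
    async for chunk in result["response"]:
        await msg.stream_token(chunk)

    await msg.send()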
 
 
requirements.txt CHANGED
@@ -7,5 +7,6 @@ langchain
 langchain-core
 langchain-community
 langchain-text-splitters
-langchain-openai
-qdrant-client
+# langchain-openai
+# qdrant-client
+# langchain-qdrant