DylanASHillier committed on
Commit 67f7d43
1 Parent(s): eb26f08

fixes caching

Files changed (1): streamlit.py (+19, -12)
streamlit.py CHANGED
@@ -125,7 +125,6 @@ class ChatModel:
         return ChatModel._chat_model
 
     # anthropic_semaphore = asyncio.Semaphore(5)
-
     @backoff.on_exception(backoff.expo,
                           exception=ANTHROPIC_ERRORS_FOR_BACKOFF,
                           base=ANTHROPIC_BACKOFF_BASE,
@@ -260,8 +259,11 @@ import langchain.text_splitter as lc_text_splitter
 embeddings = lc_embeddings.OpenAIEmbeddings(
     openai_api_key=OPENAI_API_KEY)
 
-workableVectorDB = NumpyVectorDB(embeddings, EMBEDDING_DIM)
+@st.cache_resource()
+def get_workable_vector_db() -> base_vc.VectorStore:
+    return NumpyVectorDB(embeddings, EMBEDDING_DIM)
 
+workableVectorDB = get_workable_vector_db()
 # """Module provides a reusable retrieval chain
 # """
 
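This hunk is the heart of the fix: the vector DB was previously rebuilt from scratch on every Streamlit rerun, discarding whatever texts had been added to it. Wrapping construction in a function decorated with `st.cache_resource` makes Streamlit hand back the same instance across reruns, which is the documented pattern for global, unserializable resources such as DB connections and models. A minimal sketch of the pattern, using a stand-in `InMemoryStore` since `NumpyVectorDB` is defined elsewhere in this file:

```python
import streamlit as st

class InMemoryStore:
    """Stand-in for NumpyVectorDB: keeps texts and metadata in memory."""
    def __init__(self):
        self.texts: list[str] = []
        self.metadatas: list[dict[str, str]] = []

    def add_texts(self, texts: list[str], metadatas: list[dict[str, str]]):
        self.texts.extend(texts)
        self.metadatas.extend(metadatas)

@st.cache_resource()
def get_store() -> InMemoryStore:
    # Constructed once per server process; subsequent reruns of the
    # script receive the same object, so earlier add_texts() calls
    # are not lost.
    return InMemoryStore()

store = get_store()
```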
 
@@ -282,11 +284,16 @@ QUERY_MESSAGES: list[tuple[Roles, str]] = [
     ),
     (Roles.HUMAN, "Great let me think about that for a second.")
 ]
+from dataclasses import dataclass
 
+@dataclass
+class HashableDoc():
+    page_content: str
+    metadata: dict[str, str]
 
 # pylint: enable=line-too-long
 async def retrieve_docs(
-        query: str, query_filter: dict[str, str]) -> list[docstore.Document]:
+        query: str, query_filter: dict[str, str]) -> list[HashableDoc]:
     # """Retrieves documents for a query
 
     # Args:
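`st.cache_data` computes cache keys from a function's arguments, and LangChain's pydantic-based `docstore.Document` is not something Streamlit can reliably key on, which is presumably why the commit introduces a plain dataclass carrying only the two fields the pipeline actually uses. (The name is slightly loose: `@dataclass` alone does not make instances `hash()`-able, and a `dict` field never is; what matters is that Streamlit can serialize the field values to build a key.) A sketch of the idea, with a hypothetical `doc_length` function standing in for the real cached consumer:

```python
from dataclasses import dataclass

import streamlit as st

@dataclass
class HashableDoc:
    page_content: str
    metadata: dict[str, str]

@st.cache_data
def doc_length(doc: HashableDoc) -> int:
    # Cache keyed on the document's field values: two HashableDocs
    # with equal content and metadata share one cache entry.
    return len(doc.page_content)
```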
@@ -300,10 +307,11 @@ async def retrieve_docs(
     print("Retrieving docs for query %s and filter %s")
     retriever = workableVectorDB.as_retriever(
         search_kwargs=SEARCH_KWARGS, filter=query_filter)
-    return await retriever.aget_relevant_documents(query)
-
+    docs = await retriever.aget_relevant_documents(query)
+    return [HashableDoc(page_content=doc.page_content, metadata=doc.metadata) for doc in docs]
 
-def _get_doc_representation(doc: docstore.Document) -> str:
+@st.cache_data
+def _get_doc_representation(doc: HashableDoc) -> str:
     metadata = doc.metadata
     content = doc.page_content
     if "call_id" in metadata:
@@ -315,13 +323,11 @@ def _get_doc_representation(doc: docstore.Document) -> str:
 
     return content
 
-
-async def _combine_docs(docs: list[docstore.Document]) -> str:
+async def _combine_docs(docs: list[HashableDoc]) -> str:
     # """Combines a list of documents into a single string"""
     doc_representations = [_get_doc_representation(doc) for doc in docs]
     return "\n\n".join(doc_representations)
 
-
 async def answer_question(question: str, docs: str):
     # """Answers a question given a query and a list of documents"""
     messages = QUERY_MESSAGES.copy()
@@ -335,7 +341,6 @@ async def answer_question(question: str, docs: str):
         "The user will be unable to ask follow up questions.")]
     return await chat_query_anthropic(messages)
 
-
 async def run_query(query: str, query_filter: dict[str, str]) -> str:
     # """Runs a query on the retrieval chain
 
@@ -388,6 +393,7 @@ for page in PAGES:
         workable_customers.append(href)
 
 # workable_customers
+@st.cache_data
 def get_paragraphs_workable(url):
     r = requests.get(url=url, headers=headers)
 
@@ -416,6 +422,7 @@ def loop(input):
     input = clean_text(input)
     return input
 
+@st.cache_data
 def get_case_studies():
     workable_case_studies = []
     # for customer in customers:
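The two `@st.cache_data` decorators in this and the previous hunk keep the Workable scraping from re-running on every widget interaction: Streamlit keys the cache on the arguments (a URL here, nothing at all for `get_case_studies`) and replays the pickled return value. A minimal sketch of the same pattern, with a hypothetical `fetch_text` helper:

```python
import requests
import streamlit as st

@st.cache_data
def fetch_text(url: str) -> str:
    # One network round-trip per distinct URL per cache lifetime;
    # reruns with the same URL return the stored body instead.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return response.text
```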
@@ -434,10 +441,10 @@ workable_case_studies = get_case_studies()
 for (url, case_study) in workable_case_studies:
     workableVectorDB.add_texts([case_study], [{"url": url}])
 
-
+@st.cache_data
 def get_answer(question):
     response = asyncio.run(run_query(question, query_filter={}))
-    return response[0], f"<a href='{response[1]}'>{response[1]}</a>"
+    return response[0], f"{response[1]}"
 
 DESCRIPTION = """This tool is a demo for allowing you to ask questions over your case studies.
 
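Caching `get_answer` means asking the same question twice skips retrieval and the Anthropic call entirely, since `@st.cache_data` wraps the synchronous entry point around `asyncio.run`. (The return value also drops the hand-built `<a href=...>` markup for the bare URL; the remaining f-string around `response[1]` is redundant and could simply be `response[1]`.) A sketch of the shape, with a placeholder async pipeline:

```python
import asyncio

import streamlit as st

async def _pipeline(question: str) -> tuple[str, str]:
    # Placeholder for the real retrieve-then-answer coroutine.
    await asyncio.sleep(0)
    return f"Echo: {question}", "https://example.com"

@st.cache_data
def get_answer(question: str) -> tuple[str, str]:
    # Cache hits return the stored (answer, source_url) pair without
    # ever starting an event loop or touching the API.
    return asyncio.run(_pipeline(question))
```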
 
 