DylanASHillier committed on
Commit 67f7d43
1 Parent(s): eb26f08

fixes caching

Files changed (1): streamlit.py (+19, -12)
streamlit.py CHANGED
@@ -125,7 +125,6 @@ class ChatModel:
         return ChatModel._chat_model
 
     # anthropic_semaphore = asyncio.Semaphore(5)
-
     @backoff.on_exception(backoff.expo,
                           exception=ANTHROPIC_ERRORS_FOR_BACKOFF,
                           base=ANTHROPIC_BACKOFF_BASE,
@@ -260,8 +259,11 @@ import langchain.text_splitter as lc_text_splitter
 embeddings = lc_embeddings.OpenAIEmbeddings(
     openai_api_key=OPENAI_API_KEY)
 
-workableVectorDB = NumpyVectorDB(embeddings, EMBEDDING_DIM)
+@st.cache_resource()
+def get_workable_vector_db() -> base_vc.VectorStore:
+    return NumpyVectorDB(embeddings, EMBEDDING_DIM)
 
+workableVectorDB = get_workable_vector_db()
 # """Module provides a reusable retrieval chain
 # """
 
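This hunk is the heart of the fix: the vector DB was previously rebuilt from scratch on every Streamlit rerun, discarding whatever texts had been added to it. Wrapping construction in a function decorated with `st.cache_resource` makes Streamlit hand back the same instance across reruns, which is the documented pattern for global, unserializable resources such as DB connections and models. A minimal sketch of the pattern, using a stand-in `InMemoryStore` since `NumpyVectorDB` is defined elsewhere in this file:

```python
import streamlit as st

class InMemoryStore:
    """Stand-in for NumpyVectorDB: keeps texts and metadata in memory."""
    def __init__(self):
        self.texts: list[str] = []
        self.metadatas: list[dict[str, str]] = []

    def add_texts(self, texts: list[str], metadatas: list[dict[str, str]]):
        self.texts.extend(texts)
        self.metadatas.extend(metadatas)

@st.cache_resource()
def get_store() -> InMemoryStore:
    # Constructed once per server process; subsequent reruns of the
    # script receive the same object, so earlier add_texts() calls
    # are not lost.
    return InMemoryStore()

store = get_store()
```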
 
@@ -282,11 +284,16 @@ QUERY_MESSAGES: list[tuple[Roles, str]] = [
     ),
     (Roles.HUMAN, "Great let me think about that for a second.")
 ]
+from dataclasses import dataclass
 
+@dataclass
+class HashableDoc():
+    page_content: str
+    metadata: dict[str, str]
 
 # pylint: enable=line-too-long
 async def retrieve_docs(
-        query: str, query_filter: dict[str, str]) -> list[docstore.Document]:
+        query: str, query_filter: dict[str, str]) -> list[HashableDoc]:
     # """Retrieves documents for a query
 
     # Args:
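`st.cache_data` computes cache keys from a function's arguments, and LangChain's pydantic-based `docstore.Document` is not something Streamlit can reliably key on, which is presumably why the commit introduces a plain dataclass carrying only the two fields the pipeline actually uses. (The name is slightly loose: `@dataclass` alone does not make instances `hash()`-able, and a `dict` field never is; what matters is that Streamlit can serialize the field values to build a key.) A sketch of the idea, with a hypothetical `doc_length` function standing in for the real cached consumer:

```python
from dataclasses import dataclass

import streamlit as st

@dataclass
class HashableDoc:
    page_content: str
    metadata: dict[str, str]

@st.cache_data
def doc_length(doc: HashableDoc) -> int:
    # Cache keyed on the document's field values: two HashableDocs
    # with equal content and metadata share one cache entry.
    return len(doc.page_content)
```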
@@ -300,10 +307,11 @@ async def retrieve_docs(
     print("Retrieving docs for query %s and filter %s")
     retriever = workableVectorDB.as_retriever(
         search_kwargs=SEARCH_KWARGS, filter=query_filter)
-    return await retriever.aget_relevant_documents(query)
-
+    docs = await retriever.aget_relevant_documents(query)
+    return [HashableDoc(page_content=doc.page_content, metadata=doc.metadata) for doc in docs]
 
-def _get_doc_representation(doc: docstore.Document) -> str:
+@st.cache_data
+def _get_doc_representation(doc: HashableDoc) -> str:
     metadata = doc.metadata
     content = doc.page_content
     if "call_id" in metadata:
@@ -315,13 +323,11 @@ def _get_doc_representation(doc: docstore.Document) -> str:
 
     return content
 
-
-async def _combine_docs(docs: list[docstore.Document]) -> str:
+async def _combine_docs(docs: list[HashableDoc]) -> str:
     # """Combines a list of documents into a single string"""
     doc_representations = [_get_doc_representation(doc) for doc in docs]
     return "\n\n".join(doc_representations)
 
-
 async def answer_question(question: str, docs: str):
     # """Answers a question given a query and a list of documents"""
     messages = QUERY_MESSAGES.copy()
@@ -335,7 +341,6 @@ async def answer_question(question: str, docs: str):
         "The user will be unable to ask follow up questions.")]
     return await chat_query_anthropic(messages)
 
-
 async def run_query(query: str, query_filter: dict[str, str]) -> str:
     # """Runs a query on the retrieval chain
 
@@ -388,6 +393,7 @@ for page in PAGES:
         workable_customers.append(href)
 
 # workable_customers
+@st.cache_data
 def get_paragraphs_workable(url):
     r = requests.get(url=url, headers=headers)
 
@@ -416,6 +422,7 @@ def loop(input):
     input = clean_text(input)
     return input
 
+@st.cache_data
 def get_case_studies():
     workable_case_studies = []
     # for customer in customers:
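The two `@st.cache_data` decorators in this and the previous hunk keep the Workable scraping from re-running on every widget interaction: Streamlit keys the cache on the arguments (a URL here, nothing at all for `get_case_studies`) and replays the pickled return value. A minimal sketch of the same pattern, with a hypothetical `fetch_text` helper:

```python
import requests
import streamlit as st

@st.cache_data
def fetch_text(url: str) -> str:
    # One network round-trip per distinct URL per cache lifetime;
    # reruns with the same URL return the stored body instead.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return response.text
```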
@@ -434,10 +441,10 @@ workable_case_studies = get_case_studies()
 for (url, case_study) in workable_case_studies:
     workableVectorDB.add_texts([case_study], [{"url": url}])
 
-
+@st.cache_data
 def get_answer(question):
     response = asyncio.run(run_query(question, query_filter={}))
-    return response[0], f"<a href='{response[1]}'>{response[1]}</a>"
+    return response[0], f"{response[1]}"
 
 DESCRIPTION = """This tool is a demo for allowing you to ask questions over your case studies.
 
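Caching `get_answer` means asking the same question twice skips retrieval and the Anthropic call entirely, since `@st.cache_data` wraps the synchronous entry point around `asyncio.run`. (The return value also drops the hand-built `<a href=...>` markup for the bare URL; the remaining f-string around `response[1]` is redundant and could simply be `response[1]`.) A sketch of the shape, with a placeholder async pipeline:

```python
import asyncio

import streamlit as st

async def _pipeline(question: str) -> tuple[str, str]:
    # Placeholder for the real retrieve-then-answer coroutine.
    await asyncio.sleep(0)
    return f"Echo: {question}", "https://example.com"

@st.cache_data
def get_answer(question: str) -> tuple[str, str]:
    # Cache hits return the stored (answer, source_url) pair without
    # ever starting an event loop or touching the API.
    return asyncio.run(_pipeline(question))
```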
 
 