JoshuaKelleyDs committed on
Commit
d60224c
1 Parent(s): f13efbb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -10
app.py CHANGED
@@ -19,7 +19,6 @@ async def create_youtube_transcription(youtube_url: str) -> List[langchain_core.
19
  Returns:
20
  List[langchain_core.documents.Document]: A list of documents containing the youtube transcription
21
  """
22
- await cl.Message(content=f"Hi").send()
23
  try:
24
  loader = YoutubeLoader.from_youtube_url(
25
  youtube_url, add_video_info=False
@@ -59,7 +58,7 @@ async def create_faiss_vector_store(docs: List[langchain_core.documents.Document
59
  except Exception as e:
60
  await cl.Message(content=f"failed to create vector db: {e}").send() # display the error if we failed to create the vector db
61
 
62
- def create_bm25_retreiver(docs: List[langchain_core.documents.Document]) -> BM25Retriever:
63
  """
64
  Create a BM25 retriever from a list of documents
65
  More Info: https://python.langchain.com/docs/integrations/retrievers/bm25/
@@ -68,11 +67,14 @@ def create_bm25_retreiver(docs: List[langchain_core.documents.Document]) -> BM25
68
  Returns:
69
  BM25Retriever: A BM25 retriever containing the documents
70
  """
71
- bm25 = BM25Retriever.from_documents(docs) # we don't need embeddings for BM25, as it uses keyword matching!
72
- bm25.k = 5 # we set k to 5, so we get 5 documents back
73
- return bm25
 
 
 
74
 
75
- def create_ensemble_retriever(vector_db:FAISS, bm25:BM25Retriever) -> EnsembleRetriever:
76
  """
77
  Create an ensemble retriever from a vector db and a BM25 retriever
78
  More Info: https://python.langchain.com/docs/how_to/ensemble_retriever/
@@ -82,8 +84,11 @@ def create_ensemble_retriever(vector_db:FAISS, bm25:BM25Retriever) -> EnsembleRe
82
  Returns:
83
  EnsembleRetriever: An ensemble retriever containing the vector db and the BM25 retriever
84
  """
85
- ensemble_retreiver = EnsembleRetriever(retrievers=[vector_db.as_retriever(), bm25], weights=[.3, .7]) # 30% semantic, 70% keyword retrieval
86
- return ensemble_retreiver
 
 
 
87
 
88
  @cl.on_chat_start
89
  async def start():
@@ -110,8 +115,8 @@ async def start():
110
  await cl.Message(content=f"youtube docs: {transcription}").send() # display the transcription of the first document to show that we have the correct data
111
  split_docs = await create_text_splitter(youtube_docs) # split the documents into chunks
112
  vector_db = await create_faiss_vector_store(split_docs) # create the vector db
113
- bm25 = create_bm25_retreiver(split_docs) # create the BM25 retreiver
114
- ensemble_retriever = create_ensemble_retriever(vector_db, bm25) # create the ensemble retriever
115
  cl.user_session.set("ensemble_retriever", ensemble_retriever) # store the ensemble retriever in the user session for our on message function
116
  except Exception as e:
117
  await cl.Message(content=f"failed to load model: {e}").send() # display the error if we failed to load the model
 
19
  Returns:
20
  List[langchain_core.documents.Document]: A list of documents containing the youtube transcription
21
  """
 
22
  try:
23
  loader = YoutubeLoader.from_youtube_url(
24
  youtube_url, add_video_info=False
 
58
  except Exception as e:
59
  await cl.Message(content=f"failed to create vector db: {e}").send() # display the error if we failed to create the vector db
60
 
61
+ async def create_bm25_retreiver(docs: List[langchain_core.documents.Document]) -> BM25Retriever:
62
  """
63
  Create a BM25 retriever from a list of documents
64
  More Info: https://python.langchain.com/docs/integrations/retrievers/bm25/
 
67
  Returns:
68
  BM25Retriever: A BM25 retriever containing the documents
69
  """
70
+ try:
71
+ bm25 = BM25Retriever.from_documents(docs) # we don't need embeddings for BM25, as it uses keyword matching!
72
+ bm25.k = 5 # we set k to 5, so we get 5 documents back
73
+ return bm25
74
+ except Exception as e:
75
+ await cl.Message(content=f"failed to create BM25 retreiver: {e}").send() # display the error if we failed to create the BM25 retreiver
76
 
77
+ async def create_ensemble_retriever(vector_db:FAISS, bm25:BM25Retriever) -> EnsembleRetriever:
78
  """
79
  Create an ensemble retriever from a vector db and a BM25 retriever
80
  More Info: https://python.langchain.com/docs/how_to/ensemble_retriever/
 
84
  Returns:
85
  EnsembleRetriever: An ensemble retriever containing the vector db and the BM25 retriever
86
  """
87
+ try:
88
+ ensemble_retreiver = EnsembleRetriever(retrievers=[vector_db.as_retriever(), bm25], weights=[.3, .7]) # 30% semantic, 70% keyword retrieval
89
+ return ensemble_retreiver
90
+ except Exception as e:
91
+ await cl.Message(content=f"failed to create ensemble retriever: {e}").send() # display the error if we failed to create the ensemble retriever
92
 
93
  @cl.on_chat_start
94
  async def start():
 
115
  await cl.Message(content=f"youtube docs: {transcription}").send() # display the transcription of the first document to show that we have the correct data
116
  split_docs = await create_text_splitter(youtube_docs) # split the documents into chunks
117
  vector_db = await create_faiss_vector_store(split_docs) # create the vector db
118
+ bm25 = await create_bm25_retreiver(split_docs) # create the BM25 retreiver
119
+ ensemble_retriever = await create_ensemble_retriever(vector_db, bm25) # create the ensemble retriever
120
  cl.user_session.set("ensemble_retriever", ensemble_retriever) # store the ensemble retriever in the user session for our on message function
121
  except Exception as e:
122
  await cl.Message(content=f"failed to load model: {e}").send() # display the error if we failed to load the model