Spaces:
Runtime error
Runtime error
JoshuaKelleyDs
committed on
Commit
•
d60224c
1
Parent(s):
f13efbb
Update app.py
Browse files
app.py
CHANGED
@@ -19,7 +19,6 @@ async def create_youtube_transcription(youtube_url: str) -> List[langchain_core.
|
|
19 |
Returns:
|
20 |
List[langchain_core.documents.Document]: A list of documents containing the youtube transcription
|
21 |
"""
|
22 |
-
await cl.Message(content=f"Hi").send()
|
23 |
try:
|
24 |
loader = YoutubeLoader.from_youtube_url(
|
25 |
youtube_url, add_video_info=False
|
@@ -59,7 +58,7 @@ async def create_faiss_vector_store(docs: List[langchain_core.documents.Document
|
|
59 |
except Exception as e:
|
60 |
await cl.Message(content=f"failed to create vector db: {e}").send() # display the error if we failed to create the vector db
|
61 |
|
62 |
-
def create_bm25_retreiver(docs: List[langchain_core.documents.Document]) -> BM25Retriever:
|
63 |
"""
|
64 |
Create a BM25 retriever from a list of documents
|
65 |
More Info: https://python.langchain.com/docs/integrations/retrievers/bm25/
|
@@ -68,11 +67,14 @@ def create_bm25_retreiver(docs: List[langchain_core.documents.Document]) -> BM25
|
|
68 |
Returns:
|
69 |
BM25Retriever: A BM25 retriever containing the documents
|
70 |
"""
|
71 |
-
|
72 |
-
|
73 |
-
|
|
|
|
|
|
|
74 |
|
75 |
-
def create_ensemble_retriever(vector_db:FAISS, bm25:BM25Retriever) -> EnsembleRetriever:
|
76 |
"""
|
77 |
Create an ensemble retriever from a vector db and a BM25 retriever
|
78 |
More Info: https://python.langchain.com/docs/how_to/ensemble_retriever/
|
@@ -82,8 +84,11 @@ def create_ensemble_retriever(vector_db:FAISS, bm25:BM25Retriever) -> EnsembleRe
|
|
82 |
Returns:
|
83 |
EnsembleRetriever: An ensemble retriever containing the vector db and the BM25 retriever
|
84 |
"""
|
85 |
-
|
86 |
-
|
|
|
|
|
|
|
87 |
|
88 |
@cl.on_chat_start
|
89 |
async def start():
|
@@ -110,8 +115,8 @@ async def start():
|
|
110 |
await cl.Message(content=f"youtube docs: {transcription}").send() # display the transcription of the first document to show that we have the correct data
|
111 |
split_docs = await create_text_splitter(youtube_docs) # split the documents into chunks
|
112 |
vector_db = await create_faiss_vector_store(split_docs) # create the vector db
|
113 |
-
bm25 = create_bm25_retreiver(split_docs) # create the BM25 retreiver
|
114 |
-
ensemble_retriever = create_ensemble_retriever(vector_db, bm25) # create the ensemble retriever
|
115 |
cl.user_session.set("ensemble_retriever", ensemble_retriever) # store the ensemble retriever in the user session for our on message function
|
116 |
except Exception as e:
|
117 |
await cl.Message(content=f"failed to load model: {e}").send() # display the error if we failed to load the model
|
|
|
19 |
Returns:
|
20 |
List[langchain_core.documents.Document]: A list of documents containing the youtube transcription
|
21 |
"""
|
|
|
22 |
try:
|
23 |
loader = YoutubeLoader.from_youtube_url(
|
24 |
youtube_url, add_video_info=False
|
|
|
58 |
except Exception as e:
|
59 |
await cl.Message(content=f"failed to create vector db: {e}").send() # display the error if we failed to create the vector db
|
60 |
|
61 |
+
async def create_bm25_retreiver(docs: List[langchain_core.documents.Document]) -> BM25Retriever:
|
62 |
"""
|
63 |
Create a BM25 retriever from a list of documents
|
64 |
More Info: https://python.langchain.com/docs/integrations/retrievers/bm25/
|
|
|
67 |
Returns:
|
68 |
BM25Retriever: A BM25 retriever containing the documents
|
69 |
"""
|
70 |
+
try:
|
71 |
+
bm25 = BM25Retriever.from_documents(docs) # we don't need embeddings for BM25, as it uses keyword matching!
|
72 |
+
bm25.k = 5 # we set k to 5, so we get 5 documents back
|
73 |
+
return bm25
|
74 |
+
except Exception as e:
|
75 |
+
await cl.Message(content=f"failed to create BM25 retreiver: {e}").send() # display the error if we failed to create the BM25 retreiver
|
76 |
|
77 |
+
async def create_ensemble_retriever(vector_db:FAISS, bm25:BM25Retriever) -> EnsembleRetriever:
|
78 |
"""
|
79 |
Create an ensemble retriever from a vector db and a BM25 retriever
|
80 |
More Info: https://python.langchain.com/docs/how_to/ensemble_retriever/
|
|
|
84 |
Returns:
|
85 |
EnsembleRetriever: An ensemble retriever containing the vector db and the BM25 retriever
|
86 |
"""
|
87 |
+
try:
|
88 |
+
ensemble_retreiver = EnsembleRetriever(retrievers=[vector_db.as_retriever(), bm25], weights=[.3, .7]) # 30% semantic, 70% keyword retrieval
|
89 |
+
return ensemble_retreiver
|
90 |
+
except Exception as e:
|
91 |
+
await cl.Message(content=f"failed to create ensemble retriever: {e}").send() # display the error if we failed to create the ensemble retriever
|
92 |
|
93 |
@cl.on_chat_start
|
94 |
async def start():
|
|
|
115 |
await cl.Message(content=f"youtube docs: {transcription}").send() # display the transcription of the first document to show that we have the correct data
|
116 |
split_docs = await create_text_splitter(youtube_docs) # split the documents into chunks
|
117 |
vector_db = await create_faiss_vector_store(split_docs) # create the vector db
|
118 |
+
bm25 = await create_bm25_retreiver(split_docs) # create the BM25 retreiver
|
119 |
+
ensemble_retriever = await create_ensemble_retriever(vector_db, bm25) # create the ensemble retriever
|
120 |
cl.user_session.set("ensemble_retriever", ensemble_retriever) # store the ensemble retriever in the user session for our on message function
|
121 |
except Exception as e:
|
122 |
await cl.Message(content=f"failed to load model: {e}").send() # display the error if we failed to load the model
|