Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -10,7 +10,7 @@ from langchain_community.retrievers import BM25Retriever # for the BM25 retrieve
|
|
10 |
from langchain.retrievers.ensemble import EnsembleRetriever # for the ensemble retriever
|
11 |
from langchain_text_splitters import RecursiveCharacterTextSplitter # for the text splitter
|
12 |
|
13 |
-
def create_youtube_transcription(youtube_url: str) -> List[langchain_core.documents.Document]:
|
14 |
"""
|
15 |
Create a youtube transcription from a youtube url
|
16 |
More Info: https://python.langchain.com/docs/integrations/document_loaders/youtube_transcript/
|
@@ -19,11 +19,14 @@ def create_youtube_transcription(youtube_url: str) -> List[langchain_core.docume
|
|
19 |
Returns:
|
20 |
List[langchain_core.documents.Document]: A list of documents containing the youtube transcription
|
21 |
"""
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
|
|
|
|
|
|
27 |
|
28 |
def create_text_splitter(docs: List[langchain_core.documents.Document]):
|
29 |
"""
|
@@ -99,7 +102,7 @@ async def start():
|
|
99 |
# more on ask user message: https://docs.chainlit.io/api-reference/ask/ask-for-input
|
100 |
await cl.Message(content=f"youtube link: {youtube_link}").send() # display and double check to make sure the link is correct
|
101 |
youtube_docs = await create_youtube_transcription(youtube_link['content']) # create the youtube transcription
|
102 |
-
split_docs =
|
103 |
vector_db = create_faiss_vector_store(split_docs) # create the vector db
|
104 |
bm25 = create_bm25_retreiver(split_docs) # create the BM25 retriever
|
105 |
ensemble_retriever = await create_ensemble_retriever(vector_db, bm25) # create the ensemble retriever
|
@@ -109,7 +112,6 @@ async def start():
|
|
109 |
except Exception as e:
|
110 |
await cl.Message(content=f"failed to load model: {e}").send() # display the error if we failed to load the model
|
111 |
|
112 |
-
|
113 |
@cl.on_message
|
114 |
async def message(message: cl.Message):
|
115 |
"""
|
|
|
10 |
from langchain.retrievers.ensemble import EnsembleRetriever # for the ensemble retriever
|
11 |
from langchain_text_splitters import RecursiveCharacterTextSplitter # for the text splitter
|
12 |
|
13 |
+
async def create_youtube_transcription(youtube_url: str) -> List[langchain_core.documents.Document]:
|
14 |
"""
|
15 |
Create a youtube transcription from a youtube url
|
16 |
More Info: https://python.langchain.com/docs/integrations/document_loaders/youtube_transcript/
|
|
|
19 |
Returns:
|
20 |
List[langchain_core.documents.Document]: A list of documents containing the youtube transcription
|
21 |
"""
|
22 |
+
try:
|
23 |
+
loader = YoutubeLoader.from_youtube_url(
|
24 |
+
youtube_url, add_video_info=True
|
25 |
+
) # we can also pass an array of youtube urls to load multiple videos at once!
|
26 |
+
youtube_docs = loader.load() # this loads the transcript
|
27 |
+
return youtube_docs
|
28 |
+
except Exception as e:
|
29 |
+
await cl.Message(content=f"failed to load youtube video: {e}").send() # display the error if we failed to load the youtube video
|
30 |
|
31 |
def create_text_splitter(docs: List[langchain_core.documents.Document]):
|
32 |
"""
|
|
|
102 |
# more on ask user message: https://docs.chainlit.io/api-reference/ask/ask-for-input
|
103 |
await cl.Message(content=f"youtube link: {youtube_link}").send() # display and double check to make sure the link is correct
|
104 |
youtube_docs = await create_youtube_transcription(youtube_link['content']) # create the youtube transcription
|
105 |
+
split_docs = create_text_splitter(youtube_docs) # split the documents into chunks
|
106 |
vector_db = create_faiss_vector_store(split_docs) # create the vector db
|
107 |
bm25 = create_bm25_retreiver(split_docs) # create the BM25 retriever
|
108 |
ensemble_retriever = await create_ensemble_retriever(vector_db, bm25) # create the ensemble retriever
|
|
|
112 |
except Exception as e:
|
113 |
await cl.Message(content=f"failed to load model: {e}").send() # display the error if we failed to load the model
|
114 |
|
|
|
115 |
@cl.on_message
|
116 |
async def message(message: cl.Message):
|
117 |
"""
|