Spaces:

JoshuaKelleyDs
/

chainlit-youtube-rag-chat

Runtime error

App Files Files Community

JoshuaKelleyDs committed on Oct 18, 2024

Commit

ccac939

verified ·

1 Parent(s): c578b36

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -8

app.py CHANGED Viewed

@@ -10,7 +10,7 @@ from langchain_community.retrievers import BM25Retriever # for the BM25 retrieve
 from langchain.retrievers.ensemble import EnsembleRetriever # for the ensemble retriever
 from langchain_text_splitters import RecursiveCharacterTextSplitter # for the text splitter
-def create_youtube_transcription(youtube_url: str) -> List[langchain_core.documents.Document]:
     """
     Create a youtube transcription from a youtube url
     More Info: https://python.langchain.com/docs/integrations/document_loaders/youtube_transcript/
@@ -19,11 +19,14 @@ def create_youtube_transcription(youtube_url: str) -> List[langchain_core.docume
     Returns:
         List[langchain_core.documents.Document]: A list of documents containing the youtube transcription
     """
-    loader = YoutubeLoader.from_youtube_url(
-        youtube_url, add_video_info=True
-    ) # we can also pass an array of youtube urls to load multiple videos at once!
-    youtube_docs = loader.load() # this loads the transcript
-    return youtube_docs
 def create_text_splitter(docs: List[langchain_core.documents.Document]):
     """
@@ -99,7 +102,7 @@ async def start():
         # more on ask user message: https://docs.chainlit.io/api-reference/ask/ask-for-input
         await cl.Message(content=f"youtube link: {youtube_link}").send() # display and double check to make sure the link is correct
         youtube_docs = await create_youtube_transcription(youtube_link['content']) # create the youtube transcription
-        split_docs = await create_text_splitter(youtube_docs) # split the documents into chunks
         vector_db = create_faiss_vector_store(split_docs) # create the vector db
         bm25 = create_bm25_retreiver(split_docs) # create the BM25 retreiver
         ensemble_retriever = await create_ensemble_retriever(vector_db, bm25) # create the ensemble retriever
@@ -109,7 +112,6 @@ async def start():
     except Exception as e:
         await cl.Message(content=f"failed to load model: {e}").send() # display the error if we failed to load the model
 @cl.on_message
 async def message(message: cl.Message):
     """

 from langchain.retrievers.ensemble import EnsembleRetriever # for the ensemble retriever
 from langchain_text_splitters import RecursiveCharacterTextSplitter # for the text splitter
+async def create_youtube_transcription(youtube_url: str) -> List[langchain_core.documents.Document]:
     """
     Create a youtube transcription from a youtube url
     More Info: https://python.langchain.com/docs/integrations/document_loaders/youtube_transcript/
     Returns:
         List[langchain_core.documents.Document]: A list of documents containing the youtube transcription
     """
+    try:
+        loader = YoutubeLoader.from_youtube_url(
+            youtube_url, add_video_info=True
+        ) # we can also pass an array of youtube urls to load multiple videos at once!
+        youtube_docs = loader.load() # this loads the transcript
+        return youtube_docs
+    except Exception as e:
+        await cl.Message(content=f"failed to load youtube video: {e}").send() # display the error if we failed to load the youtube video
 def create_text_splitter(docs: List[langchain_core.documents.Document]):
     """
         # more on ask user message: https://docs.chainlit.io/api-reference/ask/ask-for-input
         await cl.Message(content=f"youtube link: {youtube_link}").send() # display and double check to make sure the link is correct
         youtube_docs = await create_youtube_transcription(youtube_link['content']) # create the youtube transcription
+        split_docs = create_text_splitter(youtube_docs) # split the documents into chunks
         vector_db = create_faiss_vector_store(split_docs) # create the vector db
         bm25 = create_bm25_retreiver(split_docs) # create the BM25 retreiver
         ensemble_retriever = await create_ensemble_retriever(vector_db, bm25) # create the ensemble retriever
     except Exception as e:
         await cl.Message(content=f"failed to load model: {e}").send() # display the error if we failed to load the model
 @cl.on_message
 async def message(message: cl.Message):
     """