JoshuaKelleyDs committed on
Commit
ccac939
·
verified ·
1 Parent(s): c578b36

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -8
app.py CHANGED
@@ -10,7 +10,7 @@ from langchain_community.retrievers import BM25Retriever # for the BM25 retrieve
10
  from langchain.retrievers.ensemble import EnsembleRetriever # for the ensemble retriever
11
  from langchain_text_splitters import RecursiveCharacterTextSplitter # for the text splitter
12
 
13
- def create_youtube_transcription(youtube_url: str) -> List[langchain_core.documents.Document]:
14
  """
15
  Create a youtube transcription from a youtube url
16
  More Info: https://python.langchain.com/docs/integrations/document_loaders/youtube_transcript/
@@ -19,11 +19,14 @@ def create_youtube_transcription(youtube_url: str) -> List[langchain_core.docume
19
  Returns:
20
  List[langchain_core.documents.Document]: A list of documents containing the youtube transcription
21
  """
22
- loader = YoutubeLoader.from_youtube_url(
23
- youtube_url, add_video_info=True
24
- ) # we can also pass an array of youtube urls to load multiple videos at once!
25
- youtube_docs = loader.load() # this loads the transcript
26
- return youtube_docs
 
 
 
27
 
28
  def create_text_splitter(docs: List[langchain_core.documents.Document]):
29
  """
@@ -99,7 +102,7 @@ async def start():
99
  # more on ask user message: https://docs.chainlit.io/api-reference/ask/ask-for-input
100
  await cl.Message(content=f"youtube link: {youtube_link}").send() # display and double check to make sure the link is correct
101
  youtube_docs = await create_youtube_transcription(youtube_link['content']) # create the youtube transcription
102
- split_docs = await create_text_splitter(youtube_docs) # split the documents into chunks
103
  vector_db = create_faiss_vector_store(split_docs) # create the vector db
104
  bm25 = create_bm25_retreiver(split_docs) # create the BM25 retreiver
105
  ensemble_retriever = await create_ensemble_retriever(vector_db, bm25) # create the ensemble retriever
@@ -109,7 +112,6 @@ async def start():
109
  except Exception as e:
110
  await cl.Message(content=f"failed to load model: {e}").send() # display the error if we failed to load the model
111
 
112
-
113
  @cl.on_message
114
  async def message(message: cl.Message):
115
  """
 
10
  from langchain.retrievers.ensemble import EnsembleRetriever # for the ensemble retriever
11
  from langchain_text_splitters import RecursiveCharacterTextSplitter # for the text splitter
12
 
13
+ async def create_youtube_transcription(youtube_url: str) -> List[langchain_core.documents.Document]:
14
  """
15
  Create a youtube transcription from a youtube url
16
  More Info: https://python.langchain.com/docs/integrations/document_loaders/youtube_transcript/
 
19
  Returns:
20
  List[langchain_core.documents.Document]: A list of documents containing the youtube transcription
21
  """
22
+ try:
23
+ loader = YoutubeLoader.from_youtube_url(
24
+ youtube_url, add_video_info=True
25
+ ) # we can also pass an array of youtube urls to load multiple videos at once!
26
+ youtube_docs = loader.load() # this loads the transcript
27
+ return youtube_docs
28
+ except Exception as e:
29
+ await cl.Message(content=f"failed to load youtube video: {e}").send() # display the error if we failed to load the youtube video
30
 
31
  def create_text_splitter(docs: List[langchain_core.documents.Document]):
32
  """
 
102
  # more on ask user message: https://docs.chainlit.io/api-reference/ask/ask-for-input
103
  await cl.Message(content=f"youtube link: {youtube_link}").send() # display and double check to make sure the link is correct
104
  youtube_docs = await create_youtube_transcription(youtube_link['content']) # create the youtube transcription
105
+ split_docs = create_text_splitter(youtube_docs) # split the documents into chunks
106
  vector_db = create_faiss_vector_store(split_docs) # create the vector db
107
  bm25 = create_bm25_retreiver(split_docs) # create the BM25 retreiver
108
  ensemble_retriever = await create_ensemble_retriever(vector_db, bm25) # create the ensemble retriever
 
112
  except Exception as e:
113
  await cl.Message(content=f"failed to load model: {e}").send() # display the error if we failed to load the model
114
 
 
115
  @cl.on_message
116
  async def message(message: cl.Message):
117
  """