from typing import List, Optional  # for type hinting

import chainlit as cl  # handles the chat interface
import langchain_core  # for type hinting
from langchain.retrievers.ensemble import EnsembleRetriever  # for the ensemble retriever
from langchain_community.document_loaders import YoutubeLoader  # for loading the youtube video
from langchain_community.retrievers import BM25Retriever  # for the BM25 retriever
from langchain_community.vectorstores import FAISS  # for the vector store
from langchain_core.prompts import ChatPromptTemplate  # for writing the prompt template
from langchain_core.runnables import RunnableSequence, RunnablePassthrough  # for chain execution
from langchain_text_splitters import RecursiveCharacterTextSplitter  # for the text splitter
from langchain_together import ChatTogether, TogetherEmbeddings  # for the LLM and Embeddings

# Number of chunks each retriever returns per query.
RETRIEVER_K = 5

# The prompt never changes between messages, so it is built once at import time.
RAG_PROMPT = ChatPromptTemplate.from_template(
    template="""
    You are a helpful assistant that can answer questions about the following video.
    Here is the appropriate chunks of context: {context}.
    Answer the question: {question} but do not use any information outside of the video.
    Cite the source or information you used to answer the question
    """
)


async def create_youtube_transcription(
    youtube_url: str,
) -> Optional[List[langchain_core.documents.Document]]:
    """
    Create a youtube transcription from a youtube url
    More Info: https://python.langchain.com/docs/integrations/document_loaders/youtube_transcript/

    Accepts:
        youtube_url: str - The url of the youtube video

    Returns:
        List[langchain_core.documents.Document]: A list of documents containing the
        youtube transcription, or None if loading failed (the error is shown to the
        user in the chat before returning).
    """
    try:
        # we can also pass an array of youtube urls to load multiple videos at once!
        loader = YoutubeLoader.from_youtube_url(youtube_url, add_video_info=False)
        return loader.load()  # this loads the transcript
    except Exception as e:
        # display the error if we failed to load the youtube video
        await cl.Message(
            content=f"failed to load youtube video: {e} Please refresh the page"
        ).send()
        return None


async def create_text_splitter(docs: List[langchain_core.documents.Document]):
    """
    Split a list of documents into overlapping chunks
    More Info: https://python.langchain.com/docs/how_to/recursive_text_splitter/

    Accepts:
        docs: List[langchain_core.documents.Document] - A list of documents to split

    Returns:
        List[langchain_core.documents.Document]: A list of documents containing the text split
    """
    # without an overlap, context might get cut off at chunk boundaries
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return text_splitter.split_documents(docs)  # split the documents into chunks


async def create_faiss_vector_store(
    docs: List[langchain_core.documents.Document],
) -> Optional[FAISS]:
    """
    Create a FAISS vector store or vector database from a list of documents
    More Info: https://python.langchain.com/docs/integrations/vectorstores/faiss/

    Accepts:
        docs: List[langchain_core.documents.Document] - A list of documents to store

    Returns:
        FAISS: A vector store containing the documents, or None if creation failed
        (the error is shown to the user in the chat before returning).
    """
    try:
        # we can get the embedding model from the user session or pass as a parameter too!
        embedding = cl.user_session.get("embedding")
        # NOTE: the number of results (k) is not a property of the store itself; it is
        # configured when the store is turned into a retriever (see create_ensemble_retriever).
        return FAISS.from_documents(docs, embedding)  # create the vector store
    except Exception as e:
        # display the error if we failed to create the vector db
        await cl.Message(content=f"failed to create vector db: {e}").send()
        return None


def create_bm25_retreiver(docs: List[langchain_core.documents.Document]) -> BM25Retriever:
    """
    Create a BM25 retriever from a list of documents
    More Info: https://python.langchain.com/docs/integrations/retrievers/bm25/

    Accepts:
        docs: List[langchain_core.documents.Document] - A list of documents to store

    Returns:
        BM25Retriever: A BM25 retriever containing the documents
    """
    # we don't need embeddings for BM25, as it uses keyword matching!
    bm25 = BM25Retriever.from_documents(docs)
    bm25.k = RETRIEVER_K  # so we get RETRIEVER_K documents back
    return bm25


def create_ensemble_retriever(vector_db: FAISS, bm25: BM25Retriever) -> EnsembleRetriever:
    """
    Create an ensemble retriever from a vector db and a BM25 retriever
    More Info: https://python.langchain.com/docs/how_to/ensemble_retriever/

    Accepts:
        vector_db: FAISS - A vector db
        bm25: BM25Retriever - A BM25 retriever

    Returns:
        EnsembleRetriever: An ensemble retriever containing the vector db and the BM25 retriever
    """
    # k must be passed through search_kwargs; otherwise the semantic retriever
    # falls back to the library default instead of RETRIEVER_K.
    semantic_retriever = vector_db.as_retriever(search_kwargs={"k": RETRIEVER_K})
    # 30% semantic, 70% keyword retrieval
    return EnsembleRetriever(retrievers=[semantic_retriever, bm25], weights=[.3, .7])


@cl.on_chat_start
async def start():
    """
    More info: https://docs.chainlit.io/api-reference/lifecycle-hooks/on-chat-start

    This function is called when the chat starts. Under the hood it handles all the
    complicated stuff for loading the UI. We explicitly load the model, embeddings,
    and retrievers.

    Asks the user to provide the YouTube video link and loads the transcription.
    With the transcription, it creates a vector store and a BM25 retriever.
    Those are used to create an ensemble retriever combining the two, which is
    stored in the user session under "ensemble_retriever".

    Every failure is reported in the chat and stops the setup early, so the
    on-message handler can tell that no retriever is available.
    """
    await cl.Message(
        content="Hello! I am your AI assistant. I can help you with your questions about the video you provide."
    ).send()

    # Step 1: load the models. A try/except prevents the app from crashing on error.
    try:
        llm = ChatTogether(model="meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo")  # initialize the LLM model
        # we can send messages to be displayed with cl.Message().send()
        await cl.Message(content="model is successfully loaded").send()
        # we can store variables in a special memory called the user session,
        # so we can use them in our on message function and more
        cl.user_session.set("llm", llm)

        embedding = TogetherEmbeddings(model="togethercomputer/m2-bert-80M-8k-retrieval")  # initialize the embedding model
        cl.user_session.set("embedding", embedding)  # store the embedding model in the user session
        await cl.Message(content="embedding model loaded").send()
    except Exception as e:
        # display the error if we failed to load the model
        await cl.Message(content=f"failed to load model: {e}").send()
        return

    # Step 2: ask for the video link.
    # We can ask the user for input using cl.AskUserMessage().send() which does not affect cl.on_message()
    # more on ask user message: https://docs.chainlit.io/api-reference/ask/ask-for-input
    youtube_link = await cl.AskUserMessage("Please provide the YouTube video link").send()
    if not youtube_link:
        # send() returns None when the user does not answer before the timeout
        await cl.Message(
            content="No YouTube link was provided. Please refresh the page to try again."
        ).send()
        return

    # Newer Chainlit versions return the reply under "output", older ones under "content".
    youtube_url = (youtube_link.get("output") or youtube_link.get("content") or "").strip()
    if not youtube_url:
        await cl.Message(
            content="No YouTube link was provided. Please refresh the page to try again."
        ).send()
        return
    # display and double check to make sure the link is correct
    await cl.Message(content=f"youtube link: {youtube_url}").send()

    # Step 3: build the retrieval pipeline from the transcript.
    try:
        youtube_docs = await create_youtube_transcription(youtube_url)  # create the youtube transcription
        if youtube_docs is None:
            return  # the helper has already reported the error to the user
        if not youtube_docs:
            await cl.Message(
                content="No transcript was found for this video. Please refresh the page and try another link."
            ).send()
            return

        transcription = youtube_docs[0].page_content  # get the transcription of the first document
        # display the transcription of the first document to show that we have the correct data
        await cl.Message(content=f"youtube docs: {transcription}").send()

        split_docs = await create_text_splitter(youtube_docs)  # split the documents into chunks
        vector_db = await create_faiss_vector_store(split_docs)  # create the vector db
        if vector_db is None:
            return  # the helper has already reported the error to the user

        bm25 = create_bm25_retreiver(split_docs)  # create the BM25 retriever
        ensemble_retriever = create_ensemble_retriever(vector_db, bm25)  # create the ensemble retriever
        # store the ensemble retriever in the user session for our on message function
        cl.user_session.set("ensemble_retriever", ensemble_retriever)
    except Exception as e:
        await cl.Message(content=f"failed to prepare the video for questions: {e}").send()


@cl.on_message
async def message(message: cl.Message):
    """
    More info: https://docs.chainlit.io/api-reference/lifecycle-hooks/on-message

    This function is called when the user sends a message.
    It uses the ensemble retriever to find the most relevant documents and feeds
    them into the LLM. We then display the relevant documents and the answer to
    the user.

    Retrieval runs once per question: the documents shown to the user are the
    same ones passed to the LLM as context.
    """
    llm = cl.user_session.get("llm")  # the LLM model we initialized in the start function
    ensemble_retriever = cl.user_session.get("ensemble_retriever")  # the retriever built in the start function
    if llm is None or ensemble_retriever is None:
        # setup in start() did not finish, so there is nothing to query
        await cl.Message(
            content="The video has not been loaded yet. Please refresh the page and provide a YouTube link."
        ).send()
        return

    try:
        # we use the ensemble retriever to find the most relevant documents;
        # the async variant keeps the event loop free while waiting
        relevant_docs = await ensemble_retriever.ainvoke(message.content)

        # we display the relevant documents to the user
        await cl.Message(content="Displaying Relevant Docs").send()
        for doc in relevant_docs:  # loop through the relevant documents and display each one!
            await cl.Message(content=doc.page_content).send()
        await cl.Message(content="Done Displaying Relevant Docs").send()

        # question + retrieved docs -> format them into the prompt template -> pass to LLM
        rag_chain = RunnableSequence(RAG_PROMPT, llm)
        response = await rag_chain.ainvoke(
            {"context": relevant_docs, "question": message.content}
        )
    except Exception as e:
        await cl.Message(content=f"failed to answer the question: {e}").send()
        return

    # we display the response to the user
    await cl.Message(content=f"LLM Response: {response.content}").send()