def transcribe_youtube_video(url, force_transcribe=False):
    """Return (title, transcript) for a YouTube video.

    Tries the captions published with the video first; falls back to (or is
    forced into) Whisper speech recognition on the downloaded audio.
    """
    import pytube
    from pytube import YouTube

    yt = YouTube(str(url))
    text = ''
    try:
        # Prefer the transcript YouTube already provides for the video.
        from youtube_transcript_api import YouTubeTranscriptApi
        vid_id = pytube.extract.video_id(url)
        for t in YouTubeTranscriptApi.get_transcript(vid_id):
            text += t['text'] + ' '
    except Exception:
        # No transcript available (or the fetch failed); fall back to Whisper below.
        pass

    if text == '' or force_transcribe:
        import torch
        import transformers

        # Download the audio-only stream and transcribe it with Whisper.
        save_dir = "./docs/youtube/"
        audio = yt.streams.filter(only_audio=True).first()
        out_file = audio.download(filename="audio.mp3", output_path=save_dir)

        whisper_asr = transformers.pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-large",
            device_map='auto',
        )
        # Force English transcription regardless of the language Whisper detects.
        whisper_asr.model.config.forced_decoder_ids = (
            whisper_asr.tokenizer.get_decoder_prompt_ids(
                language="en",
                task="transcribe",
            )
        )
        temp = whisper_asr(out_file, chunk_length_s=20)
        text = temp['text']

        # Free GPU memory before the summarization model is loaded.
        del whisper_asr
        torch.cuda.empty_cache()

    return yt.title, text
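# Example usage (a sketch -- the URL is a placeholder, not part of this script):
#
#   title, transcript = transcribe_youtube_video(
#       "https://www.youtube.com/watch?v=...", force_transcribe=False
#   )
#   print(title)
#   print(transcript[:300])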
def summarize_text(title, text):
    """Summarize a transcript with a LangChain map-reduce chain over Falcon-7B-Instruct."""
    from langchain.chains.llm import LLMChain
    from langchain.prompts import PromptTemplate
    from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain
    from langchain.chains.combine_documents.stuff import StuffDocumentsChain
    import torch
    import transformers
    from transformers import BitsAndBytesConfig
    from transformers import AutoTokenizer, AutoModelForCausalLM

    # Load the instruct model in 4-bit NF4 quantization so it fits on a single GPU.
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )

    # model_id = "nomic-ai/gpt4all-falcon"
    model_id = "tiiuae/falcon-7b-instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        # trust_remote_code=True,
        quantization_config=quantization_config,
    )

    from langchain import HuggingFacePipeline

    # Short-output pipeline for the map and collapse steps.
    pipeline = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        max_new_tokens=150,
        pad_token_id=tokenizer.eos_token_id,
        # device=-1,
    )
    llm = HuggingFacePipeline(pipeline=pipeline)

    # Longer-output pipeline (with a repetition penalty) for the final summary.
    pipeline2 = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        max_new_tokens=250,
        pad_token_id=tokenizer.eos_token_id,
        repetition_penalty=2.0,
        # device=-1,
    )
    llm2 = HuggingFacePipeline(pipeline=pipeline2)

    # Map
    map_template = """
Summarize the following text in a clear and concise way:
TITLE: `{title}`
TEXT: `{docs}`
Brief Summary:
"""
    map_prompt = PromptTemplate(template=map_template, input_variables=['title', 'docs'])
    map_chain = LLMChain(llm=llm, prompt=map_prompt)

    # Reduce - Collapse
    reduce_template = """
The following is a set of partial summaries of a video titled {title}:
partial summaries:
{doc_summaries}
Take these and distill them into a consolidated summary.
Summary:
"""
    reduce_prompt = PromptTemplate(template=reduce_template, input_variables=['title', 'doc_summaries'])
    reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)

    # Takes a list of documents, combines them into a single string,
    # and passes this to an LLMChain.
    collapse_documents_chain = StuffDocumentsChain(
        llm_chain=reduce_chain,
        document_variable_name="doc_summaries",
    )

    # Final Reduce - Combine
    final_reduce_template = """
The following is a set of partial summaries of a video titled '{title}':
partial summaries:
{doc_summaries}
Generate a summary of the whole text that includes the `Video Subject` and the `Key Highlights` as at most 10 bullet points listing the main facts, arguments, or points:
"""
    final_reduce_prompt = PromptTemplate(template=final_reduce_template, input_variables=['title', 'doc_summaries'])
    final_reduce_chain = LLMChain(llm=llm2, prompt=final_reduce_prompt)

    # Takes a list of documents, combines them into a single string,
    # and passes this to an LLMChain.
    combine_documents_chain = StuffDocumentsChain(
        llm_chain=final_reduce_chain,
        document_variable_name="doc_summaries",
    )

    # Combines and iteratively reduces the mapped documents.
    reduce_documents_chain = ReduceDocumentsChain(
        # This is the final chain that is called.
        combine_documents_chain=combine_documents_chain,
        # Used if the documents exceed the context of `combine_documents_chain`.
        collapse_documents_chain=collapse_documents_chain,
        # The maximum number of tokens to group documents into.
        token_max=500,
    )

    # Combine documents by mapping a chain over them, then combining the results.
    map_reduce_chain = MapReduceDocumentsChain(
        # Map chain
        llm_chain=map_chain,
        # Reduce chain
        reduce_documents_chain=reduce_documents_chain,
        # The variable name in the llm_chain to put the documents in.
        document_variable_name="docs",
        # Do not return the results of the map steps in the output.
        return_intermediate_steps=False,
    )

    from langchain.document_loaders import TextLoader
    from langchain.text_splitter import TokenTextSplitter

    # Round-trip the transcript through a file so TextLoader can wrap it as a Document.
    with open('./transcript.txt', 'w') as f:
        f.write(text)
    loader = TextLoader("./transcript.txt")
    doc = loader.load()
    text_splitter = TokenTextSplitter(chunk_size=500, chunk_overlap=0)
    docs = text_splitter.split_documents(doc)

    summary = map_reduce_chain.run({'input_documents': docs, 'title': title})

    # Free GPU memory once the summary has been generated.
    del llm
    del llm2
    del model
    del tokenizer
    torch.cuda.empty_cache()

    return summary
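# Rough shape of the chain above: each ~500-token chunk gets a "Brief Summary"
# from map_chain; ReduceDocumentsChain stuffs the partial summaries together,
# collapsing them through reduce_chain whenever they exceed token_max=500; and
# final_reduce_chain emits the `Video Subject` / `Key Highlights` output.
# A minimal sanity check of the chunking step alone (a sketch; assumes the
# transcript file written by summarize_text exists):
#
#   from langchain.text_splitter import TokenTextSplitter
#   splitter = TokenTextSplitter(chunk_size=500, chunk_overlap=0)
#   chunks = splitter.split_text(open('./transcript.txt').read())
#   print(len(chunks), 'chunks; first chunk starts:', chunks[0][:80])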
import gradio as gr
import pytube
from pytube import YouTube


def get_youtube_title(url):
    yt = YouTube(str(url))
    return yt.title


def get_video(url):
    vid_id = pytube.extract.video_id(url)
    # Standard YouTube iframe embed for the extracted video id.
    embed_html = (
        '<iframe width="560" height="315" '
        'src="https://www.youtube.com/embed/{}" '
        'frameborder="0" allowfullscreen></iframe>'
    ).format(vid_id)
    return embed_html


def summarize_youtube_video(url, force_transcribe):
    title, text = transcribe_youtube_video(url, force_transcribe)
    summary = summarize_text(title, text)
    return summary


html = ''

with gr.Blocks() as demo:
    # gr.Markdown("Transcribe a YouTube video using this demo.")
    with gr.Row():
        with gr.Column(scale=3):
            url = gr.Textbox(label="Enter YouTube video URL here:",
                             placeholder="https://www.youtube.com/watch?v=")
            force_transcribe = gr.Checkbox(label="Transcribe even if a transcription is available.")
        with gr.Column(scale=1):
            gr.Markdown("# Summarize a YouTube video using this demo!", scale=2)
            sum_btn = gr.Button("Summarize!", scale=1)
    title = gr.Textbox(label="Video Title", placeholder="title...")
    with gr.Row():
        video = gr.HTML(html)
        output = gr.Textbox(label="Summary", placeholder="summary...")

    # Three handlers run off the same click: the title and embed update quickly,
    # while the summary goes through the queue.
    sum_btn.click(fn=get_youtube_title,
                  inputs=url,
                  outputs=title,
                  api_name="get_youtube_title")
    sum_btn.click(fn=summarize_youtube_video,
                  inputs=[url, force_transcribe],
                  outputs=output,
                  api_name="summarize_youtube_video",
                  queue=True)
    sum_btn.click(fn=get_video,
                  inputs=url,
                  outputs=video,
                  api_name="get_youtube_video",
                  queue=False)

demo.queue()
demo.launch(share=True)
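# The api_name endpoints registered above can also be called programmatically,
# e.g. with gradio_client (a sketch; the URL is whatever demo.launch() prints):
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   summary = client.predict(
#       "https://www.youtube.com/watch?v=...",  # url (placeholder)
#       False,                                  # force_transcribe
#       api_name="/summarize_youtube_video",
#   )
#   print(summary)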