# app.py — YouTube video summarizer (Gradio demo)
# Hugging Face Space by smakamali — commit b313c5d (8.53 kB)
def transcribe_youtube_video(url, force_transcribe=False):
    """Return (title, transcript) for the YouTube video at *url*.

    First tries the video's existing captions via youtube_transcript_api
    (fast, no GPU needed). If no captions are available — or
    *force_transcribe* is True — the audio is downloaded with pytube and
    transcribed locally with OpenAI Whisper.

    Args:
        url: Full YouTube video URL.
        force_transcribe: When True, run Whisper even if captions exist.

    Returns:
        Tuple of (video title, transcript text).
    """
    text = ''
    yt = None
    # Best-effort caption fetch: any failure (no captions, bad URL,
    # network error) just falls through to the Whisper branch below.
    try:
        from youtube_transcript_api import YouTubeTranscriptApi
        import pytube
        from pytube import YouTube
        vid_id = pytube.extract.video_id(url)
        for segment in YouTubeTranscriptApi.get_transcript(vid_id):
            text += segment['text'] + ' '
        yt = YouTube(str(url))
    except Exception:
        pass
    if text == '' or force_transcribe:
        from pytube import YouTube
        import torch
        import os
        save_dir = "./docs/youtube/"
        # makedirs(exist_ok=True) instead of mkdir: creates the missing
        # "./docs" parent and does not crash on repeat invocations.
        os.makedirs(save_dir, exist_ok=True)
        yt = YouTube(str(url))
        audio = yt.streams.filter(only_audio=True).first()
        out_file = audio.download(filename="audio.mp3", output_path=save_dir)
        import transformers
        whisper_asr = transformers.pipeline(
            "automatic-speech-recognition", model="openai/whisper-large", device_map='auto',
        )
        # Force English transcription regardless of Whisper's own
        # language detection.
        whisper_asr.model.config.forced_decoder_ids = (
            whisper_asr.tokenizer.get_decoder_prompt_ids(
                language="en",
                task="transcribe"
            )
        )
        temp = whisper_asr(out_file, chunk_length_s=20)
        text = temp['text']
        # Free GPU memory before the summarization model is loaded.
        del whisper_asr
        torch.cuda.empty_cache()
    if yt is None:
        # Captions succeeded but YouTube(url) raised inside the try block:
        # retry here instead of raising NameError on `yt.title` below.
        from pytube import YouTube
        yt = YouTube(str(url))
    return yt.title, text
def summarize_text(title, text):
    """Summarize a video transcript with a map-reduce LLM chain.

    Loads Falcon-7B-Instruct in 4-bit NF4 quantization, splits the
    transcript into 500-token chunks, summarizes each chunk (map), then
    consolidates the partial summaries (reduce) into a final summary with
    a video subject and key highlights.

    Args:
        title: Video title, interpolated into the prompts.
        text: Full transcript text to summarize.

    Returns:
        The generated summary string.
    """
    from langchain.chains.llm import LLMChain
    from langchain.prompts import PromptTemplate
    from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain
    from langchain.chains.combine_documents.stuff import StuffDocumentsChain
    from langchain import HuggingFacePipeline
    from langchain.document_loaders import TextLoader
    from langchain.text_splitter import TokenTextSplitter
    import os
    import torch
    import transformers
    from transformers import BitsAndBytesConfig, AutoTokenizer, AutoModelForCausalLM

    # 4-bit NF4 quantization so the 7B model fits in limited GPU memory.
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )
    # model_id (not `model`) so the checkpoint name is not shadowed by the
    # loaded model object below.
    # model_id = "nomic-ai/gpt4all-falcon"
    model_id = "tiiuae/falcon-7b-instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=quantization_config,
    )

    # Short-output pipeline for the map / collapse steps.
    pipeline = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        max_new_tokens=150,
        pad_token_id=tokenizer.eos_token_id,
    )
    llm = HuggingFacePipeline(pipeline=pipeline)

    # Longer-output pipeline (with repetition penalty) for the final summary.
    pipeline2 = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        max_new_tokens=250,
        pad_token_id=tokenizer.eos_token_id,
        repetition_penalty=2.0,
    )
    llm2 = HuggingFacePipeline(pipeline=pipeline2)

    # Map: summarize each transcript chunk independently.
    map_template = """
Summarize the following text in a clear and concise way:
TITLE: `{title}`
TEXT:`{docs}`
Brief Summary:
"""
    map_prompt = PromptTemplate(template=map_template,
                                input_variables=['title', 'docs'])
    map_chain = LLMChain(llm=llm, prompt=map_prompt)

    # Reduce / collapse: merge partial summaries when they exceed token_max.
    reduce_template = """
The following is set of partial summaries of a video titled {title}:
partial summaries: {doc_summaries}
Take these and distill them into a consolidated summary.
Summary:
"""
    reduce_prompt = PromptTemplate(template=reduce_template,
                                   input_variables=['title', 'doc_summaries'])
    reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)
    # Takes a list of documents, combines them into a single string, and
    # passes this to an LLMChain.
    collapse_documents_chain = StuffDocumentsChain(
        llm_chain=reduce_chain, document_variable_name="doc_summaries"
    )

    # Final reduce: produce the user-facing summary.
    # ("bullet points" — fixed a "pullet" typo in the original prompt.)
    final_reduce_template = """
The following is set of partial summaries of a video titled '{title}':
partial summaries:
{doc_summaries}
Generate a summary of the whole text that includes `Video Subject`, and the `Key Highlights` as maximum 10 bullet points listing the main facts, arguments, or points:
"""
    final_reduce_prompt = PromptTemplate(template=final_reduce_template,
                                         input_variables=['title', 'doc_summaries'])
    final_reduce_chain = LLMChain(llm=llm2, prompt=final_reduce_prompt)
    combine_documents_chain = StuffDocumentsChain(
        llm_chain=final_reduce_chain, document_variable_name="doc_summaries"
    )

    # Combines and iteratively reduces the mapped documents.
    reduce_documents_chain = ReduceDocumentsChain(
        # Final chain that is called.
        combine_documents_chain=combine_documents_chain,
        # Used when documents exceed the context for `StuffDocumentsChain`.
        collapse_documents_chain=collapse_documents_chain,
        # Maximum number of tokens to group documents into.
        token_max=500,
    )
    # Map a chain over the documents, then combine the results.
    map_reduce_chain = MapReduceDocumentsChain(
        llm_chain=map_chain,
        reduce_documents_chain=reduce_documents_chain,
        # Variable name in the llm_chain to put the documents in.
        document_variable_name="docs",
        return_intermediate_steps=False,
    )

    # Round-trip the transcript through a file so TextLoader produces
    # langchain Document objects for the splitter.
    os.makedirs('./docs', exist_ok=True)  # fix: directory may not exist yet
    with open('./docs/transcript.txt', 'w') as f:
        f.write(text)
    loader = TextLoader("./docs/transcript.txt")
    doc = loader.load()
    text_splitter = TokenTextSplitter(chunk_size=500, chunk_overlap=0)
    docs = text_splitter.split_documents(doc)
    summary = map_reduce_chain.run({'input_documents': docs, 'title': title})

    # Release GPU memory held by the model before returning.
    del llm, llm2, model, tokenizer
    torch.cuda.empty_cache()
    return summary
import gradio as gr
import pytube
from pytube import YouTube
def get_youtube_title(url):
    """Fetch and return the title of the YouTube video at *url*."""
    video = YouTube(str(url))
    return video.title
def get_video(url):
    """Return an HTML iframe that embeds the YouTube video at *url*."""
    video_id = pytube.extract.video_id(url)
    return f'<iframe width="100%" height="315" src="https://www.youtube.com/embed/{video_id}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'
def summarize_youtube_video(url, force_transcribe):
    """Transcribe the video at *url* and return a generated summary."""
    video_title, transcript = transcribe_youtube_video(url, force_transcribe)
    return summarize_text(video_title, transcript)
# Placeholder embed (no video id) shown before the first summarization.
html = '<iframe width="100%" height="315" src="https://www.youtube.com/embed/" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'

# Gradio UI: URL input and force-transcribe toggle on the left, heading and
# button on the right; title, embedded video, and summary shown below.
with gr.Blocks() as demo:
    # gr.Markdown("Transribe a YouTube video using this demo.")
    with gr.Row():
        with gr.Column(scale=3):
            url = gr.Textbox(label="Enter YouTube video URL here:",placeholder="https://www.youtube.com/watch?v=")
            force_transcribe = gr.Checkbox(label="Transcribe even if transcription is available.")
        with gr.Column(scale=1):
            gr.Markdown("# Summarize a YouTube video using this demo!",scale=2)
            sum_btn = gr.Button("Summarize!",scale=1)
    title = gr.Textbox(label="Video Title",placeholder="title...")
    with gr.Row():
        video = gr.HTML(html)
        output = gr.Textbox(label="Summary",placeholder="summary...")
    # One click fans out to three handlers: title fetch, the slow
    # summarization (queued), and the instant video embed (unqueued).
    sum_btn.click(fn=get_youtube_title, inputs=url, outputs=title, api_name="get_youtube_title")
    sum_btn.click(fn=summarize_youtube_video, inputs=[url,force_transcribe], outputs=output, api_name="summarize_youtube_video", queue=True)
    sum_btn.click(fn=get_video, inputs=url, outputs=video, api_name="get_youtube_video",queue=False)
demo.queue()
demo.launch(share=True)