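# Summarize a YouTube video: fetch an existing transcript (or transcribe the
# audio locally with Whisper), then summarize it with a LangChain map-reduce
# chain running a 4-bit quantized Falcon-7B-Instruct model behind a Gradio UI.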
def transcribe_youtube_video(url, force_transcribe=False):
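    """
    Return (video title, transcript text) for a YouTube URL.

    Tries the YouTube transcript API first; if no transcript is available
    (or force_transcribe is True), downloads the audio with pytube and
    transcribes it locally with Whisper.
    """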

    import pytube
    from pytube import YouTube

    # Construct the YouTube object up front so the title is available even when
    # the transcript comes from the API rather than from Whisper.
    yt = YouTube(str(url))
    text = ''

    # Prefer an existing transcript from the YouTube transcript API.
    try:
        from youtube_transcript_api import YouTubeTranscriptApi

        vid_id = pytube.extract.video_id(url)
        transcript = YouTubeTranscriptApi.get_transcript(vid_id)
        for entry in transcript:
            text += entry['text'] + ' '
    except Exception:
        # No transcript found (or the lookup failed); fall back to Whisper below.
        pass

    # Fall back to local Whisper transcription when no transcript was found or
    # a fresh transcription is explicitly requested.
    if text == '' or force_transcribe:
        import os
        import torch
        import transformers

        save_dir = "./docs/youtube/"
        os.makedirs(save_dir, exist_ok=True)
        audio = yt.streams.filter(only_audio=True).first()
        out_file = audio.download(filename="audio.mp3", output_path=save_dir)

        whisper_asr = transformers.pipeline(
            "automatic-speech-recognition", model="openai/whisper-large", device_map='auto',
        )

        # Force English transcription regardless of the detected audio language.
        whisper_asr.model.config.forced_decoder_ids = (
            whisper_asr.tokenizer.get_decoder_prompt_ids(
                language="en",
                task="transcribe"
            )
        )
        result = whisper_asr(out_file, chunk_length_s=20)
        text = result['text']

        # Free GPU memory before the summarization model is loaded.
        del whisper_asr
        torch.cuda.empty_cache()

    return yt.title, text

def summarize_text(title,text):
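    """
    Summarize a transcript with a LangChain map-reduce chain.

    The transcript is split into 500-token chunks, each chunk is summarized
    ("map"), partial summaries are collapsed if they exceed the context budget,
    and a final pass produces the video subject plus key highlights.
    """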
    
    from langchain.chains.llm import LLMChain
    from langchain.prompts import PromptTemplate
    from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain
    from langchain.chains.combine_documents.stuff import StuffDocumentsChain
    import torch
    import transformers
    from transformers import BitsAndBytesConfig
    from transformers import AutoTokenizer, AutoModelForCausalLM

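    # 4-bit NF4 quantization (with double quantization) keeps the 7B model's
    # weights down to a few GB of GPU memory; compute runs in float16.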
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )

    # model_name = "nomic-ai/gpt4all-falcon"
    model_name = "tiiuae/falcon-7b-instruct"

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        # trust_remote_code=True,
        quantization_config=quantization_config,
    )

    from langchain import HuggingFacePipeline

    # Shorter-output pipeline used for the map and collapse steps.
    pipeline = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        max_new_tokens=150,
        pad_token_id=tokenizer.eos_token_id,
        # device=-1,
    )

    llm = HuggingFacePipeline(pipeline=pipeline)

    # Longer-output pipeline for the final summary; the repetition penalty
    # discourages the model from repeating the same points across bullets.
    pipeline2 = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        max_new_tokens=250,
        pad_token_id=tokenizer.eos_token_id,
        repetition_penalty=2.0,
        # device=-1,
    )

    llm2 = HuggingFacePipeline(pipeline=pipeline2)

    # Map
    map_template = """
    Summarize the following text in a clear and concise way:
    TITLE: `{title}`
    TEXT:`{docs}`
    Brief Summary:
    """
    map_prompt = PromptTemplate(template=map_template,
                                input_variables=['title', 'docs'])
    map_chain = LLMChain(llm=llm, prompt=map_prompt)

    # Reduce - Collapse
    reduce_template = """
    The following is a set of partial summaries of a video titled {title}:
    partial summaries: {doc_summaries}
    Take these and distill them into a consolidated summary.
    Summary:
    """

    reduce_prompt = PromptTemplate(template=reduce_template,
                                   input_variables=['title', 'doc_summaries'])
    reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)

    # Takes a list of documents, combines them into a single string, and passes this to an LLMChain
    collapse_documents_chain = StuffDocumentsChain(
        llm_chain=reduce_chain, document_variable_name="doc_summaries"
    )

    # Final Reduce - Combine
    final_reduce_template = """
    The following is a set of partial summaries of a video titled '{title}':
    partial summaries:

    {doc_summaries}

    Generate a summary of the whole text that includes the `Video Subject` and the `Key Highlights` as at most 10 bullet points listing the main facts, arguments, or points:
    """
    final_reduce_prompt = PromptTemplate(template=final_reduce_template,
                                         input_variables=['title', 'doc_summaries'])
    final_reduce_chain = LLMChain(llm=llm2, prompt=final_reduce_prompt)

    # Takes a list of documents, combines them into a single string, and passes this to an LLMChain
    combine_documents_chain = StuffDocumentsChain(
        llm_chain=final_reduce_chain, document_variable_name="doc_summaries"
    )

    # Combines and iteratively reduces the mapped documents
    reduce_documents_chain = ReduceDocumentsChain(
        # This is the final chain that is called.
        combine_documents_chain=combine_documents_chain,
        # Used if the documents exceed the context for `StuffDocumentsChain`
        collapse_documents_chain=collapse_documents_chain,
        # The maximum number of tokens to group documents into
        # (matches the 500-token chunk size used by the text splitter below).
        token_max=500,
    )

    # Combining documents by mapping a chain over them, then combining results
    map_reduce_chain = MapReduceDocumentsChain(
        # Map chain
        llm_chain=map_chain,
        # Reduce chain
        reduce_documents_chain=reduce_documents_chain,
        # The variable name in the llm_chain to put the documents in
        document_variable_name="docs",
        # Do not return the intermediate map results in the output
        return_intermediate_steps=False,
    )

    from langchain.document_loaders import TextLoader
    from langchain.text_splitter import TokenTextSplitter
    import os

    # Persist the transcript so TextLoader can read it back as a Document.
    os.makedirs('./docs', exist_ok=True)
    with open('./docs/transcript.txt', 'w') as f:
        f.write(text)
    loader = TextLoader("./docs/transcript.txt")
    doc = loader.load()
    # Split the transcript into 500-token chunks for the map step.
    text_splitter = TokenTextSplitter(chunk_size=500, chunk_overlap=0)
    docs = text_splitter.split_documents(doc)

    summary = map_reduce_chain.run({'input_documents': docs, 'title': title})

    # Release the models and free GPU memory before returning.
    del llm, llm2, model, tokenizer
    torch.cuda.empty_cache()

    return summary

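# Gradio UI: a single "Summarize!" button updates the video title, embeds the
# player, and runs the transcribe-and-summarize pipeline.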
import gradio as gr 
import pytube
from pytube import YouTube

def get_youtube_title(url):
    yt = YouTube(str(url))
    return yt.title

def get_video(url):
    vid_id = pytube.extract.video_id(url)
    embed_html = '<iframe width="100%" height="315" src="https://www.youtube.com/embed/{}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'.format(vid_id)
    return embed_html

def summarize_youtube_video(url, force_transcribe):
    title, text = transcribe_youtube_video(url, force_transcribe)
    summary = summarize_text(title, text)
    return summary

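# Placeholder player shown before a URL is submitted; get_video() replaces it
# with the embed for the requested video.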
html = '<iframe width="100%" height="315" src="https://www.youtube.com/embed/" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'

with gr.Blocks() as demo:
    # gr.Markdown("Transcribe a YouTube video using this demo.")
    with gr.Row():
        with gr.Column(scale=3):
            url = gr.Textbox(label="Enter YouTube video URL here:", placeholder="https://www.youtube.com/watch?v=")
            force_transcribe = gr.Checkbox(label="Transcribe with Whisper even if a transcript is already available.")
        with gr.Column(scale=1):
            gr.Markdown("# Summarize a YouTube video using this demo!", scale=2)
            sum_btn = gr.Button("Summarize!", scale=1)
    title = gr.Textbox(label="Video Title", placeholder="title...")
    with gr.Row():
        video = gr.HTML(html)
        output = gr.Textbox(label="Summary",placeholder="summary...")
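    # All three callbacks fire on the same button click: one fills the title,
    # one embeds the player (queue=False), and one produces the summary
    # (queued, since it is slow).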
    sum_btn.click(fn=get_youtube_title, inputs=url, outputs=title, api_name="get_youtube_title")
    sum_btn.click(fn=summarize_youtube_video, inputs=[url,force_transcribe], outputs=output, api_name="summarize_youtube_video", queue=True)
    sum_btn.click(fn=get_video, inputs=url, outputs=video, api_name="get_youtube_video",queue=False)

demo.queue()
demo.launch(share=True)