smakamali committed on
Commit 7c42e43
1 Parent(s): 7330707

updating the app to v1.2

Files changed (1):
  1. app.py +306 -141
app.py CHANGED
@@ -1,52 +1,116 @@
-def transcribe_youtube_video(url, force_transcribe=False):
-
+import os
+save_dir="./docs/youtube/"
+if not os.path.exists(save_dir):
+    os.mkdir(save_dir)
+
+transcription_model_id = "openai/whisper-large"
+llm_model_id = "tiiuae/falcon-7b-instruct"
+
+from youtube_transcript_api import YouTubeTranscriptApi
+import pytube
+
+# get the transcript from YouTube
+def get_yt_transcript(url):
     text = ''
-    try:
-        from youtube_transcript_api import YouTubeTranscriptApi
-        import pytube
-        from pytube import YouTube
-
-        vid_id = pytube.extract.video_id(url)
-        temp = YouTubeTranscriptApi.get_transcript(vid_id)
-        for t in temp:
-            text+=t['text']+' '
-        yt = YouTube(str(url))
-
-    except:
-        pass
+    vid_id = pytube.extract.video_id(url)
+    temp = YouTubeTranscriptApi.get_transcript(vid_id)
+    for t in temp:
+        text+=t['text']+' '
+    return text
 
-    if text == '' or force_transcribe:
-        from pytube import YouTube
-        import torch
-        import os
-
-        save_dir="./docs/youtube/"
-        os.mkdir(save_dir)
-        yt = YouTube(str(url))
-        audio = yt.streams.filter(only_audio = True).first()
-        out_file = audio.download(filename="audio.mp3",output_path = save_dir)
-
-        import transformers
+from pytube import YouTube
+import transformers
+import torch
 
-        whisper_asr = transformers.pipeline(
-            "automatic-speech-recognition", model="openai/whisper-large", device_map= 'auto',
-        )
-
-        whisper_asr.model.config.forced_decoder_ids = (
-            whisper_asr.tokenizer.get_decoder_prompt_ids(
-                language="en",
-                task="transcribe"
-            )
+# transcribes the video
+def transcribe_yt_vid(url):
+    # download YouTube video's audio
+    yt = YouTube(str(url))
+    audio = yt.streams.filter(only_audio = True).first()
+    out_file = audio.download(filename="audio.mp3",
+                              output_path = save_dir)
+
+    # defining an automatic-speech-recognition pipeline
+    asr = transformers.pipeline(
+        "automatic-speech-recognition",
+        model=transcription_model_id,
+        device_map= 'auto',
+    )
+
+    # setting model config parameters
+    asr.model.config.forced_decoder_ids = (
+        asr.tokenizer.get_decoder_prompt_ids(
+            language="en",
+            task="transcribe"
         )
-        temp = whisper_asr(out_file,chunk_length_s=20)
-        text = temp['text']
+    )
 
-        del(whisper_asr)
-        torch.cuda.empty_cache()
+    # invoking the Whisper model
+    temp = asr(out_file,chunk_length_s=20)
+    text = temp['text']
 
-        return yt.title, text
+    # we can do this at the end to release GPU memory
+    del(asr)
+    torch.cuda.empty_cache()
+
+    return text
 
-def summarize_text(title,text):
+from pytube import YouTube
+from huggingface_hub import InferenceClient
+
+# transcribes the video using the Hugging Face Hub API
+def transcribe_yt_vid_api(url,api_token):
+    # download YouTube video's audio
+    yt = YouTube(str(url))
+    audio = yt.streams.filter(only_audio = True).first()
+    out_file = audio.download(filename="audio.mp3",
+                              output_path = save_dir)
+
+    # Initialize client for the Whisper model
+    client = InferenceClient(model=transcription_model_id,
+                             token=api_token)
+
+    import librosa
+    import soundfile as sf
+
+    text = ''
+    t=20 # audio chunk length in seconds
+    x, sr = librosa.load(out_file, sr=None)
+    # This gives x as an audio array in numpy and sr as the original sampling rate.
+    # The audio needs to be split into 20-second chunks since the API call truncates the response.
+    for i in range(0, len(x)//(t * sr)):
+        y = x[t * sr * i: t * sr *(i+1)]
+        split_path = save_dir+"audio_split.mp3"
+        sf.write(split_path, y, sr)
+        text += client.automatic_speech_recognition(split_path)
+
+    return text
+
+def transcribe_youtube_video(url, force_transcribe=False,use_api=False,api_token=None):
+
+    yt = YouTube(str(url))
+    text = ''
+    # get the transcript from YouTube if available
+    try:
+        text = get_yt_transcript(url)
+    except:
+        pass
+
+    # transcribes the video if YouTube did not provide a transcription
+    # or if you want to force_transcribe anyway
+    if text == '' or force_transcribe:
+        if use_api:
+            text = transcribe_yt_vid_api(url,api_token=api_token)
+            transcript_source = 'The transcript was generated using {} via the Hugging Face Hub API.'.format(transcription_model_id)
+        else:
+            text = transcribe_yt_vid(url)
+            transcript_source = 'The transcript was generated using {} hosted locally.'.format(transcription_model_id)
+    else:
+        transcript_source = 'The transcript was downloaded from YouTube.'
+
+    return yt.title, text, transcript_source
+
+def summarize_text(title,text,temperature,words,use_api=False,api_token=None,do_sample=False):
 
     from langchain.chains.llm import LLMChain
     from langchain.prompts import PromptTemplate
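Note on the new chunked API transcription above: `transcribe_yt_vid_api` splits the audio into 20-second windows because a single `InferenceClient.automatic_speech_recognition` call truncates long inputs. The floor division `len(x)//(t * sr)` does, however, skip a trailing partial window. A minimal standalone sketch of the same idea that also transcribes the remainder; the helper name and the WAV scratch file are illustrative, not part of the commit:

```python
import librosa
import soundfile as sf
from huggingface_hub import InferenceClient

def transcribe_in_chunks(audio_path, api_token, chunk_s=20):
    # hypothetical helper mirroring transcribe_yt_vid_api from this commit
    client = InferenceClient(model="openai/whisper-large", token=api_token)
    x, sr = librosa.load(audio_path, sr=None)  # keep the native sampling rate
    step = chunk_s * sr
    text = ''
    # stepping through the array (rather than floor-dividing its length)
    # also yields the final partial window, so no trailing audio is dropped
    for start in range(0, len(x), step):
        sf.write("chunk.wav", x[start:start + step], sr)  # WAV sidesteps mp3 codec support issues
        text += client.automatic_speech_recognition("chunk.wav")
    return text
```

As in the diff, this assumes the endpoint returns the transcribed text for each chunk as a plain string.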
@@ -56,108 +120,153 @@ def summarize_text(title,text):
     import transformers
     from transformers import BitsAndBytesConfig
     from transformers import AutoTokenizer, AutoModelForCausalLM
-
-    # quantization_config = BitsAndBytesConfig(
-    #     load_in_4bit=True,
-    #     bnb_4bit_compute_dtype=torch.float16,
-    #     bnb_4bit_quant_type="nf4",
-    #     bnb_4bit_use_double_quant=True,
-    # )
-
-    # model = "nomic-ai/gpt4all-falcon"
-    model = "tiiuae/falcon-7b-instruct"
-
-    tokenizer = AutoTokenizer.from_pretrained(model,trust_remote_code=True,)
-    model = AutoModelForCausalLM.from_pretrained(model,
-        # trust_remote_code=True,
-        # quantization_config=quantization_config,
-    )
-
+
     from langchain import HuggingFacePipeline
     import torch
 
-    pipeline = transformers.pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        torch_dtype=torch.bfloat16,
-        device_map="auto",
-        max_new_tokens = 150,
-        pad_token_id=tokenizer.eos_token_id,
-        # device=-1,
-    )
+    model_kwargs1 = {"temperature":temperature ,
+                     "do_sample":do_sample,
+                     "min_new_tokens":200-25,
+                     "max_new_tokens":200+25
+                     }
+    model_kwargs2 = {"temperature":temperature ,
+                     "do_sample":do_sample,
+                     "min_new_tokens":words-25,
+                     "max_new_tokens":words+25,
+                     'repetition_penalty':2.0
+                     }
+    if not do_sample:
+        del model_kwargs1["temperature"]
+        del model_kwargs2["temperature"]
+
+    if use_api:
+
+        from langchain import HuggingFaceHub
 
-    llm = HuggingFacePipeline(pipeline=pipeline)
-
-    pipeline2 = transformers.pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        torch_dtype=torch.bfloat16,
-        device_map="auto",
-        max_new_tokens = 250,
-        pad_token_id=tokenizer.eos_token_id,
-        repetition_penalty= 2.0,
-        # device=-1,
-    )
+        # os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_token
+        llm=HuggingFaceHub(
+            repo_id=llm_model_id, model_kwargs=model_kwargs1,
+            huggingfacehub_api_token=api_token
+        )
+        llm2=HuggingFaceHub(
+            repo_id=llm_model_id, model_kwargs=model_kwargs2,
+            huggingfacehub_api_token=api_token
+        )
+        summary_source = 'The summary was generated using {} via Hugging Face API.'.format(llm_model_id)
+
+    else:
+        quantization_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_compute_dtype=torch.float16,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_use_double_quant=True,
+        )
+
+        tokenizer = AutoTokenizer.from_pretrained(llm_model_id)
+        model = AutoModelForCausalLM.from_pretrained(llm_model_id,
+                                                     quantization_config=quantization_config)
+        model.to_bettertransformer()
+
+        pipeline = transformers.pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            torch_dtype=torch.bfloat16,
+            device_map="auto",
+            pad_token_id=tokenizer.eos_token_id,
+            **model_kwargs1,
+        )
+        pipeline2 = transformers.pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            torch_dtype=torch.bfloat16,
+            device_map="auto",
+            pad_token_id=tokenizer.eos_token_id,
+            **model_kwargs2,
+        )
+        llm = HuggingFacePipeline(pipeline=pipeline)
+        llm2 = HuggingFacePipeline(pipeline=pipeline2)
 
-    llm2 = HuggingFacePipeline(pipeline=pipeline2)
+        summary_source = 'The summary was generated using {} hosted locally.'.format(llm_model_id)
 
     # Map
     map_template = """
-    Summarize the following text in a clear and concise way:
-    TITLE: `{title}`
-    TEXT:`{docs}`
-    Brief Summary:
+    You are an intelligent AI assistant that is tasked to review the content of a video and provide a concise and accurate summary.\n
+    You do not provide information that is not mentioned in the video. You only provide information that you are absolutely sure about.\n
+    Summarize the following text in a clear and concise way:\n
+    ----------------------- \n
+    TITLE: `{title}`\n
+    TEXT:\n
+    `{docs}`\n
+    ----------------------- \n
+    BRIEF SUMMARY:\n
     """
-    map_prompt = PromptTemplate(template = map_template,
-                                input_variables = ['title','docs'])
+    map_prompt = PromptTemplate(
+        template = map_template,
+        input_variables = ['title','docs']
+    )
     map_chain = LLMChain(llm=llm, prompt=map_prompt)
 
     # Reduce - Collapse
-    reduce_template = """
-    The following is set of partial summaries of a video titled {title}:
-    partial summaries: {doc_summaries}
-    Take these and distill them into a consolidated summary.
-    Summary:
+    collapse_template = """
+    You are an intelligent AI assistant that is tasked to review the content of a video and provide a concise and accurate summary.\n
+    You do not provide information that is not mentioned in the video. You only provide information that you are absolutely sure about.\n
+    The following is a set of partial summaries of a video:\n
+    ----------------------- \n
+    TITLE: `{title}`\n
+    PARTIAL SUMMARIES:\n
+    `{doc_summaries}`\n
+    ----------------------- \n
+    Take these and distill them into a consolidated summary.\n
+    SUMMARY:\n
     """
 
-    reduce_prompt = PromptTemplate(template = reduce_template,
-                                   input_variables = ['title','doc_summaries'])
-    reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)
+    collapse_prompt = PromptTemplate(
+        template = collapse_template,
+        input_variables = ['title','doc_summaries']
+    )
+    collapse_chain = LLMChain(llm=llm, prompt=collapse_prompt)
 
     # Takes a list of documents, combines them into a single string, and passes this to an LLMChain
     collapse_documents_chain = StuffDocumentsChain(
-        llm_chain=reduce_chain, document_variable_name="doc_summaries"
-    )
+        llm_chain=collapse_chain, document_variable_name="doc_summaries"
+    )
 
     # Final Reduce - Combine
-    final_reduce_template = """
-    The following is set of partial summaries of a video titled '{title}':
-    partial summaries:
-
-    {doc_summaries}
-
-    Generate a summary of the whole text that includes `Video Subject`, and the `Key Highlights` as maximum 10 pullet points listing the main facts, arguments, or points:
+    combine_template = """\n
+    You are an intelligent AI assistant that is tasked to review the content of a video and provide a concise and accurate summary.\n
+    You do not provide information that is not mentioned in the video. You only provide information that you are absolutely sure about.\n
+    The following is a set of partial summaries of a video:\n
+    ----------------------- \n
+    TITLE: `{title}`\n
+    PARTIAL SUMMARIES:\n
+    `{doc_summaries}`\n
+    ----------------------- \n
+    Generate an executive summary of the whole text in maximum {words} words that contains the main messages, points, and arguments presented in the video.\n
+    EXECUTIVE SUMMARY:\n
     """
-    final_reduce_prompt = PromptTemplate(template = final_reduce_template,
-                                         input_variables = ['title','doc_summaries'])
-    final_reduce_chain = LLMChain(llm=llm2, prompt=final_reduce_prompt)
+    combine_prompt = PromptTemplate(
+        template = combine_template,
+        input_variables = ['title','doc_summaries','words']
+    )
+    combine_chain = LLMChain(llm=llm2, prompt=combine_prompt)
 
     # Takes a list of documents, combines them into a single string, and passes this to an LLMChain
     combine_documents_chain = StuffDocumentsChain(
-        llm_chain=final_reduce_chain, document_variable_name="doc_summaries"
-    )
+        llm_chain=combine_chain, document_variable_name="doc_summaries"
+    )
 
-    # Combines and iteravely reduces the mapped documents
+    # Combines and iteratively reduces the mapped documents
     reduce_documents_chain = ReduceDocumentsChain(
         # This is final chain that is called.
         combine_documents_chain=combine_documents_chain,
         # If documents exceed context for `StuffDocumentsChain`
         collapse_documents_chain=collapse_documents_chain,
         # The maximum number of tokens to group documents into.
-        token_max=500,
-    )
+        token_max=800,
+    )
 
     # Combining documents by mapping a chain over them, then combining results
     map_reduce_chain = MapReduceDocumentsChain(
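The rewritten `summarize_text` wires up LangChain's map-reduce summarization: `map_chain` summarizes each transcript chunk, `collapse_chain` folds partial summaries together whenever they exceed `token_max`, and `combine_chain` writes the final executive summary. A self-contained sketch of the same wiring that runs offline using LangChain's `FakeListLLM` stand-in; the prompts and documents here are toy placeholders, not the commit's:

```python
from langchain.llms.fake import FakeListLLM
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain
from langchain.docstore.document import Document

# canned responses instead of Falcon-7B, so the wiring runs without a GPU or token
llm = FakeListLLM(responses=["partial summary", "partial summary", "final summary"])

map_chain = LLMChain(llm=llm, prompt=PromptTemplate(
    template="Summarize `{docs}` from the video `{title}`:",
    input_variables=["title", "docs"]))
reduce_chain = LLMChain(llm=llm, prompt=PromptTemplate(
    template="Combine `{doc_summaries}` from the video `{title}`:",
    input_variables=["title", "doc_summaries"]))

stuff_chain = StuffDocumentsChain(
    llm_chain=reduce_chain, document_variable_name="doc_summaries")
reduce_documents_chain = ReduceDocumentsChain(
    combine_documents_chain=stuff_chain,   # final combine step
    collapse_documents_chain=stuff_chain,  # reused here; the commit uses a separate collapse prompt
    token_max=800)
map_reduce_chain = MapReduceDocumentsChain(
    llm_chain=map_chain,
    reduce_documents_chain=reduce_documents_chain,
    document_variable_name="docs")

docs = [Document(page_content="chunk one"), Document(page_content="chunk two")]
print(map_reduce_chain.run({"input_documents": docs, "title": "demo"}))
```

Extra keys passed to `run` (here `title`; `title` and `words` in the commit) are forwarded to every prompt in the chain, which is why the new templates can reference them.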
@@ -169,27 +278,27 @@ def summarize_text(title,text):
         document_variable_name="docs",
         # Return the results of the map steps in the output
         return_intermediate_steps=False,
-    )
+    )
 
     from langchain.document_loaders import TextLoader
     from langchain.text_splitter import TokenTextSplitter
 
-    with open('./docs/transcript.txt','w') as f:
+    with open(save_dir+'/transcript.txt','w') as f:
         f.write(text)
-    loader = TextLoader("./docs/transcript.txt")
+    loader = TextLoader(save_dir+"/transcript.txt")
     doc = loader.load()
-    text_splitter = TokenTextSplitter(chunk_size=500, chunk_overlap=0)
+    text_splitter = TokenTextSplitter(chunk_size=800, chunk_overlap=100)
     docs = text_splitter.split_documents(doc)
 
-    summary = map_reduce_chain.run({'input_documents':docs, 'title':title})
+    summary = map_reduce_chain.run({'input_documents':docs, 'title':title, 'words':words})
 
-    # del(llm)
-    # del(llm2)
-    # del(model)
-    # del(tokenizer)
-    # torch.cuda.empty_cache()
+    try:
+        del(map_reduce_chain,reduce_documents_chain,combine_chain,collapse_documents_chain,map_chain,collapse_chain,llm,llm2,pipeline,pipeline2,model,tokenizer)
+    except:
+        pass
+    torch.cuda.empty_cache()
 
-    return summary
+    return summary, summary_source
 
 import gradio as gr
 import pytube
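The splitter settings above change from 500-token chunks with no overlap to 800-token chunks with a 100-token overlap, so sentences cut at a chunk boundary still appear whole in one of the two neighbouring chunks. `TokenTextSplitter` counts tokens, not characters. A quick sketch of the effect (chunk counts are approximate, since they depend on the tokenizer):

```python
from langchain.text_splitter import TokenTextSplitter

text = "word " * 3000  # roughly 3000 tokens of dummy transcript

old_splitter = TokenTextSplitter(chunk_size=500, chunk_overlap=0)
new_splitter = TokenTextSplitter(chunk_size=800, chunk_overlap=100)

# fewer, larger chunks mean fewer map calls, and the 100-token overlap
# duplicates a little text between neighbouring chunks to preserve context
print(len(old_splitter.split_text(text)))  # ~6 chunks (3000 / 500)
print(len(new_splitter.split_text(text)))  # ~5 chunks (step of 800 - 100 = 700)
```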
@@ -204,29 +313,85 @@ def get_video(url):
     embed_html = '<iframe width="100%" height="315" src="https://www.youtube.com/embed/{}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'.format(vid_id)
     return embed_html
 
-def summarize_youtube_video(url,force_transcribe):
-    title,text = transcribe_youtube_video(url,force_transcribe)
-    Summary = summarize_text(title,text)
-    return Summary
+def summarize_youtube_video(url,force_transcribe,use_transcribe_api=False,api_token="",
+                            temperature=1,words=250,use_llm_api=False,do_sample=False):
+    title,text,transcript_source = transcribe_youtube_video(url,force_transcribe,use_transcribe_api,api_token)
+    summary, summary_source = summarize_text(title,text,temperature,words,use_llm_api,api_token,do_sample)
+    return summary, text, transcript_source, summary_source
 
 html = '<iframe width="100%" height="315" src="https://www.youtube.com/embed/" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'
 
+def change_transcribe_api(vis):
+    return gr.Checkbox(value=False, visible=vis)
+
+def change_api_token(vis):
+    return gr.Textbox(visible=vis)
+
+def update_source(source):
+    return gr.Textbox(info=source)
+
+def show_temp(vis):
+    return gr.Slider(visible=vis)
+
+# Defining the structure of the UI
 with gr.Blocks() as demo:
-    # gr.Markdown("Transribe a YouTube video using this demo.")
     with gr.Row():
-        with gr.Column(scale=3):
+        gr.Markdown("# Summarize a YouTube Video")
+
+    with gr.Row():
+        with gr.Column(scale=4):
             url = gr.Textbox(label="Enter YouTube video URL here:",placeholder="https://www.youtube.com/watch?v=")
-            force_transcribe = gr.Checkbox(label="Transcribe even if transcription is available.")
         with gr.Column(scale=1):
-            gr.Markdown("# Summarize a YouTube video using this demo!",scale=2)
-            sum_btn = gr.Button("Summarize!",scale=1)
+            api_token = gr.Textbox(label="Paste your Hugging Face API token here:",placeholder="hf_...",visible=False,show_label=True,info='The API token passed via this field is not stored. It is only passed through the Hugging Face Hub API for inference.')
+        with gr.Column(scale=1):
+            sum_btn = gr.Button("Summarize!")
+
+    with gr.Accordion("Transcription Settings",open=False):
+        with gr.Row():
+            force_transcribe = gr.Checkbox(label="Transcribe even if transcription is available.", info='If unchecked, the app attempts to download the transcript from YouTube first. Check this if the transcript does not seem accurate.')
+            use_transcribe_api = gr.Checkbox(label="Transcribe using the HuggingFaceHub API.",visible=False)
+
+    with gr.Accordion("Summarization Settings",open=False):
+        with gr.Row():
+            use_llm_api = gr.Checkbox(label="Summarize using the HuggingFaceHub API.",visible=True)
+            do_sample = gr.Checkbox(label="Set the Temperature",value=True,visible=True)
+            temperature = gr.Slider(minimum=0,maximum=1,value=0.9,label="Generation temperature",visible=True)
+            words = gr.Slider(minimum=100,maximum=500,value=250,label="Length of the summary")
+
+    gr.Markdown("# Results")
+
     title = gr.Textbox(label="Video Title",placeholder="title...")
+
+    with gr.Row():
+        video = gr.HTML(html,scale=1)
+        summary_source = gr.Textbox(visible=False,scale=0)
+        summary = gr.Textbox(label="Summary",placeholder="summary...",scale=1)
+
     with gr.Row():
-        video = gr.HTML(html)
-        output = gr.Textbox(label="Summary",placeholder="summary...")
-    sum_btn.click(fn=get_youtube_title, inputs=url, outputs=title, api_name="get_youtube_title")
-    sum_btn.click(fn=summarize_youtube_video, inputs=[url,force_transcribe], outputs=output, api_name="summarize_youtube_video", queue=True)
-    sum_btn.click(fn=get_video, inputs=url, outputs=video, api_name="get_youtube_video",queue=False)
-
-    demo.queue()
-    demo.launch()
+        with gr.Group():
+            transcript_source = gr.Textbox(visible=False)
+            transcript = gr.Textbox(label="Full Transcript",placeholder="transcript...",show_label=True)
+
+    with gr.Accordion("Notes",open=False):
+        gr.Markdown("""
+        1. This app attempts to download the transcript from YouTube first. If the transcript is not available, or the prompts require, the video will be transcribed.\n
+        2. The app performs best on videos in which the number of speakers is limited or when the YouTube transcript includes annotations of the speakers.\n
+        3. The transcription does not annotate the speakers, which may degrade the quality of the summary if there is more than one speaker.\n
+        """)
+
+    # Defining the interactivity of the UI elements
+    force_transcribe.change(fn=change_transcribe_api,inputs=force_transcribe,outputs=use_transcribe_api)
+    use_transcribe_api.change(fn=change_api_token,inputs=use_transcribe_api,outputs=api_token)
+    use_llm_api.change(fn=change_api_token,inputs=use_llm_api,outputs=api_token)
+    transcript_source.change(fn=update_source,inputs=transcript_source,outputs=transcript)
+    summary_source.change(fn=update_source,inputs=summary_source,outputs=summary)
+    do_sample.change(fn=show_temp,inputs=do_sample,outputs=temperature)
+
+    # Defining the functions to call on clicking the button
+    sum_btn.click(fn=get_youtube_title, inputs=url, outputs=title, api_name="get_youtube_title", queue=False)
+    sum_btn.click(fn=summarize_youtube_video, inputs=[url,force_transcribe,use_transcribe_api,api_token,temperature,words,use_llm_api,do_sample],
+                  outputs=[summary,transcript, transcript_source, summary_source], api_name="summarize_youtube_video", queue=True)
+    sum_btn.click(fn=get_video, inputs=url, outputs=video, api_name="get_youtube_video", queue=False)
+
+demo.queue()
+demo.launch(share=False)
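On the UI side, the commit toggles widget visibility by returning freshly constructed components from `.change` handlers (`change_transcribe_api`, `change_api_token`, `show_temp`), e.g. hiding the API-token box until an API checkbox is ticked. A minimal sketch of the pattern, assuming a Gradio version in which a component instance returned from an event handler is applied as an update to the output component (older 3.x releases expressed this as `gr.update(visible=...)`):

```python
import gradio as gr

def toggle_token_box(use_api):
    # returning a component instance updates the existing output component;
    # only the properties set here (visibility) are changed
    return gr.Textbox(visible=use_api)

with gr.Blocks() as demo:
    use_api = gr.Checkbox(label="Use the Hugging Face Hub API")
    api_token = gr.Textbox(label="API token", placeholder="hf_...", visible=False)
    use_api.change(fn=toggle_token_box, inputs=use_api, outputs=api_token)

demo.launch()
```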
 