smakamali committed on
Commit
8aed00f
1 Parent(s): 1f6084d

fix the bug in the transcribe_yt_vid_api function

Browse files
Files changed (1) hide show
  1. app.py +25 -23
app.py CHANGED
@@ -6,7 +6,6 @@ if not os.path.exists(save_dir):
6
  transcription_model_id = "openai/whisper-large"
7
  llm_model_id = "tiiuae/falcon-7b-instruct"
8
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
9
- # HF_TOKEN = f"Bearer {HF_TOKEN}"
10
 
11
  from youtube_transcript_api import YouTubeTranscriptApi
12
  import pytube
@@ -65,7 +64,7 @@ def transcribe_yt_vid_api(url,api_token):
65
  # download YouTube video's audio
66
  yt = YouTube(str(url))
67
  audio = yt.streams.filter(only_audio = True).first()
68
- out_file = audio.download(filename="audio.mp3",
69
  output_path = save_dir)
70
 
71
  # Initialize client for the Whisper model
@@ -76,13 +75,13 @@ def transcribe_yt_vid_api(url,api_token):
76
  import soundfile as sf
77
 
78
  text = ''
79
- t=20 # audio chunk length in seconds
80
  x, sr = librosa.load(out_file, sr=None)
81
  # This gives x as audio file in numpy array and sr as original sampling rate
82
  # The audio needs to be split in 20 second chunks since the API call truncates the response
83
- for i in range(0, len(x)//(t * sr)):
84
  y = x[t * sr * i: t * sr *(i+1)]
85
- split_path = save_dir+"audio_split.mp3"
86
  sf.write(split_path, y, sr)
87
  text += client.automatic_speech_recognition(split_path)
88
 
@@ -316,19 +315,22 @@ def get_video(url):
316
  embed_html = '<iframe width="100%" height="315" src="https://www.youtube.com/embed/{}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'.format(vid_id)
317
  return embed_html
318
 
319
- def summarize_youtube_video(url,force_transcribe,use_transcribe_api=False,api_token="",
320
- temperature=1,words=250,use_llm_api=False,do_sample=False):
321
- title,text,transcript_source = transcribe_youtube_video(url,force_transcribe,use_transcribe_api,HF_TOKEN)
322
- summary, summary_source = summarize_text(title,text,temperature,words,use_llm_api,HF_TOKEN,do_sample)
 
 
 
323
  return summary, text, transcript_source, summary_source
324
 
325
  html = '<iframe width="100%" height="315" src="https://www.youtube.com/embed/" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'
326
 
327
- def change_transcribe_api(vis):
328
- return gr.Checkbox(value=True, visible=vis)
329
 
330
- def change_api_token(vis):
331
- return gr.Textbox(visible=vis)
332
 
333
  def update_source(source):
334
  return gr.Textbox(info=source)
@@ -345,21 +347,21 @@ with gr.Blocks() as demo:
345
  with gr.Column(scale=4):
346
  url = gr.Textbox(label="Enter YouTube video URL here:",placeholder="https://www.youtube.com/watch?v=")
347
  with gr.Column(scale=1):
348
- api_token = gr.Textbox(label="Paste your Hugging Face API token here (optional):",placeholder="hf_...",visible=False,show_label=True,info='The API token passed via this field is not stored. It is only passed through the Hugging Face Hub API for inference.')
349
  with gr.Column(scale=1):
350
  sum_btn = gr.Button("Summarize!")
351
 
352
  with gr.Accordion("Transcription Settings",open=False):
353
  with gr.Row():
354
  force_transcribe = gr.Checkbox(label="Transcribe even if transcription is available.", info='If unchecked, the app attempts to download the transcript from YouTube first. Check this if the transcript does not seem accurate.')
355
- use_transcribe_api = gr.Checkbox(label="Transcribe using the HuggingFaceHub API.",value=True,visible=False)
356
 
357
  with gr.Accordion("Summarization Settings",open=False):
358
  with gr.Row():
359
- use_llm_api = gr.Checkbox(label="Summarize using the HuggingFaceHub API.",value=True,visible=True)
360
  do_sample = gr.Checkbox(label="Set the Temperature",value=True,visible=True)
361
  temperature = gr.Slider(minimum=0,maximum=1,value=1.0,label="Generation temperature",visible=True)
362
- words = gr.Slider(minimum=100,maximum=500,value=150,label="Length of the summary")
363
 
364
  gr.Markdown("# Results")
365
 
@@ -372,8 +374,8 @@ with gr.Blocks() as demo:
372
 
373
  with gr.Row():
374
  with gr.Group():
375
- transcript_source = gr.Textbox(visible=False)
376
  transcript = gr.Textbox(label="Full Transcript",placeholder="transcript...",show_label=True)
 
377
 
378
  with gr.Accordion("Notes",open=False):
379
  gr.Markdown("""
@@ -383,18 +385,18 @@ with gr.Blocks() as demo:
383
  """)
384
 
385
  # Defining the interactivity of the UI elements
386
- force_transcribe.change(fn=change_transcribe_api,inputs=force_transcribe,outputs=use_transcribe_api)
387
- use_transcribe_api.change(fn=change_api_token,inputs=use_transcribe_api,outputs=api_token)
388
- use_llm_api.change(fn=change_api_token,inputs=use_llm_api,outputs=api_token)
389
  transcript_source.change(fn=update_source,inputs=transcript_source,outputs=transcript)
390
  summary_source.change(fn=update_source,inputs=summary_source,outputs=summary)
391
  do_sample.change(fn=show_temp,inputs=do_sample,outputs=temperature)
392
 
393
  # Defining the functions to call on clicking the button
394
  sum_btn.click(fn=get_youtube_title, inputs=url, outputs=title, api_name="get_youtube_title", queue=False)
395
- sum_btn.click(fn=summarize_youtube_video, inputs=[url,force_transcribe,use_transcribe_api,api_token,temperature,words,use_llm_api,do_sample],
396
  outputs=[summary,transcript, transcript_source, summary_source], api_name="summarize_youtube_video", queue=True)
397
  sum_btn.click(fn=get_video, inputs=url, outputs=video, api_name="get_youtube_video", queue=False)
398
 
399
  demo.queue()
400
- demo.launch(share=False)
 
6
  transcription_model_id = "openai/whisper-large"
7
  llm_model_id = "tiiuae/falcon-7b-instruct"
8
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
9
 
10
  from youtube_transcript_api import YouTubeTranscriptApi
11
  import pytube
 
64
  # download YouTube video's audio
65
  yt = YouTube(str(url))
66
  audio = yt.streams.filter(only_audio = True).first()
67
+ out_file = audio.download(filename="audio.wav",
68
  output_path = save_dir)
69
 
70
  # Initialize client for the Whisper model
 
75
  import soundfile as sf
76
 
77
  text = ''
78
+ t=25 # audio chunk length in seconds
79
  x, sr = librosa.load(out_file, sr=None)
80
  # This gives x as audio file in numpy array and sr as original sampling rate
81
  # The audio needs to be split into t-second chunks since the API call truncates the response
82
+ for _,i in enumerate(range(0, (len(x)//(t * sr)) +1)):
83
  y = x[t * sr * i: t * sr *(i+1)]
84
+ split_path = os.path.join(save_dir,"audio_split.wav")
85
  sf.write(split_path, y, sr)
86
  text += client.automatic_speech_recognition(split_path)
87
 
 
315
  embed_html = '<iframe width="100%" height="315" src="https://www.youtube.com/embed/{}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'.format(vid_id)
316
  return embed_html
317
 
318
+ def summarize_youtube_video(url,force_transcribe,api_token="",
319
+ temperature=1.0,words=150,do_sample=True):
320
+ if api_token == "":
321
+ api_token = HF_TOKEN
322
+ title,text,transcript_source = transcribe_youtube_video(url,force_transcribe,True,api_token)
323
+ summary, summary_source = summarize_text(title,text,temperature,words,True,api_token,do_sample)
324
+ print(text)
325
  return summary, text, transcript_source, summary_source
326
 
327
  html = '<iframe width="100%" height="315" src="https://www.youtube.com/embed/" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'
328
 
329
+ # def change_transcribe_api(vis):
330
+ # return gr.Checkbox(value=False, visible=vis)
331
 
332
+ # def change_api_token(vis):
333
+ # return gr.Textbox(visible=vis)
334
 
335
  def update_source(source):
336
  return gr.Textbox(info=source)
 
347
  with gr.Column(scale=4):
348
  url = gr.Textbox(label="Enter YouTube video URL here:",placeholder="https://www.youtube.com/watch?v=")
349
  with gr.Column(scale=1):
350
+ api_token = gr.Textbox(label="Paste your Hugging Face API token here:",placeholder="hf_...",visible=True,show_label=True,info='The API token passed via this field is not stored. It is only passed through the Hugging Face Hub API for inference.')
351
  with gr.Column(scale=1):
352
  sum_btn = gr.Button("Summarize!")
353
 
354
  with gr.Accordion("Transcription Settings",open=False):
355
  with gr.Row():
356
  force_transcribe = gr.Checkbox(label="Transcribe even if transcription is available.", info='If unchecked, the app attempts to download the transcript from YouTube first. Check this if the transcript does not seem accurate.')
357
+ # use_transcribe_api = gr.Checkbox(label="Transcribe using the HuggingFaceHub API.",visible=False)
358
 
359
  with gr.Accordion("Summarization Settings",open=False):
360
  with gr.Row():
361
+ # use_llm_api = gr.Checkbox(label="Summarize using the HuggingFaceHub API.",visible=True)
362
  do_sample = gr.Checkbox(label="Set the Temperature",value=True,visible=True)
363
  temperature = gr.Slider(minimum=0,maximum=1,value=1.0,label="Generation temperature",visible=True)
364
+ words = gr.Slider(minimum=100,maximum=500,value=100,label="Length of the summary")
365
 
366
  gr.Markdown("# Results")
367
 
 
374
 
375
  with gr.Row():
376
  with gr.Group():
 
377
  transcript = gr.Textbox(label="Full Transcript",placeholder="transcript...",show_label=True)
378
+ transcript_source = gr.Textbox(visible=False)
379
 
380
  with gr.Accordion("Notes",open=False):
381
  gr.Markdown("""
 
385
  """)
386
 
387
  # Defining the interactivity of the UI elements
388
+ # force_transcribe.change(fn=change_transcribe_api,inputs=force_transcribe,outputs=use_transcribe_api)
389
+ # use_transcribe_api.change(fn=change_api_token,inputs=use_transcribe_api,outputs=api_token)
390
+ # use_llm_api.change(fn=change_api_token,inputs=use_llm_api,outputs=api_token)
391
  transcript_source.change(fn=update_source,inputs=transcript_source,outputs=transcript)
392
  summary_source.change(fn=update_source,inputs=summary_source,outputs=summary)
393
  do_sample.change(fn=show_temp,inputs=do_sample,outputs=temperature)
394
 
395
  # Defining the functions to call on clicking the button
396
  sum_btn.click(fn=get_youtube_title, inputs=url, outputs=title, api_name="get_youtube_title", queue=False)
397
+ sum_btn.click(fn=summarize_youtube_video, inputs=[url,force_transcribe,api_token,temperature,words,do_sample],
398
  outputs=[summary,transcript, transcript_source, summary_source], api_name="summarize_youtube_video", queue=True)
399
  sum_btn.click(fn=get_video, inputs=url, outputs=video, api_name="get_youtube_video", queue=False)
400
 
401
  demo.queue()
402
+ demo.launch(share=False)