summarize_youtube

Sleeping

App Files Community

smakamali committed on Oct 14, 2023

Commit

8aed00f

•

1 Parent(s): 1f6084d

fix the bug in the transcribe_yt_vid_api function

Browse files

Files changed (1) hide show

app.py +25 -23

app.py CHANGED Viewed

@@ -6,7 +6,6 @@ if not os.path.exists(save_dir):
 transcription_model_id = "openai/whisper-large"
 llm_model_id = "tiiuae/falcon-7b-instruct"
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
-# HF_TOKEN = f"Bearer {HF_TOKEN}"
 from youtube_transcript_api import YouTubeTranscriptApi
 import pytube
@@ -65,7 +64,7 @@ def transcribe_yt_vid_api(url,api_token):
     # download YouTube video's audio
     yt = YouTube(str(url))
     audio = yt.streams.filter(only_audio = True).first()
-    out_file = audio.download(filename="audio.mp3",
                               output_path = save_dir)
     # Initialize client for the Whisper model
@@ -76,13 +75,13 @@ def transcribe_yt_vid_api(url,api_token):
     import soundfile as sf
     text = ''
-    t=20 # audio chunk length in seconds
     x, sr = librosa.load(out_file, sr=None)
     # This gives x as audio file in numpy array and sr as original sampling rate
     # The audio needs to be split in 20 second chunks since the API call truncates the response
-    for i in range(0, len(x)//(t * sr)):
         y = x[t * sr * i: t * sr *(i+1)]
-        split_path = save_dir+"audio_split.mp3"
         sf.write(split_path, y, sr)
         text += client.automatic_speech_recognition(split_path)
@@ -316,19 +315,22 @@ def get_video(url):
     embed_html = '<iframe width="100%" height="315" src="https://www.youtube.com/embed/{}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'.format(vid_id)
     return embed_html
-def summarize_youtube_video(url,force_transcribe,use_transcribe_api=False,api_token="",
-                            temperature=1,words=250,use_llm_api=False,do_sample=False):
-    title,text,transcript_source = transcribe_youtube_video(url,force_transcribe,use_transcribe_api,HF_TOKEN)
-    summary, summary_source = summarize_text(title,text,temperature,words,use_llm_api,HF_TOKEN,do_sample)
     return summary, text, transcript_source, summary_source
 html = '<iframe width="100%" height="315" src="https://www.youtube.com/embed/" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'
-def change_transcribe_api(vis):
-    return gr.Checkbox(value=True, visible=vis)
-def change_api_token(vis):
-    return gr.Textbox(visible=vis)
 def update_source(source):
     return gr.Textbox(info=source)
@@ -345,21 +347,21 @@ with gr.Blocks() as demo:
         with gr.Column(scale=4):
             url = gr.Textbox(label="Enter YouTube video URL here:",placeholder="https://www.youtube.com/watch?v=")
         with gr.Column(scale=1):
-            api_token = gr.Textbox(label="Paste your Hugging Face API token here (optional):",placeholder="hf_...",visible=False,show_label=True,info='The API token passed via this field is not stored. It is only passed through the Hugging Face Hub API for inference.')
         with gr.Column(scale=1):
             sum_btn = gr.Button("Summarize!")
     with gr.Accordion("Transcription Settings",open=False):
         with gr.Row():
             force_transcribe = gr.Checkbox(label="Transcribe even if transcription is available.", info='If unchecked, the app attempts to download the transcript from YouTube first. Check this if the transcript does not seem accurate.')
-            use_transcribe_api = gr.Checkbox(label="Transcribe using the HuggingFaceHub API.",value=True,visible=False)
     with gr.Accordion("Summarization Settings",open=False):
         with gr.Row():
-            use_llm_api = gr.Checkbox(label="Summarize using the HuggingFaceHub API.",value=True,visible=True)
             do_sample = gr.Checkbox(label="Set the Temperature",value=True,visible=True)
             temperature = gr.Slider(minimum=0,maximum=1,value=1.0,label="Generation temperature",visible=True)
-            words = gr.Slider(minimum=100,maximum=500,value=150,label="Length of the summary")
     gr.Markdown("# Results")
@@ -372,8 +374,8 @@ with gr.Blocks() as demo:
     with gr.Row():
         with gr.Group():
-            transcript_source = gr.Textbox(visible=False)
             transcript = gr.Textbox(label="Full Transcript",placeholder="transcript...",show_label=True)
     with gr.Accordion("Notes",open=False):
         gr.Markdown("""
@@ -383,18 +385,18 @@ with gr.Blocks() as demo:
                     """)
     # Defining the interactivity of the UI elements
-    force_transcribe.change(fn=change_transcribe_api,inputs=force_transcribe,outputs=use_transcribe_api)
-    use_transcribe_api.change(fn=change_api_token,inputs=use_transcribe_api,outputs=api_token)
-    use_llm_api.change(fn=change_api_token,inputs=use_llm_api,outputs=api_token)
     transcript_source.change(fn=update_source,inputs=transcript_source,outputs=transcript)
     summary_source.change(fn=update_source,inputs=summary_source,outputs=summary)
     do_sample.change(fn=show_temp,inputs=do_sample,outputs=temperature)
     # Defining the functions to call on clicking the button
     sum_btn.click(fn=get_youtube_title, inputs=url, outputs=title, api_name="get_youtube_title", queue=False)
-    sum_btn.click(fn=summarize_youtube_video, inputs=[url,force_transcribe,use_transcribe_api,api_token,temperature,words,use_llm_api,do_sample],
                   outputs=[summary,transcript, transcript_source, summary_source], api_name="summarize_youtube_video", queue=True)
     sum_btn.click(fn=get_video, inputs=url, outputs=video, api_name="get_youtube_video", queue=False)
 demo.queue()
-demo.launch(share=False)

 transcription_model_id = "openai/whisper-large"
 llm_model_id = "tiiuae/falcon-7b-instruct"
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 from youtube_transcript_api import YouTubeTranscriptApi
 import pytube
     # download YouTube video's audio
     yt = YouTube(str(url))
     audio = yt.streams.filter(only_audio = True).first()
+    out_file = audio.download(filename="audio.wav",
                               output_path = save_dir)
     # Initialize client for the Whisper model
     import soundfile as sf
     text = ''
+    t=25 # audio chunk length in seconds
     x, sr = librosa.load(out_file, sr=None)
     # This gives x as audio file in numpy array and sr as original sampling rate
     # The audio needs to be split in 20 second chunks since the API call truncates the response
+    for _,i in enumerate(range(0, (len(x)//(t * sr)) +1)):
         y = x[t * sr * i: t * sr *(i+1)]
+        split_path = os.path.join(save_dir,"audio_split.wav")
         sf.write(split_path, y, sr)
         text += client.automatic_speech_recognition(split_path)
     embed_html = '<iframe width="100%" height="315" src="https://www.youtube.com/embed/{}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'.format(vid_id)
     return embed_html
+def summarize_youtube_video(url,force_transcribe,api_token="",
+                            temperature=1.0,words=150,do_sample=True):
+    if api_token == "":
+        api_token = HF_TOKEN
+    title,text,transcript_source = transcribe_youtube_video(url,force_transcribe,True,api_token)
+    summary, summary_source = summarize_text(title,text,temperature,words,True,api_token,do_sample)
+    print(text)
     return summary, text, transcript_source, summary_source
 html = '<iframe width="100%" height="315" src="https://www.youtube.com/embed/" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'
+# def change_transcribe_api(vis):
+#     return gr.Checkbox(value=False, visible=vis)
+# def change_api_token(vis):
+#     return gr.Textbox(visible=vis)
 def update_source(source):
     return gr.Textbox(info=source)
         with gr.Column(scale=4):
             url = gr.Textbox(label="Enter YouTube video URL here:",placeholder="https://www.youtube.com/watch?v=")
         with gr.Column(scale=1):
+            api_token = gr.Textbox(label="Paste your Hugging Face API token here:",placeholder="hf_...",visible=True,show_label=True,info='The API token passed via this field is not stored. It is only passed through the Hugging Face Hub API for inference.')
         with gr.Column(scale=1):
             sum_btn = gr.Button("Summarize!")
     with gr.Accordion("Transcription Settings",open=False):
         with gr.Row():
             force_transcribe = gr.Checkbox(label="Transcribe even if transcription is available.", info='If unchecked, the app attempts to download the transcript from YouTube first. Check this if the transcript does not seem accurate.')
+            # use_transcribe_api = gr.Checkbox(label="Transcribe using the HuggingFaceHub API.",visible=False)
     with gr.Accordion("Summarization Settings",open=False):
         with gr.Row():
+            # use_llm_api = gr.Checkbox(label="Summarize using the HuggingFaceHub API.",visible=True)
             do_sample = gr.Checkbox(label="Set the Temperature",value=True,visible=True)
             temperature = gr.Slider(minimum=0,maximum=1,value=1.0,label="Generation temperature",visible=True)
+            words = gr.Slider(minimum=100,maximum=500,value=100,label="Length of the summary")
     gr.Markdown("# Results")
     with gr.Row():
         with gr.Group():
             transcript = gr.Textbox(label="Full Transcript",placeholder="transcript...",show_label=True)
+            transcript_source = gr.Textbox(visible=False)
     with gr.Accordion("Notes",open=False):
         gr.Markdown("""
                     """)
     # Defining the interactivity of the UI elements
+    # force_transcribe.change(fn=change_transcribe_api,inputs=force_transcribe,outputs=use_transcribe_api)
+    # use_transcribe_api.change(fn=change_api_token,inputs=use_transcribe_api,outputs=api_token)
+    # use_llm_api.change(fn=change_api_token,inputs=use_llm_api,outputs=api_token)
     transcript_source.change(fn=update_source,inputs=transcript_source,outputs=transcript)
     summary_source.change(fn=update_source,inputs=summary_source,outputs=summary)
     do_sample.change(fn=show_temp,inputs=do_sample,outputs=temperature)
     # Defining the functions to call on clicking the button
     sum_btn.click(fn=get_youtube_title, inputs=url, outputs=title, api_name="get_youtube_title", queue=False)
+    sum_btn.click(fn=summarize_youtube_video, inputs=[url,force_transcribe,api_token,temperature,words,do_sample],
                   outputs=[summary,transcript, transcript_source, summary_source], api_name="summarize_youtube_video", queue=True)
     sum_btn.click(fn=get_video, inputs=url, outputs=video, api_name="get_youtube_video", queue=False)
 demo.queue()
+demo.launch(share=False)