Spaces:

Lookimi
/

TuberTranscript

Runtime error

App Files Files Community

Lookimi committed on Feb 4, 2023

Commit

bda77e6

•

1 Parent(s): 9337fa5

Create App.py

Browse files

Files changed (1) hide show

App.py +59 -0

App.py ADDED Viewed

	@@ -0,0 +1,59 @@

+#changing the code into a Gradio Interface app on a Hugging Face Space that prompts for the channel URL
+#importing the necessary modules
+import os
+import urllib.request
+import re
+import time
+import gradio as gr
+#Callback run by the Gradio app: downloads the transcripts of a channel's videos
+def transcript_extract():
+    #specifying the YouTube channel URL
+    channel_url = gr.inputs.Textbox(label="Channel URL")
+#accessing the webpage
+    page = urllib.request.urlopen(channel_url)
+#reading the source code
+    data = page.read()
+#creating a directory to save the transcripts
+    os.mkdir('Transcripts')
+#finding the transcripts
+    transcript_links = re.findall(r'(\/watch\?v=[A-Za-z0-9_.-]*)', str(data))
+#looping through each transcript to download
+    for link in transcript_links:
+        video_url = 'http://www.youtube.com'+link
+        #access the video page
+        video_page = urllib.request.urlopen(video_url)
+        #read the source code
+        video_data = video_page.read()
+        #find the transcript
+        transcript_link = re.findall(r'(\/timedtext_editor\?[A-Za-z0-9_.-]*)', str(video_data))
+        #check if there is a transcript available
+        if(len(transcript_link) > 0):
+            #access the transcript page
+            transcript_url ='http://www.youtube.com'+ transcript_link[0]
+            transcript_page = urllib.request.urlopen(transcript_url)
+            transcript_data = transcript_page.read()
+            #find the link to the transcript
+            transcript_download_link = re.findall(r'(\/api\/timedtext\?[A-Za-z0-9_.-]*)', str(transcript_data))
+            #check if the transcript is available for download
+            if(len(transcript_download_link) > 0):
+                #download the transcript
+                file_name = "Transcripts/" + link[9:] + ".xml"
+                download_url = 'http://www.youtube.com'+transcript_download_link[0]
+                urllib.request.urlretrieve(download_url, file_name)
+                print("Downloading transcript for video " + link[9:] + "...")
+                time.sleep(3)
+            else:
+                print("Transcript not available for video " + link[9:])
+        else:
+            print("Transcript not available for video " + link[9:])
+#upload to Huggingface Space
+gr.Interface(fn=transcript_extract, inputs="textbox", force_reload=True).launch(share=True)