BilalSardar committed on
Commit 16ed15e · 0 Parent(s)

Duplicate from BilalSardar/yt-summarizer

Files changed (5)
  1. .gitattributes +34 -0
  2. README.md +13 -0
  3. app.py +18 -0
  4. requirements.txt +5 -0
  5. summarize.py +44 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: Yt Summarizer
+ emoji: 📚
+ colorFrom: yellow
+ colorTo: pink
+ sdk: gradio
+ sdk_version: 3.16.2
+ app_file: app.py
+ pinned: false
+ duplicated_from: BilalSardar/yt-summarizer
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,18 @@
+ import gradio as gr
+ from summarize import Summarizer
+
+ interface = gr.Interface(fn = Summarizer,
+                          inputs = [gr.inputs.Textbox(lines=2,
+                                                      placeholder="Enter your link...",
+                                                      label='YouTube Video Link'),
+                                    gr.inputs.Radio(["mT5", "BART"], type="value", label='Model')],
+                          outputs = [gr.outputs.Textbox(
+                                        label="Summary")],
+
+                          title = "Youtube Video Summarizer",
+                          examples = [
+                              ['https://www.youtube.com/watch?v=WSbgixdC9g8', 'BART']
+                          ],
+                          enable_queue=True)
+
+ interface.launch(debug=True)
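For comparison, the same interface can be expressed with the top-level component classes that Gradio 3.x also ships (gr.Textbox, gr.Radio), avoiding the deprecated gr.inputs / gr.outputs namespaces used above. This is only a sketch, not part of the commit, and assumes the pinned sdk_version 3.16.2:

import gradio as gr
from summarize import Summarizer

# Equivalent interface built from the non-deprecated component classes
interface = gr.Interface(
    fn=Summarizer,
    inputs=[gr.Textbox(lines=2, placeholder="Enter your link...", label="YouTube Video Link"),
            gr.Radio(["mT5", "BART"], label="Model")],
    outputs=gr.Textbox(label="Summary"),
    title="Youtube Video Summarizer",
    examples=[["https://www.youtube.com/watch?v=WSbgixdC9g8", "BART"]],
)

# queue() replaces the deprecated enable_queue=True argument
interface.queue().launch(debug=True)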
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ torch
+ transformers
+ sentencepiece
+ youtube-transcript-api
+
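Two notes on these requirements, for context: the gradio package itself is not listed because the Space's sdk: gradio setting supplies it, and sentencepiece is needed because the mT5 (and Pegasus) tokenizers are SentencePiece-based. A quick, hypothetical import check for a local setup might look like:

# Hypothetical local sanity check, not part of the Space
import torch
import transformers
import sentencepiece
from youtube_transcript_api import YouTubeTranscriptApi

print("torch", torch.__version__, "| transformers", transformers.__version__)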
summarize.py ADDED
@@ -0,0 +1,44 @@
+ import traceback
+
+ from youtube_transcript_api import YouTubeTranscriptApi
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+
+ def Summarizer(link, model):
+
+     # Expects a standard watch URL such as https://www.youtube.com/watch?v=<id>
+     video_id = link.split("=")[1]
+
+     try:
+         transcript = YouTubeTranscriptApi.get_transcript(video_id)
+         FinalTranscript = ' '.join([i['text'] for i in transcript])
+
+         # Map the radio choice to a Hugging Face checkpoint
+         if model == "Pegasus":
+             checkpoint = "google/pegasus-large"
+         elif model == "mT5":
+             checkpoint = "csebuetnlp/mT5_multilingual_XLSum"
+         elif model == "BART":
+             checkpoint = "sshleifer/distilbart-cnn-12-6"
+
+         tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+         model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
+
+         # Tokenize the transcript, truncating it to the model's maximum input length
+         inputs = tokenizer(FinalTranscript,
+                            max_length=1024,
+                            truncation=True,
+                            return_tensors="pt")
+
+         summary_ids = model.generate(inputs["input_ids"])
+         summary = tokenizer.batch_decode(summary_ids,
+                                          skip_special_tokens=True,
+                                          clean_up_tokenization_spaces=False)
+
+         return summary[0]
+
+
+     except Exception:
+         # Log the full traceback and surface a readable message in the UI
+         print(traceback.format_exc())
+         return "Could not summarize this video. Check the link and try again."
+
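Summarizer can also be called outside Gradio for a quick check; a minimal sketch using the example link from app.py (the first call downloads the selected checkpoint, so it may take a while):

from summarize import Summarizer

# Summarize the example video with the distilled BART checkpoint
print(Summarizer("https://www.youtube.com/watch?v=WSbgixdC9g8", "BART"))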