chinhon commited on
Commit
6fcde96
1 Parent(s): 8d14c28

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -0
app.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import re
3
+
4
+ from gradio.mix import Parallel
5
+ from transformers import (
6
+ AutoTokenizer,
7
+ AutoModelForSeq2SeqLM,
8
+ )
9
+
10
#define function for text cleaning
def clean_text(text):
    """Normalize raw commentary text before tokenization.

    Removes non-ASCII characters, URLs, "ADVERTISEMENT" boilerplate and
    whitespace noise, and returns a single-spaced, trimmed string.
    """
    # Drop non-ASCII (e.g. Chinese) characters.
    text = text.encode("ascii", errors="ignore").decode("ascii")
    # Strip URLs and advertisement boilerplate.
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"ADVERTISEMENT", " ", text)
    # Turn newlines and tabs into spaces. (The original also substituted
    # r"\n\n" AFTER r"\n" had already been replaced, so that pattern could
    # never match — dead code, removed here.)
    text = re.sub(r"[\n\t]", " ", text)
    # Collapse runs of spaces and trim both ends; this also subsumes the
    # original's redundant leading `text.strip(" ")`.
    text = re.sub(" +", " ", text).strip()
    return text
25
+
26
# define function for headlines generator 1-3
modchoice_1 = "chinhon/pegasus-large-commentaries_hd"

def commentaries_headline1(text):
    """Generate a headline for *text* with the fine-tuned pegasus-large model.

    Returns the first decoded headline string.
    """
    input_text = clean_text(text)

    # NOTE: the tokenizer/model are re-instantiated on every call. The HF
    # cache makes this a disk load rather than a re-download, but hoisting
    # them to module level would avoid repeated deserialization.
    tokenizer_1 = AutoTokenizer.from_pretrained(modchoice_1)
    model_1 = AutoModelForSeq2SeqLM.from_pretrained(modchoice_1)

    # The original wrapped this call in the deprecated
    # `tokenizer.as_target_tokenizer()` context, which is meant for
    # tokenizing *labels*, not model inputs. Pegasus uses the same
    # tokenizer for source and target, so dropping it preserves behavior.
    batch = tokenizer_1(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )

    translated = model_1.generate(**batch)
    summary_1 = tokenizer_1.batch_decode(translated, skip_special_tokens=True)
    return summary_1[0]
46
+
47
+
48
# Gradio interface wrapping the pegasus-large headline generator.
_headline1_input = gr.inputs.Textbox()
_headline1_output = gr.outputs.Textbox(label=" | Model: Fine tuned pegasus-large")
headline1 = gr.Interface(
    fn=commentaries_headline1,
    inputs=_headline1_input,
    outputs=_headline1_output,
)
53
+
54
modchoice_2 = "chinhon/pegasus-multi_news-commentaries_hdwriter"

def commentaries_headline2(text):
    """Generate a headline for *text* with the fine-tuned pegasus-multi_news model.

    Returns the first decoded headline string.
    """
    input_text = clean_text(text)

    # NOTE: the tokenizer/model are re-instantiated on every call. The HF
    # cache makes this a disk load rather than a re-download, but hoisting
    # them to module level would avoid repeated deserialization.
    tokenizer_2 = AutoTokenizer.from_pretrained(modchoice_2)
    model_2 = AutoModelForSeq2SeqLM.from_pretrained(modchoice_2)

    # The original wrapped this call in the deprecated
    # `tokenizer.as_target_tokenizer()` context, which is meant for
    # tokenizing *labels*, not model inputs. Pegasus uses the same
    # tokenizer for source and target, so dropping it preserves behavior.
    batch = tokenizer_2(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )

    translated = model_2.generate(**batch)
    summary_2 = tokenizer_2.batch_decode(translated, skip_special_tokens=True)
    return summary_2[0]
73
+
74
# Gradio interface wrapping the pegasus-multi_news headline generator.
_headline2_input = gr.inputs.Textbox()
_headline2_output = gr.outputs.Textbox(label=" | Model: Fine tuned pegasus-multi_news")
headline2 = gr.Interface(
    fn=commentaries_headline2,
    inputs=_headline2_input,
    outputs=_headline2_output,
)
79
+
80
+
81
modchoice_3 = "chinhon/pegasus-newsroom-commentaries_hdwriter"

def commentaries_headline3(text):
    """Generate a headline for *text* with the fine-tuned pegasus-newsroom model.

    Returns the first decoded headline string.
    """
    input_text = clean_text(text)

    # NOTE: the tokenizer/model are re-instantiated on every call. The HF
    # cache makes this a disk load rather than a re-download, but hoisting
    # them to module level would avoid repeated deserialization.
    tokenizer_3 = AutoTokenizer.from_pretrained(modchoice_3)
    model_3 = AutoModelForSeq2SeqLM.from_pretrained(modchoice_3)

    # The original wrapped this call in the deprecated
    # `tokenizer.as_target_tokenizer()` context, which is meant for
    # tokenizing *labels*, not model inputs. Pegasus uses the same
    # tokenizer for source and target, so dropping it preserves behavior.
    batch = tokenizer_3(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )

    # BUG FIX: the original passed `max_length=100` to `batch_decode`,
    # where it is not a recognized argument and was silently ignored.
    # It is a generation cap and belongs in `generate()`.
    translated = model_3.generate(**batch, max_length=100)
    summary_3 = tokenizer_3.batch_decode(translated, skip_special_tokens=True)
    return summary_3[0]
102
+
103
+
104
# Gradio interface wrapping the pegasus-newsroom headline generator.
_headline3_input = gr.inputs.Textbox()
_headline3_output = gr.outputs.Textbox(label=" | Model: Fine tuned pegasus-newsroom")
headline3 = gr.Interface(
    fn=commentaries_headline3,
    inputs=_headline3_input,
    outputs=_headline3_output,
)
109
+
110
#define Gradio interface for 3 parallel apps
# One shared textbox fans the input out to all three model interfaces,
# so each submission yields three suggested headlines side by side.
_shared_input = gr.inputs.Textbox(
    lines=20,
    label="Paste parts of your commentary here, and choose from 3 suggested headlines",
)
_app = Parallel(
    headline1,
    headline2,
    headline3,
    title="Commentaries Headlines Generator",
    inputs=_shared_input,
    theme="huggingface",
    enable_queue=True,
)
_app.launch()