damand2061 committed on
Commit
87b6d93
1 Parent(s): 3ec24db

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +153 -0
app.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ import gradio as gr
3
+ from nltk.tokenize import sent_tokenize
4
+ from newspaper import Article
5
+
6
import nltk

# sent_tokenize() needs the Punkt sentence-splitter data at runtime.
# Older NLTK releases read the pickled "punkt" package, while newer ones
# (3.8.2+) look for "punkt_tab" instead, so fetch both to work on either.
for _punkt_resource in ("punkt", "punkt_tab"):
    nltk.download(_punkt_resource)
8
+
9
# Available models: display name (used as the dropdown choice and as the key
# into label_descriptions) -> model identifier passed to transformers.pipeline.
models = dict(
    (
        ("PFSA-ID-MEDWO-IndoBERT-LEM", "damand2061/pfsa-id-medwo-indobert-lem"),
        ("PFSA-ID-MED-IndoBERT-LEM", "damand2061/pfsa-id-med-indobert-lem"),
    )
)
14
+
15
# Label legend for each model: rows of [label, Indonesian description] that
# feed the "Label" / "Keterangan" table in the UI.

# Labels listed for both models.
_SHARED_LABEL_ROWS = (
    ("STATEMENT", "Pernyataan"),
    ("CUECOREF", "Isyarat Pronomina"),
    ("CUE", "Isyarat"),
    ("AFFILIATION", "Afiliasi"),
    ("ROLE", "Jabatan"),
    ("PERSONCOREF", "Pronomina Orang"),
    ("PERSON", "Orang"),
)

# Extra labels listed only for the MED model; they come first in its table.
_MED_ONLY_LABEL_ROWS = (
    ("EVENT", "Acara"),
    ("LOCATION", "Lokasi"),
    ("DATETIME", "Waktu"),
    ("ISSUE", "Isu"),
)

# Each row is copied into a fresh list so the two tables never share objects.
label_descriptions = {
    "PFSA-ID-MEDWO-IndoBERT-LEM": [list(row) for row in _SHARED_LABEL_ROWS],
    "PFSA-ID-MED-IndoBERT-LEM": [
        list(row) for row in _MED_ONLY_LABEL_ROWS + _SHARED_LABEL_ROWS
    ],
}
40
+
41
# Pipelines that have already been built, keyed by display name. Building a
# pipeline loads the whole checkpoint, so it is done at most once per model
# per process instead of once per prediction request.
_pipeline_cache = {}


def load_model(model_name):
    """Return the token-classification pipeline for ``model_name``.

    Args:
        model_name: A key of the module-level ``models`` mapping.

    Returns:
        The ``transformers`` "ner" pipeline for that model, with its label
        map rewritten as described below. The same pipeline object is
        returned on every later call with the same ``model_name``.

    Raises:
        KeyError: If ``model_name`` is not a key of ``models``.
    """
    cached = _pipeline_cache.get(model_name)
    if cached is not None:
        return cached

    ner_pipeline = pipeline("ner", model=models[model_name])

    # Rewrite "L-" tags as "I-" and "U-" tags as "B-" in the label map.
    # NOTE(review): presumably the checkpoints use BILOU-style tags and this
    # maps them onto the B-/I- scheme the pipeline's aggregation expects;
    # confirm against the model cards.
    config = ner_pipeline.model.config
    config.id2label = {
        label_id: tag.replace("L-", "I-").replace("U-", "B-")
        for label_id, tag in config.id2label.items()
    }

    _pipeline_cache[model_name] = ner_pipeline
    return ner_pipeline
45
+
46
def ner_text(Text, model_name):
    """Tag the entities found in ``Text``, one sentence at a time.

    Args:
        Text: Raw input text; it is split into sentences with
            ``sent_tokenize`` before tagging.
        model_name: Key of ``models`` selecting which pipeline to use.

    Returns:
        A list of ``(word, entity_group)`` tuples, in sentence order, built
        from the pipeline output with ``aggregation_strategy="max"``.
    """
    tagger = load_model(model_name)
    return [
        (span['word'], span['entity_group'])
        for sentence in sent_tokenize(Text)
        for span in tagger(sentence, aggregation_strategy="max")
    ]
55
+
56
def ner_link(URL, model_name):
    """Download the article at ``URL`` and tag the entities in its text.

    Args:
        URL: Address of the news article to fetch.
        model_name: Key of ``models`` selecting which pipeline to use.

    Returns:
        A list of ``(word, entity_group)`` tuples, exactly as ``ner_text``
        returns for the article's extracted text.

    Raises:
        gr.Error: If ``URL`` is empty/blank, or if the article cannot be
            downloaded or parsed. Gradio shows the message to the user.
    """
    url = (URL or "").strip()
    if not url:
        raise gr.Error("Masukkan tautan artikel terlebih dahulu.")

    article = Article(url, language='id')
    try:
        article.download()
        article.parse()
    except Exception as err:
        # UI boundary: turn any fetch/parse failure into a readable message
        # while keeping the original exception chained for the logs.
        raise gr.Error(f"Gagal mengunduh atau membaca artikel: {err}") from err

    # Sentence splitting and tagging are identical to the text tab, so reuse
    # ner_text instead of repeating the loop here.
    return ner_text(article.text, model_name)
72
+
73
def update_label_descriptions(model_name):
    """Build the label legend table for the selected model.

    Args:
        model_name: Key of ``label_descriptions``.

    Returns:
        A ``gr.DataFrame`` holding that model's rows under the headers
        "Label" and "Keterangan".
    """
    rows = label_descriptions[model_name]
    return gr.DataFrame(rows, headers=["Label", "Keterangan"])
75
+
76
# Sample article URLs offered as clickable examples for the URL input.
example_link = [
    "https://www.cnnindonesia.com/nasional/20240911102707-32-1143270/budi-arie-soal-jet-pribadi-erina-hamil-tak-boleh-naik-angkutan-umum",
    "https://nasional.tempo.co/read/1914500/respons-gibran-soal-akun-kaskus-fufufafa-yang-disebut-menyerang-prabowo",
    "https://news.detik.com/berita/d-7535151/komentar-jokowi-dan-gibran-yang-buka-suara-soal-kaesang-naik-jet-pribadi",
    "https://www.liputan6.com/news/read/5699055/jokowi-buka-suara-soal-dugaan-gratifikasi-jet-pribadi-kaesang",
]
82
+
83
# Sample news article offered as the single clickable example for the text
# input. The paragraphs are separated by blank lines inside the string.
example_text = [
    """Jakarta, CNN Indonesia -- Menteri Komunikasi dan Informatika (Menkominfo) Budi Arie Setiadi mengaku sudah mempelajari Akun Kaskus fufufafa yang dituding warganet sebagai akun milik Wapres terpilih Gibran Rakabuming Raka.

"Udah, udah, udah (didalami). Iya maksudnya udah kita pelajari," kata Budi di Kompleks Parlemen, Jakarta, Selasa (10/9).

Budi Arie menegaskan akun tersebut bukan milik Gibran berdasarkan pendalaman yang dilakukan.

"Bukan lah bukan (Gibran)," tegasnya.

Kendati demikian, Budi tak menjawab dengan tegas ketika ditanya siapa pemilik sebenarnya akun yang disebut sering menghujat Ketua Umum Gerindra Prabowo Subianto selama masa Pemilu 2014 lalu itu.

Sebelumnya, Gibran juga telah buka suara soal akun fufufafa tersebut. Ia mengaku tak tahu menahu soal akun itu. Ia juga meminta wartawan untuk bertanya ke pemilik akun tersebut.

"Lha mbuh, takono sing duwe akun, kok aku (Tidak tahu, tanyakan ke yang punya akun. Kok ke saya)," jawab Gibran singkat usai blusukan di Kelurahan Sondakan, Kecamatan Laweyan, Solo, Selasa (10/9).

Sejumlah netizen mengunggah tangkapan layar beberapa postingan akun fufufafa di Kaskus yang diduga punya Gibran.

Akun itu menyindir Prabowo lewat tulisan, "Kasihan capres yg anaknya fashion designer ****" Postingan itu diunggah 17 September 2017.

Adapula tulisan, "Istri cerai anak **** Trus mau lebaran sama siapa?" Tulisan itu diunggah 19 Juni 2018.

Warganet menduga akun itu milik Gibran. Hal itu disimpulkan dari salah satu postingan fufufafa yang menyebut akun Twitter miliknya adalah @rkgbrn.

Akun Twitter itu pernah di-mention oleh akun @kaesangp milik Kaesang Pangarep, adik Gibran, pada 24 Juni 2012 di Twitter. Namun, cuitan itu telah dihapus.

(mab/isn)
""",
]
109
+
110
# Gradio UI: one tab for pasted text, one for an article URL, and a shared
# label legend underneath.
# NOTE(review): the source view of this block had lost its indentation, so the
# nesting of the layout contexts below is reconstructed; confirm against the
# deployed app.
with gr.Blocks() as demo:
    # Centered page title. NOTE(review): any leading whitespace the original
    # literal had on these lines is not visible in the source view.
    gr.Markdown(
        "\n"
        "<div style='text-align: center;'>\n"
        "<h1>Indonesian Quotation Extraction and Attribution</h1>\n"
        "</div>\n"
    )

    # Both dropdowns offer every configured model and start on the first one.
    model_names = list(models.keys())
    default_model = model_names[0]

    with gr.Tabs():
        with gr.TabItem("Input Teks"):
            with gr.Row():
                with gr.Column(scale=2):
                    text_input = gr.Textbox(placeholder="Masukkan kalimat...", label="Text")
                    model_select_text = gr.Dropdown(
                        choices=list(model_names),
                        value=default_model,
                        label="Model",
                    )
                    text_button = gr.Button("Predict", variant='primary')
                    gr.ClearButton(text_input, value="Reset")
                with gr.Column(scale=3):
                    text_output = gr.HighlightedText(label="Output")

            text_button.click(
                fn=ner_text,
                inputs=[text_input, model_select_text],
                outputs=text_output,
            )

            gr.Examples(example_text, inputs=text_input)

        with gr.TabItem("Input Tautan"):
            with gr.Row():
                with gr.Column(scale=2):
                    link_input = gr.Textbox(placeholder="Masukkan tautan...", label="URL")
                    model_select_link = gr.Dropdown(
                        choices=list(model_names),
                        value=default_model,
                        label="Model",
                    )
                    link_button = gr.Button("Predict", variant='primary')
                    gr.ClearButton(link_input, value="Reset")
                with gr.Column(scale=3):
                    link_output = gr.HighlightedText(label="Output")

            link_button.click(
                fn=ner_link,
                inputs=[link_input, model_select_link],
                outputs=link_output,
            )

            gr.Examples(example_link, inputs=link_input)

    gr.Markdown("## Penjelasan Label")
    label_description = gr.DataFrame(
        label_descriptions[default_model],
        headers=["Label", "Keterangan"],
    )

    # Changing the model in either tab refreshes the one shared legend table.
    for model_dropdown in (model_select_text, model_select_link):
        model_dropdown.change(
            fn=update_label_descriptions,
            inputs=model_dropdown,
            outputs=label_description,
        )

if __name__ == "__main__":
    demo.launch()