StevenLimcorn committed on
Commit
47a6c20
1 Parent(s): 5596de2

Reformat code to be generic, adding new models in model.py

Browse files
__pycache__/model.cpython-311.pyc ADDED
Binary file (3.43 kB). View file
 
__pycache__/script.cpython-311.pyc CHANGED
Binary files a/__pycache__/script.cpython-311.pyc and b/__pycache__/script.cpython-311.pyc differ
 
__pycache__/utils.cpython-311.pyc CHANGED
Binary files a/__pycache__/utils.cpython-311.pyc and b/__pycache__/utils.cpython-311.pyc differ
 
__pycache__/utils.cpython-39.pyc CHANGED
Binary files a/__pycache__/utils.cpython-39.pyc and b/__pycache__/utils.cpython-39.pyc differ
 
app.py CHANGED
@@ -1,125 +1,13 @@
1
- from utils import (
2
- SentenceSimilarity,
3
- pos_tagging,
4
- text_analysis,
5
- text_interface,
6
- sentence_similarity,
7
- )
8
- from script import details
9
- from transformers import pipeline
10
  import gradio as gr
11
- from functools import partial
12
-
13
- pipes = {
14
- "Sentiment Analysis": pipeline(
15
- "text-classification",
16
- model="StevenLimcorn/indonesian-roberta-base-emotion-classifier",
17
- tokenizer="StevenLimcorn/indonesian-roberta-base-emotion-classifier",
18
- ),
19
- "Emotion Classifier": pipeline(
20
- "text-classification",
21
- model="w11wo/indonesian-roberta-base-sentiment-classifier",
22
- tokenizer="w11wo/indonesian-roberta-base-sentiment-classifier",
23
- ),
24
- "summarization": pipeline(
25
- "summarization",
26
- model="LazarusNLP/IndoNanoT5-base-IndoSum",
27
- tokenizer="LazarusNLP/IndoNanoT5-base-IndoSum",
28
- ),
29
- "sentence-similarity": SentenceSimilarity(model="LazarusNLP/all-indobert-base-v2"),
30
- "POS Tagging": pipeline(model="w11wo/indonesian-roberta-base-posp-tagger"),
31
- }
32
 
33
  if __name__ == "__main__":
34
- # list of collections of all demos
35
- classifiers = ["Sentiment Analysis", "Emotion Classifier"]
36
- # Summary
37
- summary_interface = gr.Interface.from_pipeline(
38
- pipes["summarization"],
39
- title="Summarization",
40
- examples=details["summarization"]["examples"],
41
- description=details["summarization"]["description"],
42
- allow_flagging="never",
43
- )
44
- # Pos Tagging
45
- pos_interface = gr.Interface(
46
- fn=partial(pos_tagging, pipe=pipes["POS Tagging"]),
47
- inputs=[
48
- gr.Textbox(placeholder="Masukan kalimat di sini...", label="Input Text"),
49
- ],
50
- outputs=[gr.HighlightedText()],
51
- title="POS Tagging",
52
- examples=details["POS Tagging"]["examples"],
53
- description=details["POS Tagging"]["description"],
54
- allow_flagging="never",
55
- )
56
- # Text Analysis
57
- with gr.Blocks() as text_analysis_interface:
58
- gr.Markdown("# Text Analysis")
59
- gr.Markdown(details["Text Analysis"]["description"])
60
- input_text = gr.Textbox(lines=5, label="Input Text")
61
- with gr.Row():
62
- smsa = gr.Label(label="Sentiment Analysis")
63
- emot = gr.Label(label="Emotion Classification")
64
- pos = gr.HighlightedText(label="POS Tagging")
65
- btn = gr.Button("Analyze")
66
- btn.click(
67
- fn=partial(text_analysis, pipes=pipes),
68
- inputs=[input_text],
69
- outputs=[smsa, emot, pos],
70
- )
71
- gr.Examples(
72
- details["Text Analysis"]["examples"],
73
- inputs=input_text,
74
- outputs=[smsa, emot, pos],
75
- )
76
-
77
- with gr.Blocks() as sentence_similarity_interface:
78
- gr.Markdown("# Document Search 🔍")
79
- gr.Markdown(details["sentence-similarity"]["description"])
80
- with gr.Row():
81
- with gr.Column():
82
- input_text = gr.Textbox(lines=5, label="Query")
83
- file_input = gr.File(
84
- label="Documents", file_types=[".txt"], file_count="multiple"
85
- )
86
- button = gr.Button("Search...")
87
- output = gr.Label()
88
- button.click(
89
- fn=partial(sentence_similarity, pipe=pipes["sentence-similarity"]),
90
- inputs=[input_text, file_input],
91
- outputs=[output],
92
- )
93
-
94
- demo_interface = {
95
- "demo": [
96
- text_interface(
97
- pipes[name],
98
- details[name]["examples"],
99
- name,
100
- name,
101
- details[name]["description"],
102
- )
103
- for name in classifiers
104
- ]
105
- + [
106
- sentence_similarity_interface,
107
- summary_interface,
108
- pos_interface,
109
- text_analysis_interface,
110
- ],
111
- "titles": classifiers
112
- + ["Document Search", "Summarization", "POS Tagging", "Text Analysis"],
113
- }
114
-
115
- # with gr.Blocks() as demo:
116
- # with gr.Column():
117
- # gr.Markdown("# Title")
118
- # gr.TabbedInterface(
119
- # demo_interface["demo"], demo_interface["titles"], theme="soft"
120
- # )
121
-
122
- demo = gr.TabbedInterface(
123
- demo_interface["demo"], demo_interface["titles"], theme="soft"
124
- )
125
- demo.launch()
 
1
from model import models
import gradio as gr

if __name__ == "__main__":
    # Every entry of ``models`` describes one tab: the "interface" value is the
    # builder callable and all remaining entries are forwarded to it as
    # keyword arguments.
    exclude_keys = ["interface"]

    # Tab captions are the registry keys, in registry order.
    titles = list(models)
    interfaces = [
        spec["interface"](
            **{key: value for key, value in spec.items() if key not in exclude_keys}
        )
        for spec in models.values()
    ]

    demo = gr.TabbedInterface(interfaces, titles, theme="soft")
    demo.launch(debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
model.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from utils import (
2
+ text_analysis_interface,
3
+ token_classification_interface,
4
+ search_interface,
5
+ text_interface,
6
+ SentenceSimilarity,
7
+ )
8
+ from transformers import pipeline
9
+
10
# Each checkpoint is instantiated exactly once and the resulting pipeline
# object is shared by every tab that uses it. Building a separate pipeline per
# tab loaded the sentiment, emotion and POS models twice each.
_SENTIMENT_PIPE = pipeline(
    "text-classification",
    model="w11wo/indonesian-roberta-base-sentiment-classifier",
    tokenizer="w11wo/indonesian-roberta-base-sentiment-classifier",
)
_EMOTION_PIPE = pipeline(
    "text-classification",
    model="StevenLimcorn/indonesian-roberta-base-emotion-classifier",
    tokenizer="StevenLimcorn/indonesian-roberta-base-emotion-classifier",
)
_POS_PIPE = pipeline(model="w11wo/indonesian-roberta-base-posp-tagger")

# Registry of demo tabs, keyed by tab caption. For every entry, "interface" is
# the builder callable and the remaining keys ("title", "examples",
# "output_label", "desc", "pipe") are the keyword arguments it is called with.
models = {
    "Text Analysis": {
        "title": "# Text Analysis",
        "examples": [
            "Siapa sih di dunia yg ngga punya hater? Rasul yg mulia aja punya. Budha aja punya. Nabi Isa aja punya. Nah apalagi eloh ama gueh .... ya kaaan",
            "saya ganteng, kalau tidak-suka mati saja kamu",
            "Bahaha.. dia ke kasir after me. Sambil ngangkat keresek belanjaanku, masih sempet liat mas nya nyodorin barang belanjaannya",
        ],
        # One label per pipeline in "pipe", in the same order.
        "output_label": ["Sentiment Analysis", "Emotion Classifier", "POS Tagging"],
        "desc": "A tool to showcase the full capabilities of text analysis LazarusNLP has to offer.",
        "interface": text_analysis_interface,
        "pipe": [_SENTIMENT_PIPE, _EMOTION_PIPE, _POS_PIPE],
    },
    "Sentiment Analysis": {
        "title": "Sentiment Analysis",
        "examples": [
            "saya kecewa karena pengeditan biodata penumpang dilakukan by sistem tanpa konfirmasi dan solusi permasalahan nya pun dianggap sepele karena dibiarkan begitu saja sedang pelayanan pelanggan yang sudah berkali-berkali dihubungi pun hanya seperti mengulur waktu.",
            "saya sudah transfer ratusan ribu dan sesuai nominal transfer. tapi tiket belum muncul juga. harus diwaspadai ini aplikasi ini.",
            "keren sekali aplikasi ini bisa menunjukan data diri secara detail, sangat di rekomendasikan untuk di pakai.",
        ],
        "output_label": "Sentiment Analysis",
        "desc": "A sentiment-text-classification model based on the RoBERTa model. The model was originally the pre-trained Indonesian RoBERTa Base model, which is then fine-tuned on indonlu's SmSA dataset consisting of Indonesian comments and reviews.",
        "interface": text_interface,
        "pipe": _SENTIMENT_PIPE,
    },
    "Emotion Detection": {
        "title": "Emotion Classifier",
        "examples": [
            "iya semoga itu karya terbaik mu adalah skripsi mu dan lucua2n mu tapi harapan aku dari kamu adalah kesembuhanmu nold",
            "saya ganteng, kalau tidak-suka mati saja kamu",
            "Bahaha.. dia ke kasir after me. Sambil ngangkat keresek belanjaanku, masih sempet liat mas nya nyodorin barang belanjaannya",
        ],
        "output_label": "Emotion Classifier",
        "desc": "An emotion classifier based on the RoBERTa model. The model was originally the pre-trained Indonesian RoBERTa Base model, which is then fine-tuned on indonlu's EmoT dataset",
        "interface": text_interface,
        "pipe": _EMOTION_PIPE,
    },
    # "summarization": {
    #     "examples": [],
    #     "desc": "This model is a fine-tuned version of LazarusNLP/IndoNanoT5-base on the indonlg dataset.",
    # },
    "POS Tagging": {
        "title": "POS Tagging",
        "examples": [
            "iya semoga itu karya terbaik mu adalah skripsi mu dan lucua2n mu tapi harapan aku dari kamu adalah kesembuhanmu nold",
            "saya ganteng, kalau tidak-suka mati saja kamu",
            "Bahaha.. dia ke kasir after me. Sambil ngangkat keresek belanjaanku, masih sempet liat mas nya nyodorin barang belanjaannya",
        ],
        "output_label": "POS Tagging",
        "desc": "A part-of-speech token-classification model based on the RoBERTa model. The model was originally the pre-trained Indonesian RoBERTa Base model, which is then fine-tuned on indonlu's POSP dataset consisting of tag-labelled news.",
        "interface": token_classification_interface,
        "pipe": _POS_PIPE,
    },
    "Document Search": {
        "title": "# Document Search 🔍",
        "examples": [],
        "output_label": "Top 5 related documents",
        "desc": "A semantic search tool to get the most related documents 📖 based on user's query.",
        "interface": search_interface,
        "pipe": SentenceSimilarity(model="LazarusNLP/all-indobert-base-v2"),
    },
}
utils.py CHANGED
@@ -1,11 +1,14 @@
1
  import gradio as gr
2
  from functools import partial
3
- from transformers import pipeline
4
  from sentence_transformers import SentenceTransformer, util
5
  from scipy.special import softmax
6
  import os
7
 
8
 
 
 
 
9
  class SentenceSimilarity:
10
 
11
  def __init__(self, model: str):
@@ -31,11 +34,31 @@ def sentence_similarity(text: str, documents: list[str], pipe: SentenceSimilarit
31
 
32
 
33
  # Text Analysis
34
- def cls_inference(input: list[str], pipe: pipeline) -> str:
35
  results = pipe(input, top_k=None)
36
  return {x["label"]: x["score"] for x in results}
37
 
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  def text_interface(
40
  pipe: pipeline, examples: list[str], output_label: str, title: str, desc: str
41
  ):
@@ -52,15 +75,85 @@ def text_interface(
52
  )
53
 
54
 
55
- # POSP
56
- def pos_tagging(text: str, pipe: pipeline):
57
- output = pipe(text)
58
- return {"text": text, "entities": output}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
 
61
- # Text Analysis
62
- def text_analysis(text, pipes: dict):
63
- sa = cls_inference(text, pipes["Sentiment Analysis"])
64
- emot = cls_inference(text, pipes["Emotion Classifier"])
65
- pos = pos_tagging(text, pipes["POS Tagging"])
66
- return (sa, emot, pos)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  from functools import partial
3
+ from transformers import pipeline, pipelines
4
  from sentence_transformers import SentenceTransformer, util
5
  from scipy.special import softmax
6
  import os
7
 
8
 
9
+ ######################
10
+ ##### INFERENCE ######
11
+ ######################
12
  class SentenceSimilarity:
13
 
14
  def __init__(self, model: str):
 
34
 
35
 
36
  # Text Analysis
37
def cls_inference(input: list[str], pipe: pipeline) -> dict:
    """Run a classification pipeline and map each predicted label to its score.

    Args:
        input: Text handed to ``pipe`` unchanged.
        pipe: Callable invoked as ``pipe(input, top_k=None)``; it must yield
            items that expose ``"label"`` and ``"score"`` keys.

    Returns:
        A ``{label: score}`` dictionary built from the pipeline output.
    """
    # NOTE(review): top_k=None presumably requests scores for every label -
    # confirm against the Transformers text-classification pipeline docs.
    scores = {}
    for prediction in pipe(input, top_k=None):
        scores[prediction["label"]] = prediction["score"]
    return scores
40
 
41
 
42
+ # POSP
43
def tagging(text: str, pipe: pipeline):
    """Tag ``text`` with ``pipe`` and bundle the input with the raw result.

    Args:
        text: Sentence passed to the pipeline.
        pipe: Callable invoked as ``pipe(text)``.

    Returns:
        A dict with the original ``"text"`` and the pipeline output under
        ``"entities"``.
    """
    return dict(text=text, entities=pipe(text))
46
+
47
+
48
+ # Text Analysis
49
def text_analysis(text, pipes: list[pipeline]):
    """Run every pipeline in ``pipes`` on ``text`` and collect the results.

    Token-classification pipelines are routed through ``tagging``; every other
    pipeline goes through ``cls_inference``.

    Args:
        text: Input text given to each pipeline.
        pipes: Pipelines to run, in the order their results should appear.

    Returns:
        A list with one result per pipeline, in the same order as ``pipes``.
    """
    token_pipeline_cls = pipelines.token_classification.TokenClassificationPipeline
    return [
        tagging(text, p) if isinstance(p, token_pipeline_cls) else cls_inference(text, p)
        for p in pipes
    ]
57
+
58
+
59
+ ######################
60
+ ##### INTERFACE ######
61
+ ######################
62
  def text_interface(
63
  pipe: pipeline, examples: list[str], output_label: str, title: str, desc: str
64
  ):
 
75
  )
76
 
77
 
78
def search_interface(
    pipe: SentenceSimilarity,
    examples: list[str],
    output_label: str,
    title: str,
    desc: str,
):
    """Build the document-search tab.

    Args:
        pipe: Sentence-similarity model bound to ``sentence_similarity`` as its
            ``pipe`` argument.
        examples: Not used by this tab; accepted so that all interface
            builders can be called with the same keyword arguments.
        output_label: Caption shown on the result component.
        title: Markdown rendered as the tab heading.
        desc: Markdown rendered below the heading.

    Returns:
        The assembled ``gr.Blocks`` instance.
    """
    # NOTE(review): the nesting of the Row/Column children was reconstructed
    # from a flattened diff - confirm the layout against the real file.
    with gr.Blocks() as sentence_similarity_interface:
        gr.Markdown(title)
        gr.Markdown(desc)
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(lines=5, label="Query")
                file_input = gr.File(
                    label="Documents", file_types=[".txt"], file_count="multiple"
                )
                button = gr.Button("Search...")
            # The caption must be passed as ``label=``: the first positional
            # parameter of gr.Label is the initial *value*, so passing the
            # caption positionally displayed it as a bogus prediction.
            output = gr.Label(label=output_label)
        button.click(
            fn=partial(sentence_similarity, pipe=pipe),
            inputs=[input_text, file_input],
            outputs=[output],
        )
    return sentence_similarity_interface
102
 
103
 
104
def token_classification_interface(
    pipe: pipeline, examples: list[str], output_label: str, title: str, desc: str
):
    """Build a single-textbox tab that highlights the tags produced by ``pipe``.

    Args:
        pipe: Pipeline bound to ``tagging`` as its ``pipe`` argument.
        examples: Example inputs offered by the interface.
        output_label: Caption of the highlighted-text output.
        title: Interface title.
        desc: Interface description.

    Returns:
        The configured ``gr.Interface``.
    """
    text_input = gr.Textbox(
        placeholder="Masukan kalimat di sini...", label="Input Text"
    )
    highlighted = gr.HighlightedText(label=output_label)
    tag_fn = partial(tagging, pipe=pipe)
    return gr.Interface(
        fn=tag_fn,
        inputs=[text_input],
        outputs=[highlighted],
        title=title,
        description=desc,
        examples=examples,
        allow_flagging="never",
    )
118
+
119
+
120
def text_analysis_interface(
    pipe: list, examples: list[str], output_label: str, title: str, desc: str
):
    """Build the combined text-analysis tab with one output per pipeline.

    Args:
        pipe: Pipelines to run; forwarded to ``text_analysis`` as ``pipes``.
        examples: Example inputs listed under the form.
        output_label: Iterated in lockstep with ``pipe`` to caption each
            output component, so despite the ``str`` annotation it is expected
            to hold one caption per pipeline.
        title: Markdown rendered as the tab heading.
        desc: Markdown rendered below the heading.

    Returns:
        The assembled ``gr.Blocks`` instance.
    """
    token_pipeline_cls = pipelines.token_classification.TokenClassificationPipeline
    # NOTE(review): the nesting of the Row children was reconstructed from a
    # flattened diff - confirm the layout against the real file.
    with gr.Blocks() as analysis_blocks:
        gr.Markdown(title)
        gr.Markdown(desc)
        input_text = gr.Textbox(lines=5, label="Input Text")
        with gr.Row():
            outputs = []
            for label, p in zip(output_label, pipe):
                # Token classifiers render as highlighted spans, everything
                # else as a label/score widget.
                if isinstance(p, token_pipeline_cls):
                    outputs.append(gr.HighlightedText(label=label))
                else:
                    outputs.append(gr.Label(label=label))
        btn = gr.Button("Analyze")
        analyze = partial(text_analysis, pipes=pipe)
        btn.click(fn=analyze, inputs=[input_text], outputs=outputs)
        gr.Examples(examples=examples, inputs=input_text, outputs=outputs)
    return analysis_blocks
150
+
151
+
152
+ # Summary
153
+ # summary_interface = gr.Interface.from_pipeline(
154
+ # pipes["summarization"],
155
+ # title="Summarization",
156
+ # examples=details["summarization"]["examples"],
157
+ # description=details["summarization"]["description"],
158
+ # allow_flagging="never",
159
+ # )