Spaces:
Runtime error
Runtime error
matthewfarant
committed on
Commit
•
d6df412
1
Parent(s):
95ecb1f
Add types
Browse files
app.py
CHANGED
@@ -11,6 +11,9 @@ from simpletransformers.classification import ClassificationModel
|
|
11 |
from transformers import pipeline
|
12 |
import gradio as gr
|
13 |
|
|
|
|
|
|
|
14 |
def is_nan(text):
|
15 |
try:
|
16 |
value = float(text)
|
@@ -43,73 +46,100 @@ def prepare_catalog():
|
|
43 |
return combined_catalog
|
44 |
|
45 |
# Your existing decision function
|
46 |
-
def decision(user_input, catalog, product_name_catalog):
|
47 |
# Initialize the model
|
48 |
-
|
49 |
-
|
|
|
50 |
# Extract formula
|
51 |
user_input_formula = re.findall(r'\d{1,2}\s*[- ]\s*\d{1,2}\s*[- ]\s*\d{1,2}', user_input)
|
52 |
user_input_formula = np.nan if len(user_input_formula) == 0 else user_input_formula[0]
|
53 |
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
|
|
|
|
|
|
71 |
else:
|
72 |
-
return
|
73 |
-
|
74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
77 |
catalog = prepare_catalog()
|
78 |
-
return decision(input, catalog, "Registered Product")
|
79 |
|
80 |
# Initialize the app
|
81 |
demo = gr.Interface(
|
82 |
fn=app,
|
83 |
-
inputs=
|
|
|
|
|
|
|
84 |
outputs="text",
|
85 |
-
examples= [
|
|
|
|
|
|
|
|
|
|
|
86 |
title = 'Fertilizer Catalog Engine π½',
|
87 |
description = 'Catalog Search Engine and Decision Support System for Fertilizer Company',
|
88 |
article= """
|
89 |
-
|
90 |
-
|
91 |
### About The App
|
92 |
|
93 |
This app is built as a part of the Data Science Weekend (DSW) 2023 Challenge submission. This app aims to help fertilizer companies to map
|
94 |
free-text POS data of multiple types of products into their own fertilizer catalog. By using this app, the company will be able to
|
95 |
decide whether a product is already available in their catalog, or whether it is a new product that needs (and eligible) to be added to
|
96 |
the catalog. <br>
|
97 |
-
|
98 |
### How Does it Work?
|
99 |
-
|
100 |
This app uses a combination of fuzzy matching and machine learning to determine whether a product is already available in the catalog or not.
|
101 |
When a product is not available in the catalog, we will use an IndoBERT model to determine if the product is a fertilizer and eligible to be
|
102 |
added to the catalog. Beforehand, we have fine-tuned the IndoBERT model using a combination of internal and external (web scraping) data, so the
|
103 |
model will be able to learn how fertilizer products (especially the local ones) look like. <br>
|
104 |
-
|
105 |
### What are the Flags For?
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
flagging the prediction result π <br>
|
110 |
-
|
111 |
### I want to test multiple inputs at once!
|
112 |
-
|
113 |
You can also use our app via API by clicking the "Use via API" below. The API will give developer more flexibility to test multiple inputs
|
114 |
programmatically. <br>
|
115 |
|
@@ -124,4 +154,4 @@ demo = gr.Interface(
|
|
124 |
# Run the app
|
125 |
|
126 |
if __name__ == "__main__":
|
127 |
-
demo.launch(show_api=True)
|
|
|
11 |
from transformers import pipeline
|
12 |
import gradio as gr
|
13 |
|
14 |
+
# set current directory
|
15 |
+
os.chdir(os.path.dirname(os.path.abspath(__file__)))
|
16 |
+
|
17 |
def is_nan(text):
|
18 |
try:
|
19 |
value = float(text)
|
|
|
46 |
return combined_catalog
|
47 |
|
48 |
# Your existing decision function
|
49 |
+
# Loaded Hugging Face pipelines, keyed by model id. Loading a model is slow,
# so each one is created at most once per process and only when a request
# actually needs it.
_PIPELINE_CACHE = {}


def _get_pipeline(model_id):
    """Return a cached text-classification pipeline for *model_id*.

    The Hugging Face access token is taken from the ``HF_TOKEN`` environment
    variable (e.g. a Space secret); it must never be embedded in the source.
    """
    import os  # local import keeps this helper self-contained

    if model_id not in _PIPELINE_CACHE:
        _PIPELINE_CACHE[model_id] = pipeline(
            "text-classification",
            model=model_id,
            token=os.environ.get("HF_TOKEN"),
        )
    return _PIPELINE_CACHE[model_id]


def _with_fuzzy_scores(catalog, user_input, user_input_formula, product_name_catalog):
    """Return a copy of *catalog* with fuzzy name and formula similarity columns."""
    # Work on a copy so neither the caller's DataFrame nor a filtered slice
    # of it is written to.
    scored = catalog.copy()
    scored['Similarity Score'] = scored[product_name_catalog].apply(
        lambda name: fuzz.token_set_ratio(user_input, name, processor=utils.default_process)
    )
    scored['Formula Similarity'] = scored['Formula'].apply(
        lambda formula: fuzz.token_set_ratio(user_input_formula, formula, processor=utils.default_process)
    )
    return scored


def decision(user_input, type, catalog, product_name_catalog):
    """Match a free-text product description against the catalog.

    Args:
        user_input: Free-text product description.
        type: Search mode; one of ``'Fuzzy Search'``, ``'Training Mode'`` or
            ``'Probabilistic Search'``. (The name shadows the builtin but is
            kept because it is part of the existing signature.)
        catalog: DataFrame with at least the columns ``'Source'``,
            ``'Formula'``, ``'Registered Product'`` and *product_name_catalog*.
            It is not modified.
        product_name_catalog: Name of the column holding the product names
            that *user_input* is fuzzy-matched against.

    Returns:
        * ``'Fuzzy Search'``: a decision message prefixed ``[1]`` .. ``[6]``.
        * ``'Training Mode'``: the ``'Registered Product'`` value of the best
          fuzzy match among rows whose ``'Source'`` is ``'Product Catalog'``.
        * ``'Probabilistic Search'``: a message with the matching model's
          score for the best ``'Product Catalog'`` row.

    Raises:
        ValueError: If *type* is not one of the supported search modes.
    """
    # Extract the first "N-N-N" style formula: three 1-2 digit numbers
    # separated by hyphens or spaces. NaN marks "no formula found".
    formulas = re.findall(r'\d{1,2}\s*[- ]\s*\d{1,2}\s*[- ]\s*\d{1,2}', user_input)
    user_input_formula = formulas[0] if formulas else np.nan

    if type == 'Fuzzy Search':
        scored = _with_fuzzy_scores(catalog, user_input, user_input_formula, product_name_catalog)

        # Take the single best row; on ties the ascending 'Source' sort puts
        # "Product Catalog" ahead of "Registered Fertilizers".
        top = scored.sort_values(
            by=['Similarity Score', 'Formula Similarity', 'Source'],
            ascending=[False, False, True],
        ).head(1)

        score = top['Similarity Score'].values[0]
        formula_similarity = top['Formula Similarity'].values[0]
        source = top['Source'].values[0]
        formula = top['Formula'].values[0]
        registered = top['Registered Product'].values[0]

        if score >= 80 and source == 'Product Catalog' and (formula_similarity == 100 or is_nan(formula)):
            return f"[1] Product is Available in Catalog (SKU Registered as *{registered}*)"
        elif score >= 80 and source == 'Registered Fertilizers' and (formula_similarity == 100 or is_nan(formula)):
            return f"[2] Add as New Product (Registered in Kementan as *{registered}*)"
        elif score >= 80 and formula_similarity < 100:
            return f"[3] Add as New Product (Similar to *{registered}* in {source} but with different formula)"
        elif score < 80:
            # No close catalog match: ask the classifier once and reuse the
            # prediction for both the test and the message.
            prediction = _get_pipeline("matthewfarant/indobert-fertilizer-classifier")(user_input)[0]
            label = prediction['label']
            probability = np.round(prediction['score'] * 100, 2)
            if label == 'Fertilizer' and prediction['score'] > 0.8:
                return f"[4] Add as New Product ({probability}% probability of being a fertilizer)"
            else:
                return f"[5] Product might not be a Fertilizer ({probability}% probability of being a {label})"
        else:
            return "[6] Product is not a Fertilizer"

    elif type == 'Training Mode':
        # Same fuzzy matching as above, restricted to the company's own catalog.
        own_catalog = catalog[catalog['Source'] == 'Product Catalog']
        scored = _with_fuzzy_scores(own_catalog, user_input, user_input_formula, product_name_catalog)
        top = scored.sort_values(by=['Similarity Score', 'Formula Similarity'], ascending=False).head(1)
        return top['Registered Product'].values[0]

    elif type == 'Probabilistic Search':
        candidates = catalog[catalog['Source'] == 'Product Catalog'].copy()
        pipe_match = _get_pipeline("matthewfarant/indobert-fertilizer-matching")

        # The matching model scores the pair "<input> dan <catalog product>".
        candidates['Concat Input'] = user_input + ' dan ' + candidates['Registered Product'].astype(str)
        candidates['Similarity Score'] = candidates['Concat Input'].apply(lambda text: pipe_match(text)[0]['score'])

        top = candidates.sort_values(by=['Similarity Score'], ascending=False).head(1)
        return f"{np.round(top['Similarity Score'].values[0] * 100,2)}% probability of being a {top['Registered Product'].values[0]}"

    # Previously an unknown mode fell through and silently returned None.
    raise ValueError(f"Unsupported search type: {type!r}")
|
102 |
+
|
103 |
+
|
104 |
+
def app(input, type):
    """Gradio callback: validate the form values and run the catalog search.

    Args:
        input: Product description typed by the user.
        type: Selected search mode.

    Returns:
        The result of ``decision`` for the given input, or a prompt asking
        the user to complete the form when either value is missing.
    """
    # An empty Textbox submits "" rather than None, so blank or
    # whitespace-only text has to be rejected as well as None.
    if input is None or not str(input).strip() or not type:
        return "Please fill in the input and select the search type"
    catalog = prepare_catalog()
    return decision(input, type, catalog, "Registered Product")
|
109 |
|
110 |
# Initialize the app
|
111 |
demo = gr.Interface(
|
112 |
fn=app,
|
113 |
+
inputs=[
|
114 |
+
gr.Textbox(),
|
115 |
+
gr.Radio(["Fuzzy Search", "Probabilistic Search", "Training Mode"], type="value")
|
116 |
+
],
|
117 |
outputs="text",
|
118 |
+
examples= [
|
119 |
+
['Petro Nitrat 16-16-16','Fuzzy Search'],
|
120 |
+
['Petro Nitrat 15-15-15','Fuzzy Search'],
|
121 |
+
['Gramoxone 1 Liter','Fuzzy Search'],
|
122 |
+
['Indomie Goreng Aceh','Fuzzy Search']
|
123 |
+
],
|
124 |
title = 'Fertilizer Catalog Engine π½',
|
125 |
description = 'Catalog Search Engine and Decision Support System for Fertilizer Company',
|
126 |
article= """
|
|
|
|
|
127 |
### About The App
|
128 |
|
129 |
This app is built as a part of the Data Science Weekend (DSW) 2023 Challenge submission. This app aims to help fertilizer companies to map
|
130 |
free-text POS data of multiple types of products into their own fertilizer catalog. By using this app, the company will be able to
|
131 |
decide whether a product is already available in their catalog, or whether it is a new product that needs (and eligible) to be added to
|
132 |
the catalog. <br>
|
|
|
133 |
### How Does it Work?
|
|
|
134 |
This app uses a combination of fuzzy matching and machine learning to determine whether a product is already available in the catalog or not.
|
135 |
When a product is not available in the catalog, we will use an IndoBERT model to determine if the product is a fertilizer and eligible to be
|
136 |
added to the catalog. Beforehand, we have fine-tuned the IndoBERT model using a combination of internal and external (web scraping) data, so the
|
137 |
model will be able to learn how fertilizer products (especially the local ones) look like. <br>
|
|
|
138 |
### What are the Flags For?
|
139 |
+
The flag is a part of the "Active Transfer Learning" feature of this app when the user selects "Training Mode". When a user flags an output as "Correct" or "Incorrect",
|
140 |
+
the developer will be able to fine-tune the model using the user's input, hence improving the model's performance when the user selects "Probabilistic Search". So, please
|
141 |
+
help us to improve the model by flagging the prediction result π <br>
|
|
|
|
|
142 |
### I want to test multiple inputs at once!
|
|
|
143 |
You can also use our app via API by clicking the "Use via API" below. The API will give developer more flexibility to test multiple inputs
|
144 |
programmatically. <br>
|
145 |
|
|
|
154 |
# Run the app
|
155 |
|
156 |
if __name__ == "__main__":
|
157 |
+
demo.launch(show_api=True)
|