Spaces:
Runtime error
Runtime error
Commit
·
d4218cc
1
Parent(s):
2034b44
Fix Issues
Browse files- app.py +10 -4
- fetch_prod.py +10 -0
app.py
CHANGED
@@ -63,8 +63,12 @@ def classify(text, is_unseen):
|
|
63 |
def scrape_click(url):
|
64 |
out = scraper.get_product(url)
|
65 |
if isinstance(out, str):
|
66 |
-
|
67 |
-
|
|
|
|
|
|
|
|
|
68 |
|
69 |
text = out['description']
|
70 |
if text not in cache:
|
@@ -94,7 +98,7 @@ with gr.Blocks(css="#warning {height: 100%}") as demo:
|
|
94 |
description = "<p style='font-size: 14px; margin: 5px; font-weight: w300; text-align: center'> <a href='https://github.com/Pranjal2041' style='text-decoration:none' target='_blank'>Pranjal Aggarwal, </a> <a href='' style='text-decoration:none' target='_blank'>Ameet Deshpande, </a> <a href='' style='text-decoration:none' target='_blank'>Karthik Narasimhan </a> </p>" \
|
95 |
+ "<p style='font-size: 16px; margin: 5px; font-weight: w600; text-align: center'> <a href='https://sites.google.com/view/semsup-xc/home' target='_blank'>Project Page</a> | <a href='https://arxiv.org/abs/' target='_blank'>Arxiv</a> | <a href='https://github.com/princeton-nlp/SemSup-XC' target='_blank'>Github Repo</a></p>" \
|
96 |
+ "<p style='text-align: center; margin: 5px; font-size: 14px; font-weight: w300;'> \
|
97 |
-
Extreme classification (XC) considers the scenario of predicting over a very large number of classes (thousands to millions), with real-world applications including serving search engine results, e-commerce product tagging, and news article classification.
|
98 |
</p>" \
|
99 |
# gr.HTML(description)
|
100 |
gr.Markdown(description)
|
@@ -117,7 +121,8 @@ with gr.Blocks(css="#warning {height: 100%}") as demo:
|
|
117 |
<br>
|
118 |
<br>
|
119 |
Our model was trained on over 1 million product descriptions from Amazon on 6500 different categories.
|
120 |
-
SemSup-XC can generalize to unseen labels.
|
|
|
121 |
You can also fetch product descriptions by simply entering the product link, and classify categories on both seen and unseen labels.
|
122 |
"""
|
123 |
)
|
@@ -182,6 +187,7 @@ with gr.Blocks(css="#warning {height: 100%}") as demo:
|
|
182 |
|
183 |
# classify_btn.click(lambda value, is_unseen: gr.update(value = classify(value, is_unseen == 'Unseen Labels'), visible = True), inputs = [text_box, radio_btn], outputs=classified_labels_text)
|
184 |
classify_btn.click(lambda value, is_unseen: gr.update(value = format_labels_html(classify(value, is_unseen == 'Unseen Labels'), desc_is_visible = descriptions_visible), visible = True), inputs = [text_box, radio_btn], outputs=label_html)
|
|
|
185 |
|
186 |
random_example_btn.click(lambda value: gr.update(value = get_random_example()), inputs= random_example_btn, outputs=text_box)
|
187 |
random_example_btn.click(lambda value: (gr.update(visible = False), gr.update(visible = False), gr.update(visible = False)), inputs = random_example_btn, outputs=[label_html, gold_labels, toggle_descriptions])
|
|
|
63 |
def scrape_click(url):
|
64 |
out = scraper.get_product(url)
|
65 |
if isinstance(out, str):
|
66 |
+
if out == 'Invalid URL':
|
67 |
+
gr.Error("Please enter a valid Amazon URL")
|
68 |
+
else:
|
69 |
+
gr.Error("Error Occured. Check the URL or try again later.")
|
70 |
+
print('Error Occured', out)
|
71 |
+
return
|
72 |
|
73 |
text = out['description']
|
74 |
if text not in cache:
|
|
|
98 |
description = "<p style='font-size: 14px; margin: 5px; font-weight: w300; text-align: center'> <a href='https://github.com/Pranjal2041' style='text-decoration:none' target='_blank'>Pranjal Aggarwal, </a> <a href='' style='text-decoration:none' target='_blank'>Ameet Deshpande, </a> <a href='' style='text-decoration:none' target='_blank'>Karthik Narasimhan </a> </p>" \
|
99 |
+ "<p style='font-size: 16px; margin: 5px; font-weight: w600; text-align: center'> <a href='https://sites.google.com/view/semsup-xc/home' target='_blank'>Project Page</a> | <a href='https://arxiv.org/abs/' target='_blank'>Arxiv</a> | <a href='https://github.com/princeton-nlp/SemSup-XC' target='_blank'>Github Repo</a></p>" \
|
100 |
+ "<p style='text-align: center; margin: 5px; font-size: 14px; font-weight: w300;'> \
|
101 |
+
Extreme classification (XC) considers the scenario of predicting over a very large number of classes (thousands to millions), with real-world applications including serving search engine results, e-commerce product tagging, and news article classification. A real-life requirement in this domain is to predict from labels unseen during training(Zero-Shot), however there have been very little success in this domain. To this end, we propose SemSup-XC, a model that achieves state-of-the-art zero-shot (ZS) and few-shot (FS) performance on three extreme classification benchmarks spanning various domains. Instead of treating labels as class ids, our model learns from diverse descriptions of them, thereby attaining a more better understanding of the label space, evident from qualitative and quantitative results. \
|
102 |
</p>" \
|
103 |
# gr.HTML(description)
|
104 |
gr.Markdown(description)
|
|
|
121 |
<br>
|
122 |
<br>
|
123 |
Our model was trained on over 1 million product descriptions from Amazon on 6500 different categories.
|
124 |
+
SemSup-XC can generalize to both seen and unseen labels.
|
125 |
+
You can either use already available examples or enter your own text to classify.
|
126 |
You can also fetch product descriptions by simply entering the product link, and classify categories on both seen and unseen labels.
|
127 |
"""
|
128 |
)
|
|
|
187 |
|
188 |
# classify_btn.click(lambda value, is_unseen: gr.update(value = classify(value, is_unseen == 'Unseen Labels'), visible = True), inputs = [text_box, radio_btn], outputs=classified_labels_text)
|
189 |
classify_btn.click(lambda value, is_unseen: gr.update(value = format_labels_html(classify(value, is_unseen == 'Unseen Labels'), desc_is_visible = descriptions_visible), visible = True), inputs = [text_box, radio_btn], outputs=label_html)
|
190 |
+
classify_btn.click(lambda x: gr.update(visible=True), inputs = classify_btn, outputs = label_html)
|
191 |
|
192 |
random_example_btn.click(lambda value: gr.update(value = get_random_example()), inputs= random_example_btn, outputs=text_box)
|
193 |
random_example_btn.click(lambda value: (gr.update(visible = False), gr.update(visible = False), gr.update(visible = False)), inputs = random_example_btn, outputs=[label_html, gold_labels, toggle_descriptions])
|
fetch_prod.py
CHANGED
@@ -10,7 +10,17 @@ class Scraper:
|
|
10 |
def __init__(self):
|
11 |
...
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
def get_product(self, url : str) -> Dict:
|
|
|
|
|
|
|
14 |
webpage = requests.get(url, headers=FakeHttpHeader().as_header_dict())
|
15 |
f = open('webpage_out.html','w')
|
16 |
f.write(webpage.content.decode())
|
|
|
10 |
def __init__(self):
|
11 |
...
|
12 |
|
13 |
+
def sanity_url(self, url : str) -> bool:
    """Return True when ``url`` looks like an Amazon product link.

    This is a coarse, case-sensitive substring screen, not real URL
    validation: the text must contain both ``'amazon'`` and ``'product'``
    somewhere in it. The host is not parsed, so a match does not prove the
    link actually points at Amazon.

    Args:
        url: The link text to screen.

    Returns:
        True if both markers are present, otherwise False. Anything that
        is not a ``str`` (for example ``None``) is reported as invalid
        instead of raising ``AttributeError``.
    """
    # Guard first: a missing/non-text value has no substrings to inspect.
    if not isinstance(url, str):
        return False
    # `in` is the idiomatic membership test; `find() == -1` is only needed
    # when the match position matters.
    return 'amazon' in url and 'product' in url
|
19 |
+
|
20 |
def get_product(self, url : str) -> Dict:
|
21 |
+
if not self.sanity_url(url):
|
22 |
+
return 'Invalid URL'
|
23 |
+
|
24 |
webpage = requests.get(url, headers=FakeHttpHeader().as_header_dict())
|
25 |
f = open('webpage_out.html','w')
|
26 |
f.write(webpage.content.decode())
|