Pranjal2041 committed
Commit d4218cc
1 Parent(s): 2034b44

Fix Issues

Files changed (2)
  1. app.py +10 -4
  2. fetch_prod.py +10 -0
app.py CHANGED
@@ -63,8 +63,12 @@ def classify(text, is_unseen):
 def scrape_click(url):
     out = scraper.get_product(url)
     if isinstance(out, str):
-        print('Error Occured', out)
-        return
+        if out == 'Invalid URL':
+            gr.Error("Please enter a valid Amazon URL")
+        else:
+            gr.Error("Error Occured. Check the URL or try again later.")
+        print('Error Occured', out)
+        return
 
     text = out['description']
     if text not in cache:
@@ -94,7 +98,7 @@ with gr.Blocks(css="#warning {height: 100%}") as demo:
     description = "<p style='font-size: 14px; margin: 5px; font-weight: w300; text-align: center'> <a href='https://github.com/Pranjal2041' style='text-decoration:none' target='_blank'>Pranjal Aggarwal, </a> <a href='' style='text-decoration:none' target='_blank'>Ameet Deshpande, </a> <a href='' style='text-decoration:none' target='_blank'>Karthik Narasimhan </a> </p>" \
         + "<p style='font-size: 16px; margin: 5px; font-weight: w600; text-align: center'> <a href='https://sites.google.com/view/semsup-xc/home' target='_blank'>Project Page</a> | <a href='https://arxiv.org/abs/' target='_blank'>Arxiv</a> | <a href='https://github.com/princeton-nlp/SemSup-XC' target='_blank'>Github Repo</a></p>" \
         + "<p style='text-align: center; margin: 5px; font-size: 14px; font-weight: w300;'> \
-        Extreme classification (XC) considers the scenario of predicting over a very large number of classes (thousands to millions), with real-world applications including serving search engine results, e-commerce product tagging, and news article classification. The zero-shot version of this task involves the addition of new categories at test time, requiring models to generalize to novel classes without additional training data (e.g. one may add a new class “fidget spinner” for e-commerce product tagging). In this paper, we develop SEMSUP-XC, a model that achieves state-of-the-art zero-shot (ZS) and few-shot (FS) performance on three extreme classification benchmarks spanning the domains of law, e-commerce, and Wikipedia. SEMSUP-XC builds upon the recently proposed framework of semantic supervision that uses semantic label descriptions to represent and generalize to classes (e.g., “fidget spinner” described as “A popular spinning toy intended as a stress reliever”). Specifically, we use a combination of contrastive learning, a hybrid lexico-semantic similarity module and automated description collection to train SEMSUP-XC efficiently over extremely large class spaces. SEMSUP-XC significantly outperforms baselines and state-of-the-art models on all three datasets, by up to 6-10 precision@1 points on zero-shot classification and >10 precision points on few-shot classification, with similar gains for recall@10 (3 for zero-shot and 2 for few-shot). Our ablation studies show the relative importance of various components and conclude the combined importance of the proposed architecture and automatically scraped descriptions with improvements up to 33 precision@1 points. Furthermore, qualitative analyses demonstrate SEMSUP-XC's better understanding of label space than other state-of-the-art models. \
+        Extreme classification (XC) considers the scenario of predicting over a very large number of classes (thousands to millions), with real-world applications including serving search engine results, e-commerce product tagging, and news article classification. A real-life requirement in this domain is to predict from labels unseen during training(Zero-Shot), however there have been very little success in this domain. To this end, we propose SemSup-XC, a model that achieves state-of-the-art zero-shot (ZS) and few-shot (FS) performance on three extreme classification benchmarks spanning various domains. Instead of treating labels as class ids, our model learns from diverse descriptions of them, thereby attaining a more better understanding of the label space, evident from qualitative and quantitative results. \
         </p>" \
         # gr.HTML(description)
     gr.Markdown(description)
@@ -117,7 +121,8 @@ with gr.Blocks(css="#warning {height: 100%}") as demo:
     <br>
     <br>
     Our model was trained on over 1 million product descriptions from Amazon on 6500 different categories.
-    SemSup-XC can generalize to unseen labels.
+    SemSup-XC can generalize to both seen and unseen labels.
+    You can either use already available examples or enter your own text to classify.
     You can also fetch product descriptions by simply entering the product link, and classify categories on both seen and unseen labels.
     """
     )
@@ -182,6 +187,7 @@ with gr.Blocks(css="#warning {height: 100%}") as demo:
 
     # classify_btn.click(lambda value, is_unseen: gr.update(value = classify(value, is_unseen == 'Unseen Labels'), visible = True), inputs = [text_box, radio_btn], outputs=classified_labels_text)
     classify_btn.click(lambda value, is_unseen: gr.update(value = format_labels_html(classify(value, is_unseen == 'Unseen Labels'), desc_is_visible = descriptions_visible), visible = True), inputs = [text_box, radio_btn], outputs=label_html)
+    classify_btn.click(lambda x: gr.update(visible=True), inputs = classify_btn, outputs = label_html)
 
     random_example_btn.click(lambda value: gr.update(value = get_random_example()), inputs= random_example_btn, outputs=text_box)
     random_example_btn.click(lambda value: (gr.update(visible = False), gr.update(visible = False), gr.update(visible = False)), inputs = random_example_btn, outputs=[label_html, gold_labels, toggle_descriptions])
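For readers less familiar with Gradio's event model, the sketch below (not code from this repository; fetch_description, url_box, out_box, and fetch_btn are hypothetical names) shows the pattern this hunk wires up: a button click calls a handler, gr.update(...) returned from that handler fills and un-hides an output component, and errors are surfaced to the user by raising gr.Error, which Gradio treats as an exception type rather than a plain function call.

import gradio as gr

def fetch_description(url: str) -> str:
    # Hypothetical handler: reject obviously bad input before doing any work.
    # gr.Error is an exception type; raising it makes Gradio display the
    # message to the user, while merely constructing it has no visible effect.
    if 'amazon' not in url:
        raise gr.Error("Please enter a valid Amazon URL")
    return f"Fetched description for {url}"

with gr.Blocks() as demo:
    url_box = gr.Textbox(label="Product URL")
    out_box = gr.Textbox(label="Description", visible=False)
    fetch_btn = gr.Button("Fetch")
    # One click both fills the hidden textbox and makes it visible,
    # mirroring the gr.update(value=..., visible=True) calls in the diff above.
    fetch_btn.click(
        lambda u: gr.update(value=fetch_description(u), visible=True),
        inputs=url_box,
        outputs=out_box,
    )

if __name__ == '__main__':
    demo.launch()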
fetch_prod.py CHANGED
@@ -10,7 +10,17 @@ class Scraper:
     def __init__(self):
         ...
 
+    def sanity_url(self, url : str) -> bool:
+        if url.find('amazon')==-1:
+            return False
+        if url.find('product')==-1:
+            return False
+        return True
+
     def get_product(self, url : str) -> Dict:
+        if not self.sanity_url(url):
+            return 'Invalid URL'
+
         webpage = requests.get(url, headers=FakeHttpHeader().as_header_dict())
         f = open('webpage_out.html','w')
         f.write(webpage.content.decode())
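The sanity_url check above uses plain substring matching, and get_product signals failure by returning a string, which app.py's scrape_click detects with isinstance(out, str). As an illustrative alternative only (an assumption, not code from the repository), a stricter validator could parse the URL before any request is made:

from urllib.parse import urlparse

def sanity_url(url: str) -> bool:
    # Hypothetical stricter variant of the substring check in the diff above:
    # require an http(s) scheme, an amazon.* host, and a product-style path.
    parsed = urlparse(url)
    if parsed.scheme not in ('http', 'https'):
        return False
    if 'amazon.' not in parsed.netloc.lower():
        return False
    return '/dp/' in parsed.path or '/gp/product/' in parsed.path

# Illustrative usage:
#   sanity_url('https://www.amazon.com/dp/B000000000')   -> True
#   sanity_url('https://example.com/product/123')        -> False

Either way, keeping the "return a string on failure" convention means the caller's isinstance(out, str) branch in scrape_click continues to work unchanged.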