Spaces:
Runtime error
Runtime error
File size: 6,544 Bytes
f657d03 71775e2 f657d03 71775e2 ad2aa16 71775e2 ad2aa16 71775e2 97a23a7 71775e2 97a23a7 71775e2 f657d03 71775e2 97a23a7 71775e2 97a23a7 71775e2 f657d03 71775e2 f657d03 71775e2 f657d03 f1a2cd3 0261490 b2dc915 ad2aa16 f657d03 efbdffe ad2aa16 b2dc915 0261490 49d0e05 0261490 f2a3610 aa955f2 4d7e750 34aaf31 efbdffe 4d7e750 ad2aa16 f657d03 71775e2 ad2aa16 f657d03 71775e2 f657d03 71775e2 97a23a7 71775e2 f657d03 71775e2 f657d03 b2dc915 49d0e05 0261490 e4404b6 4d7e750 34aaf31 ad2aa16 73bedae f657d03 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
import datetime
import gradio as gr
from langdetect import detect, DetectorFactory, detect_langs
from transformers import pipeline
# Zero-shot classification checkpoint for each supported language code.
# The value is handed to the transformers pipeline as its ``model`` argument.
models = dict(
    en="Narsil/deberta-large-mnli-zero-cls",  # English
    de="Sahajtomar/German_Zeroshot",  # German
    es="Recognai/zeroshot_selectra_medium",  # Spanish
    it="joeddav/xlm-roberta-large-xnli",  # Italian
    ru="DeepPavlov/xlm-roberta-large-en-ru-mnli",  # Russian
    tr="vicgalle/xlm-roberta-large-xnli-anli",  # Turkish
    no="NbAiLab/nb-bert-base-mnli",  # Norwegian
)
# Hypothesis sentence for each supported language code, passed to the
# zero-shot pipeline as ``hypothesis_template``; ``{}` is the placeholder
# slot inside each sentence.
hypothesis_templates = {
    "en": "This example is {}.",  # English
    "de": "Dieses beispiel ist {}.",  # German
    "es": "Este ejemplo es {}.",  # Spanish
    "it": "Questo esempio è {}.",  # Italian
    "ru": "Этот пример {}.",  # Russian
    "tr": "Bu örnek {}.",  # Turkish
    "no": "Dette eksempelet er {}.",  # Norwegian
}
def detect_lang(sequence, labels):
    """Pick the language code used to select a classification model.

    Runs langdetect on the text and on the raw label string, reports the
    outcome with ``print``, and returns the language detected for the text
    when ``models`` has an entry for it.

    Args:
        sequence: Text that is going to be classified.
        labels: Candidate labels as a single comma-separated string.

    Returns:
        A key of ``models``. Falls back to ``'en'`` when detection fails or
        when the detected language has no entry in ``models``.
    """
    # A fixed seed keeps langdetect's results repeatable between calls.
    DetectorFactory.seed = 0
    seq_lang = 'en'
    lbl_lang = None  # stays None when label detection did not run or failed
    try:
        seq_lang = detect(sequence)
        lbl_lang = detect(labels)
    except Exception:
        # Detection is best effort: report the failure and carry on with
        # whatever has been determined so far (English by default).
        print("Language detection failed!",
              "Date:{}, Sequence:{}, Labels:{}".format(
                  str(datetime.datetime.now()),
                  sequence,
                  labels))

    # Only report a mismatch when a label language was actually detected.
    if lbl_lang is not None and seq_lang != lbl_lang:
        print("Different languages detected for sequence and labels!",
              "Date:{}, Sequence:{}, Labels:{}, Sequence Language:{}, Label Language:{}".format(
                  str(datetime.datetime.now()),
                  sequence,
                  labels,
                  seq_lang,
                  lbl_lang))

    if seq_lang in models:
        print("Sequence Language detected.",
              "Date:{}, Sequence:{}, Sequence Language:{}".format(
                  str(datetime.datetime.now()),
                  sequence,
                  seq_lang))
    else:
        print("Language not supported. Defaulting to English!",
              "Date:{}, Sequence:{}, Sequence Language:{}".format(
                  str(datetime.datetime.now()),
                  sequence,
                  seq_lang))
        seq_lang = 'en'
    return seq_lang
def sequence_to_classify(sequence, labels):
    """Score ``sequence`` against a comma-separated list of candidate labels.

    The language is chosen with ``detect_lang``; the matching entries of
    ``models`` and ``hypothesis_templates`` configure a zero-shot
    classification pipeline, which is run in multi-label mode.

    Args:
        sequence: Text to classify.
        labels: Candidate labels separated by commas. Whitespace around each
            label is ignored and empty entries are dropped.

    Returns:
        A dict mapping each label reported by the classifier to its score
        as a ``float``.

    Raises:
        ValueError: If ``labels`` contains no non-empty label.
    """
    # Trim each label ("a, b" previously produced the label " b") and skip
    # empty entries such as those left by a trailing comma.
    label_clean = [part.strip() for part in str(labels).split(",") if part.strip()]
    if not label_clean:
        raise ValueError("At least one non-empty candidate label is required.")

    lang = detect_lang(sequence, labels)
    # NOTE: the pipeline (and therefore the model) is built on every call.
    classifier = pipeline("zero-shot-classification",
                          hypothesis_template=hypothesis_templates[lang],
                          model=models[lang])
    response = classifier(sequence, label_clean, multi_label=True)
    predicted_labels = response['labels']
    predicted_scores = response['scores']
    # Labels and scores are paired by position; zip keeps the response intact
    # instead of popping scores off the front one at a time.
    clean_output = {label: float(score)
                    for label, score in zip(predicted_labels, predicted_scores)}
    print("Date:{}, Sequence:{}, Labels: {}".format(
        str(datetime.datetime.now()),
        sequence,
        predicted_labels))
    return clean_output
# Demo inputs offered in the interface: each text is paired with a
# comma-separated string of candidate labels.

# English examples.
example_text1 = (
    "Coronavirus disease (COVID-19) is an infectious disease caused by the SARS-CoV-2 virus. "
    "Most people who fall sick with COVID-19 will experience mild to moderate symptoms "
    "and recover without special treatment. "
    "However, some will become seriously ill and require medical attention."
)
example_labels1 = "business,health related,politics,climate change"

example_text2 = "Elephants are"
example_labels2 = "big,small,strong,fast,carnivorous"

example_text3 = "Elephants"
example_labels3 = "are big,can be very small,generally not strong enough,are faster than you think"

example_text4 = "Dogs are man's best friend"
example_labels4 = "positive,negative,neutral"

# Not one of the languages listed in the interface description.
example_text5 = "Amar sonar bangla ami tomay bhalobasi"
example_labels5 = "bhalo,kharap"

# German example.
example_text6 = "Letzte Woche gab es einen Selbstmord in einer nahe gelegenen kolonie"
example_labels6 = "verbrechen,tragödie,stehlen"

# Spanish example.
example_text7 = "El autor se perfila, a los 50 años de su muerte, como uno de los grandes de su siglo"
example_labels7 = "cultura,sociedad,economia,salud,deportes"

# Russian example.
example_text8 = (
    "Россия в среду заявила, что военные учения в аннексированном Москвой Крыму закончились "
    "и что солдаты возвращаются в свои гарнизоны, на следующий день после того, "
    "как она объявила о первом выводе "
    "войск от границ Украины."
)
example_labels8 = "новости,комедия"

# Italian example.
example_text9 = (
    "I quattro registi - Federico Fellini, Pier Paolo Pasolini, Bernardo Bertolucci e Vittorio De Sica - "
    "hanno utilizzato stili di ripresa diversi, "
    "ma hanno fortemente influenzato le giovani generazioni di registi."
)
example_labels9 = "cinema,politica,cibo"
# Norwegian example. The verse lines are joined with a single space so that
# words at the line breaks stay separated (a bare backslash continuation
# inside the literal glued them together, e.g. "hevetdet").
example_text10 = (
    "Ja, vi elsker dette landet, "
    "som det stiger frem, "
    "furet, værbitt over vannet, "
    "med de tusen hjem. "
    "Og som fedres kamp har hevet "
    "det av nød til seir"
)
example_labels10 = "helse,sport,religion,mat,patriotisme og nasjonalisme"
# Turkish example.
example_text11 = (
    "Şampiyonlar Ligi’nde 5. hafta oynanan karşılaşmaların ardından sona erdi. "
    "Real Madrid, Inter ve Sporting oynadıkları mücadeleler sonrasında "
    "Son 16 turuna yükselmeyi başardı. "
    "Gecenin dev mücadelesinde ise Manchester City, PSG’yi yenerek liderliği garantiledi."
)
example_labels11 = "dünya,ekonomi,kültür,sağlık,siyaset,spor,teknoloji"
# Gradio front end: two text boxes (text to classify, comma-separated
# candidate labels) feed ``sequence_to_classify``; its label-to-score dict is
# rendered by a Label output limited to the five top classes.
iface = gr.Interface(
    fn=sequence_to_classify,
    title="Multilingual Multi-label Zero-shot Classification",
    description="Currently supported languages are English, German, Spanish, Italian, Russian, Turkish, Norsk.",
    inputs=[
        gr.inputs.Textbox(
            lines=20,
            label="Please enter the text you would like to classify...",
            placeholder="Text here...",
        ),
        gr.inputs.Textbox(
            lines=5,
            label="Possible candidate labels (separated by comma)...",
            placeholder="Labels here separated by comma...",
        ),
    ],
    outputs=gr.outputs.Label(num_top_classes=5),
    capture_session=True,
    # interpretation="default",
    # Each example row is [text, labels], in the same order as ``inputs``.
    examples=[
        [example_text1, example_labels1],
        [example_text2, example_labels2],
        [example_text3, example_labels3],
        [example_text4, example_labels4],
        [example_text5, example_labels5],
        [example_text6, example_labels6],
        [example_text7, example_labels7],
        [example_text8, example_labels8],
        [example_text9, example_labels9],
        [example_text10, example_labels10],
        [example_text11, example_labels11],
    ],
)

# Start serving the interface as soon as the module is executed.
iface.launch()
|