"""Registry of the demo tasks exposed by the application.

``models`` maps a task name to a configuration dict holding the title,
example inputs, output label(s), description, the interface callable imported
from ``utils`` and the inference object(s) ("pipe") used by that task.

All inference objects are built at import time. Each Hugging Face checkpoint
is instantiated exactly once and the resulting pipeline object is shared
between the combined "Text Analysis" entry and the matching single-task entry,
so no model is loaded twice.
"""

from utils import (
    text_analysis_interface,
    token_classification_interface,
    search_interface,
    text_interface,
    SentenceSimilarity,
)
from transformers import pipeline

# Checkpoint identifiers (also used as the tokenizer for the classifiers).
_SENTIMENT_CHECKPOINT = "w11wo/indonesian-roberta-base-sentiment-classifier"
_EMOTION_CHECKPOINT = "StevenLimcorn/indonesian-roberta-base-emotion-classifier"
_POS_CHECKPOINT = "w11wo/indonesian-roberta-base-posp-tagger"
_NER_CHECKPOINT = "w11wo/indonesian-roberta-base-nerp-tagger"

# Build every pipeline once, in the same order the original module loaded
# them, and reuse the instances below instead of calling ``pipeline`` again.
_sentiment_pipe = pipeline(
    "text-classification",
    model=_SENTIMENT_CHECKPOINT,
    tokenizer=_SENTIMENT_CHECKPOINT,
)
_emotion_pipe = pipeline(
    "text-classification",
    model=_EMOTION_CHECKPOINT,
    tokenizer=_EMOTION_CHECKPOINT,
)
# No explicit task is passed for the taggers, exactly as in the original code.
_pos_pipe = pipeline(model=_POS_CHECKPOINT)
_ner_pipe = pipeline(model=_NER_CHECKPOINT)

models = {
    "Text Analysis": {
        # NOTE(review): only this title and "Document Search" start with "# "
        # (looks like a Markdown heading) - confirm the interfaces expect that.
        "title": "# Text Analysis",
        "examples": [
            "saya kecewa karena pengeditan biodata penumpang dilakukan by sistem tanpa konfirmasi dan solusi permasalahan nya pun dianggap sepele karena dibiarkan begitu saja sedang pelayanan pelanggan yang sudah berkali-berkali dihubungi pun hanya seperti mengulur waktu.",
            "saya ganteng, kalau tidak-suka mati saja kamu",
            "Bahaha.. dia ke kasir after me. Sambil ngangkat keresek belanjaanku, masih sempet liat mas nya nyodorin barang belanjaannya",
        ],
        "output_label": [
            "Sentiment Analysis",
            "Emotion Classifier",
            "POS Tagging",
            "NERP Tagging",
        ],
        "desc": "A tool to showcase the full capabilities of text analysis LazarusNLP has to offer.",
        "interface": text_analysis_interface,
        # Order matches "output_label": sentiment, emotion, POS, NERP.
        "pipe": [
            _sentiment_pipe,
            _emotion_pipe,
            _pos_pipe,
            _ner_pipe,
        ],
    },
    "Sentiment Analysis": {
        "title": "Sentiment Analysis",
        "examples": [
            "saya kecewa karena pengeditan biodata penumpang dilakukan by sistem tanpa konfirmasi dan solusi permasalahan nya pun dianggap sepele karena dibiarkan begitu saja sedang pelayanan pelanggan yang sudah berkali-berkali dihubungi pun hanya seperti mengulur waktu.",
            "saya sudah transfer ratusan ribu dan sesuai nominal transfer. tapi tiket belum muncul juga. harus diwaspadai ini aplikasi ini.",
            "keren sekali aplikasi ini bisa menunjukan data diri secara detail, sangat di rekomendasikan untuk di pakai.",
        ],
        "output_label": "Sentiment Analysis",
        "desc": "A sentiment-text-classification model based on the RoBERTa model. The model was originally the pre-trained Indonesian RoBERTa Base model, which is then fine-tuned on indonlu's SmSA dataset consisting of Indonesian comments and reviews.",
        "interface": text_interface,
        "pipe": _sentiment_pipe,
    },
    "Emotion Detection": {
        "title": "Emotion Classifier",
        "examples": [
            "iya semoga itu karya terbaik mu adalah skripsi mu dan lucua2n mu tapi harapan aku dari kamu adalah kesembuhanmu nold",
            "saya ganteng, kalau tidak-suka mati saja kamu",
            "Bahaha.. dia ke kasir after me. Sambil ngangkat keresek belanjaanku, masih sempet liat mas nya nyodorin barang belanjaannya",
        ],
        "output_label": "Emotion Classifier",
        "desc": "An emotion classifier based on the RoBERTa model. The model was originally the pre-trained Indonesian RoBERTa Base model, which is then fine-tuned on indonlu's EmoT dataset",
        "interface": text_interface,
        "pipe": _emotion_pipe,
    },
    # Disabled entry kept from the original file (no interface/pipe defined):
    # "summarization": {
    #     "examples": [],
    #     "desc": "This model is a fine-tuned version of LazarusNLP/IndoNanoT5-base on the indonlg dataset.",
    # },
    "POS Tagging": {
        "title": "POS Tagging",
        "examples": [
            "iya semoga itu karya terbaik mu adalah skripsi mu dan lucua2n mu tapi harapan aku dari kamu adalah kesembuhanmu nold",
            "saya ganteng, kalau tidak-suka mati saja kamu",
            "Bahaha.. dia ke kasir after me. Sambil ngangkat keresek belanjaanku, masih sempet liat mas nya nyodorin barang belanjaannya",
        ],
        "output_label": "POS Tagging",
        "desc": "A part-of-speech token-classification model based on the RoBERTa model. The model was originally the pre-trained Indonesian RoBERTa Base model, which is then fine-tuned on indonlu's POSP dataset consisting of tag-labelled news.",
        "interface": token_classification_interface,
        "pipe": _pos_pipe,
    },
    "NER Tagging": {
        "title": "NER Tagging",
        "examples": [
            "Paris adalah ibukota dari negara Prancis.",
            "Kuasa hukum teamster berasal dari Edmonton.",
            "Jakarta, Indonesia akan menjadi bagian salah satu tempat yang akan didatangi.",
        ],
        "output_label": "NER Tagging",
        "desc": "A NER Tagging token-classification model based on the RoBERTa model. The model was originally the pre-trained Indonesian RoBERTa Base model, which is then fine-tuned on indonlu's NERP dataset consisting of tag-labelled news.",
        "interface": token_classification_interface,
        "pipe": _ner_pipe,
    },
    "Document Search": {
        "title": "# Document Search 🔍",
        "examples": ["Stadion bola Indonesia.", "Rusia dan Serbia", "Politik."],
        "output_label": "Top 5 related documents",
        "desc": "A semantic search tool to get the most related documents 📖 based on user's query.",
        "interface": search_interface,
        "pipe": SentenceSimilarity(model="LazarusNLP/all-indobert-base-v2"),
        # File name only; how it is resolved/loaded is up to the consumer.
        "sample": "sample.json",
    },
}