# NusaBERT
#
# Sleeping
#
# File size: 5,000 Bytes
#
# 47a6c20

from utils import (
    text_analysis_interface,
    token_classification_interface,
    search_interface,
    text_interface,
    SentenceSimilarity,
)
from transformers import pipeline

# Hugging Face Hub checkpoint identifiers used by the demo tabs below.
_SENTIMENT_MODEL = "w11wo/indonesian-roberta-base-sentiment-classifier"
_EMOTION_MODEL = "StevenLimcorn/indonesian-roberta-base-emotion-classifier"
_POS_MODEL = "w11wo/indonesian-roberta-base-posp-tagger"
_SEARCH_MODEL = "LazarusNLP/all-indobert-base-v2"

# Build every pipeline exactly once. The "Text Analysis" tab reuses the same
# three checkpoints as the single-task tabs, so sharing the objects avoids
# loading each set of weights twice at import time.
_sentiment_pipe = pipeline(
    "text-classification",
    model=_SENTIMENT_MODEL,
    tokenizer=_SENTIMENT_MODEL,
)
_emotion_pipe = pipeline(
    "text-classification",
    model=_EMOTION_MODEL,
    tokenizer=_EMOTION_MODEL,
)
# No explicit task is passed for the tagger; the task is left for
# `pipeline` to resolve from the checkpoint itself.
_pos_pipe = pipeline(model=_POS_MODEL)

# Registry of demo tabs, keyed by the tab's display name. Every entry has:
#   "title"        - heading text for the tab
#   "examples"     - sample input strings (may be empty)
#   "output_label" - label(s) for the output; a list when the tab shows
#                    several outputs, otherwise a single string
#   "desc"         - human-readable description of the tab
#   "interface"    - interface object/function imported from `utils`
#   "pipe"         - the model object(s) backing the tab; a list for
#                    "Text Analysis", ordered to match its "output_label"
# NOTE(review): only "Text Analysis" and "Document Search" titles start with
# a markdown "# " prefix - presumably their interfaces render the title
# differently; confirm against `utils` before normalising.
models = {
    "Text Analysis": {
        "title": "# Text Analysis",
        "examples": [
            "Siapa sih di dunia yg ngga punya hater? Rasul yg mulia aja punya. Budha aja punya. Nabi Isa aja punya. Nah apalagi eloh ama gueh .... ya kaaan",
            "saya ganteng, kalau tidak-suka mati saja kamu",
            "Bahaha.. dia ke kasir after me. Sambil ngangkat keresek belanjaanku, masih sempet liat mas nya nyodorin barang belanjaannya",
        ],
        "output_label": ["Sentiment Analysis", "Emotion Classifier", "POS Tagging"],
        "desc": "A tool to showcase the full capabilities of text analysis LazarusNLP has to offer.",
        "interface": text_analysis_interface,
        # Order matches "output_label": sentiment, emotion, POS tagging.
        "pipe": [_sentiment_pipe, _emotion_pipe, _pos_pipe],
    },
    "Sentiment Analysis": {
        "title": "Sentiment Analysis",
        "examples": [
            "saya kecewa karena pengeditan biodata penumpang dilakukan by sistem tanpa konfirmasi dan solusi permasalahan nya pun dianggap sepele karena dibiarkan begitu saja sedang pelayanan pelanggan yang sudah berkali-berkali dihubungi pun hanya seperti mengulur waktu.",
            "saya sudah transfer ratusan ribu dan sesuai nominal transfer. tapi tiket belum muncul juga. harus diwaspadai ini aplikasi ini.",
            "keren sekali aplikasi ini bisa menunjukan data diri secara detail, sangat di rekomendasikan untuk di pakai.",
        ],
        "output_label": "Sentiment Analysis",
        "desc": "A sentiment-text-classification model based on the RoBERTa model. The model was originally the pre-trained Indonesian RoBERTa Base model, which is then fine-tuned on indonlu's SmSA dataset consisting of Indonesian comments and reviews.",
        "interface": text_interface,
        "pipe": _sentiment_pipe,
    },
    "Emotion Detection": {
        "title": "Emotion Classifier",
        "examples": [
            "iya semoga itu karya terbaik mu adalah skripsi mu dan lucua2n mu tapi harapan aku dari kamu adalah kesembuhanmu nold",
            "saya ganteng, kalau tidak-suka mati saja kamu",
            "Bahaha.. dia ke kasir after me. Sambil ngangkat keresek belanjaanku, masih sempet liat mas nya nyodorin barang belanjaannya",
        ],
        "output_label": "Emotion Classifier",
        "desc": "An emotion classifier based on the RoBERTa model. The model was originally the pre-trained Indonesian RoBERTa Base model, which is then fine-tuned on indonlu's EmoT dataset",
        "interface": text_interface,
        "pipe": _emotion_pipe,
    },
    # Summarization tab is currently disabled (no interface or pipe defined).
    # "summarization": {
    #     "examples": [],
    #     "desc": "This model is a fine-tuned version of LazarusNLP/IndoNanoT5-base on the indonlg dataset.",
    # },
    "POS Tagging": {
        "title": "POS Tagging",
        "examples": [
            "iya semoga itu karya terbaik mu adalah skripsi mu dan lucua2n mu tapi harapan aku dari kamu adalah kesembuhanmu nold",
            "saya ganteng, kalau tidak-suka mati saja kamu",
            "Bahaha.. dia ke kasir after me. Sambil ngangkat keresek belanjaanku, masih sempet liat mas nya nyodorin barang belanjaannya",
        ],
        "output_label": "POS Tagging",
        "desc": "A part-of-speech token-classification model based on the RoBERTa model. The model was originally the pre-trained Indonesian RoBERTa Base model, which is then fine-tuned on indonlu's POSP dataset consisting of tag-labelled news.",
        "interface": token_classification_interface,
        "pipe": _pos_pipe,
    },
    "Document Search": {
        "title": "# Document Search 🔍",
        "examples": [],
        "output_label": "Top 5 related documents",
        "desc": "A semantic search tool to get the most related documents 📖 based on user's query.",
        "interface": search_interface,
        "pipe": SentenceSimilarity(model=_SEARCH_MODEL),
    },
}