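"""Gradio demo for a Transkimer (token-skimming BERT) model fine-tuned on SST-2.

For each input sentence the app reports the model's inference time and the
number of tokens that survive skimming, as reported by the CharacterAttack probe.
"""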
import datasets
import gradio as gr

import transformers
from transformers import AutoConfig, AutoTokenizer
from transformers import glue_processors as processors

from test_module.modeling_transkimer import BertForSequenceClassification as TranskimerForSequenceClassification
from blackbox_utils.my_attack import CharacterAttack


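# Input column name(s) per task; the second entry is None for single-sentence tasks.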
task_to_keys = {
    "cola": ("sentence", None),
    "mnli": ("premise", "hypothesis"),
    "mrpc": ("sentence1", "sentence2"),
    "qnli": ("question", "sentence"),
    "qqp": ("question1", "question2"),
    "rte": ("sentence1", "sentence2"),
    "sst2": ("sentence", None),
    "stsb": ("sentence1", "sentence2"),
    "wnli": ("sentence1", "sentence2"),
    "imdb": ("text", None),
}

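# Local checkpoint directories; 'not_pad' marks a model fine-tuned on unpadded inputs.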
model_path_dict = {
    "transkimer_sst2_not_pad":'./not_pad_0.5',
}


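# Silence datasets/transformers progress bars and warnings for a cleaner demo.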
datasets.utils.logging.set_verbosity_error()
transformers.utils.logging.set_verbosity_error()


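# Demo configuration: SST-2 sentiment classification with the Transkimer backbone.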
task_name = 'sst2'
model_type = 'transkimer'

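# The GLUE processor registry uses the hyphenated task name ('sst-2' for sst2).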
processor = processors['sst-2']()
label_list = processor.get_labels()
label_to_id = {v: i for i, v in enumerate(label_list)}

# Load the pretrained config, tokenizer, and Transkimer model
model_path_key = f'{model_type}_{task_name}_not_pad'
model_path = model_path_dict[model_path_key]
config = AutoConfig.from_pretrained(model_path, num_labels=len(label_list), finetuning_task=task_name)
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', use_fast=True)
model = TranskimerForSequenceClassification.from_pretrained(
    model_path,
    from_tf=bool(".ckpt" in model_path),
    config=config,
)

# Preprocessing: resolve which dataset column(s) hold the inputs for this task
sentence1_key, sentence2_key = task_to_keys[task_name]

# Tokenize without padding; the checkpoint was trained on unpadded inputs
padding = False

attack = CharacterAttack(
    f'{model_type}_{task_name}',
    model,
    tokenizer,
    device='cpu',
    max_per=10,
    padding=padding,
    max_length=128,
    label_to_id=label_to_id,
    sentence1_key=sentence1_key,
    sentence2_key=sentence2_key,
)


def greet(text):
    """Run one sentence through the model; return inference time and tokens kept."""
    text_input = (text, None)  # single-sentence task: no second segment
    outputs, elapsed = attack.get_prob(text_input)
    _, token_remained, _ = attack.output_analysis(outputs)
    return elapsed, token_remained.item()

iface = gr.Interface(fn=greet, inputs=["text"], outputs=["number", "number"])
iface.launch()
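# To expose the demo through a temporary public URL, launch with sharing enabled:
# iface.launch(share=True)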