import datasets
import gradio as gr
import transformers
from transformers import AutoConfig, AutoTokenizer
from transformers import glue_processors as processors

from test_module.modeling_transkimer import BertForSequenceClassification as TranskimerForSequenceClassification
from blackbox_utils.my_attack import CharacterAttack

# Mapping from GLUE-style task names to their input text column(s).
task_to_keys = {
    "cola": ("sentence", None),
    "mnli": ("premise", "hypothesis"),
    "mrpc": ("sentence1", "sentence2"),
    "qnli": ("question", "sentence"),
    "qqp": ("question1", "question2"),
    "rte": ("sentence1", "sentence2"),
    "sst2": ("sentence", None),
    "stsb": ("sentence1", "sentence2"),
    "wnli": ("sentence1", "sentence2"),
    "imdb": ("text", None),
}

# Local checkpoint paths, keyed as "<model_type>_<task_name>_not_pad".
model_path_dict = {
    "transkimer_sst2_not_pad": "./not_pad_0.5",
}

# Silence datasets/transformers progress bars and warnings.
datasets.utils.logging.set_verbosity_error()
transformers.utils.logging.set_verbosity_error()

task_name = "sst2"
model_type = "transkimer"

# The GLUE processor supplies the label set for the task.
processor = processors["sst-2"]()
label_list = processor.get_labels()
label_to_id = {v: i for i, v in enumerate(label_list)}

# Load the pretrained Transkimmer model and its tokenizer.
model_path = model_path_dict[f"{model_type}_{task_name}_not_pad"]
config = AutoConfig.from_pretrained(
    model_path, num_labels=len(label_list), finetuning_task=task_name
)
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased", use_fast=True)
model = TranskimerForSequenceClassification.from_pretrained(
    model_path,
    from_tf=bool(".ckpt" in model_path),
    config=config,
)

# Preprocessing setup: SST-2 is a single-sentence task, so sentence2_key is None.
sentence1_key, sentence2_key = task_to_keys[task_name]
padding = False

attack = CharacterAttack(
    f"{model_type}_{task_name}",
    model,
    tokenizer,
    device="cpu",
    max_per=10,
    padding=padding,
    max_length=128,
    label_to_id=label_to_id,
    sentence1_key=sentence1_key,
    sentence2_key=sentence2_key,
)


def greet(text):
    """Run one forward pass and report inference time and remaining token count."""
    text_input = (text, None)  # single-sentence input; second element unused for SST-2
    outputs, elapsed_time = attack.get_prob(text_input)
    _, token_remained, _ = attack.output_analysis(outputs)
    return elapsed_time, token_remained.item()


iface = gr.Interface(fn=greet, inputs=["text"], outputs=["number", "number"])
iface.launch()
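
# Quick sanity check without the web UI (a minimal sketch: the sample sentence
# is illustrative only, and the checkpoint under ./not_pad_0.5 must exist
# locally). Uncomment and run before `iface.launch()`, since launch() blocks:
#
# elapsed, remaining = greet("a deeply moving and well-acted film")
# print(f"inference time: {elapsed}s, tokens remained: {remaining}")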