Spaces:
Runtime error
Runtime error
File size: 6,012 Bytes
6275761 8d6f7ae 6275761 8d6f7ae 6275761 8d6f7ae 6275761 8d6f7ae 6275761 8d6f7ae 6275761 8d6f7ae 6275761 8d6f7ae 6275761 8d6f7ae 6275761 8d6f7ae 6275761 8d6f7ae 6275761 8d6f7ae 6275761 8d6f7ae 6275761 8d6f7ae 6275761 8d6f7ae 6275761 8d6f7ae 6275761 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
from transformers import ConvBertTokenizer, TFConvBertModel
import tensorflow as tf
import numpy as np
from tensorflow.keras import backend as K
from tensorflow.keras import regularizers
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from transformers import *
import os
from text_cleaning import clean_text
from huggingface_hub import hf_hub_download
# Module-level configuration for the hate-speech classifier ensemble.

# '-1' matches no CUDA device index, so TensorFlow sees no GPU and uses the CPU.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

# Disabled alternative, kept for reference: pin TensorFlow to one physical GPU.
# gpu_number = 1  # GPU number
# gpus = tf.config.experimental.list_physical_devices('GPU')
# if gpus:
#     tf.config.experimental.set_visible_devices(gpus[gpu_number], 'GPU')
#     logical_gpus = tf.config.experimental.list_logical_devices('GPU')
#     print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")

# Token-sequence length used for padding/truncation, and prediction batch size.
MAX_LENGTH, BATCH_SIZE = 32, 256

# NOTE(review): this is an ordinary Python variable, not the environment
# variable set above; nothing visible in this file reads it. It looks like a
# leftover and is kept only so the module namespace stays unchanged.
CUDA_VISIBLE_DEVICES = 4

# Class index (argmax position of the model output) -> human-readable label.
label_to_name = dict(enumerate((
    "INSULT",
    "OTHER",
    "PROFANITY",
    "RACIST",
    "SEXIST",
)))

# Names Keras has to resolve while deserialising the saved .h5 models.
custom_object = dict(TFConvBertModel=TFConvBertModel, K=K)

# Pretrained checkpoint whose tokenizer is used to encode the input texts.
model_name = 'dbmdz/convbert-base-turkish-mc4-uncased'
tokenizer = ConvBertTokenizer.from_pretrained(model_name)
# Hugging Face Hub repository that hosts every checkpoint of the ensemble.
_HF_REPO_ID = "emirkocak/TRT_Data_Warriors_tackling_hate_speech"


def _download_and_load(filename):
    """Fetch one checkpoint from the Hub and load it as a Keras model.

    Args:
        filename: Name of the ``.h5`` file inside ``_HF_REPO_ID``.

    Returns:
        A ``(local_path, model)`` tuple: the value returned by
        ``hf_hub_download`` and the model loaded from it. The model is
        loaded with ``compile=False``, i.e. without restoring a training
        configuration.
    """
    local_path = hf_hub_download(repo_id=_HF_REPO_ID, filename=filename)
    model = tf.keras.models.load_model(
        local_path,
        custom_objects=custom_object,
        compile=False,
    )
    return local_path, model


# "Second" model family: five checkpoints, suffixes 0-4.
# Presumably one per cross-validation fold -- TODO confirm.
second_model_1_path, second_model_1 = _download_and_load(
    "2inci_model_mc4_emir_aug_data_dropout01_0.h5")
second_model_2_model_path, second_model_2 = _download_and_load(
    "2inci_model_mc4_emir_aug_data_dropout01_1.h5")
second_model_3_model_path, second_model_3 = _download_and_load(
    "2inci_model_mc4_emir_aug_data_dropout01_2.h5")
second_model_4_model_path, second_model_4 = _download_and_load(
    "2inci_model_mc4_emir_aug_data_dropout01_3.h5")
second_model_5_model_path, second_model_5 = _download_and_load(
    "2inci_model_mc4_emir_aug_data_dropout01_4.h5")

# "Third" model family: five checkpoints, suffixes 0-4.
third_model_1_path, third_model_1 = _download_and_load(
    "3uncu_model_mc4_emir_aug_data_0.h5")
third_model_2_path, third_model_2 = _download_and_load(
    "3uncu_model_mc4_emir_aug_data_1.h5")
third_model_3_path, third_model_3 = _download_and_load(
    "3uncu_model_mc4_emir_aug_data_2.h5")
third_model_4_path, third_model_4 = _download_and_load(
    "3uncu_model_mc4_emir_aug_data_3.h5")
third_model_5_path, third_model_5 = _download_and_load(
    "3uncu_model_mc4_emir_aug_data_4.h5")

# "First" model family: five checkpoints, model0.h5 .. model4.h5.
model_path1, first_model_1 = _download_and_load("model0.h5")
model_path2, first_model_2 = _download_and_load("model1.h5")
model_path3, first_model_3 = _download_and_load("model2.h5")
model_path4, first_model_4 = _download_and_load("model3.h5")
model_path5, first_model_5 = _download_and_load("model4.h5")
def bert_encode(data):
    """Tokenize a batch of texts into fixed-length input-id rows.

    Args:
        data: A sequence of strings to encode. A single bare string is also
            accepted and is treated as a batch containing that one text.

    Returns:
        A ``tf.Tensor`` built from the tokenizer's ``input_ids``: one row per
        text, each padded or truncated to ``MAX_LENGTH`` tokens.
    """
    # A bare string is itself iterable, so the batch API would not see it as
    # one text (depending on the tokenizer implementation it is either walked
    # character by character or rejected). Wrap it into a one-element batch.
    if isinstance(data, str):
        data = [data]

    tokens = tokenizer.batch_encode_plus(
        data,
        max_length=MAX_LENGTH,
        padding='max_length',
        truncation=True,
    )
    # Only the ids are used; any other field the tokenizer returns is dropped.
    return tf.constant(tokens['input_ids'])
def _sum_member_predictions(models, dataset):
    """Return the element-wise sum of ``model.predict(dataset)`` over *models*."""
    total = 0
    for model in models:
        total += model.predict(dataset)
    return total


def test_predict(text):
    """Classify texts with the weighted three-family model ensemble.

    Each family's five models are run on the encoded input and their outputs
    are summed. The family averages are then blended with fixed weights and
    the highest-scoring class of every row is mapped to its label name.

    Args:
        text: The texts to classify, in the form accepted by ``bert_encode``.

    Returns:
        A list with one label name from ``label_to_name`` per input text.
        An empty input yields an empty list.
    """
    test_encoded = bert_encode(text)

    # Nothing to classify: a dataset without batches makes Keras ``predict``
    # fail, so answer directly instead of running fifteen models on no data.
    if test_encoded.shape[0] == 0:
        return []

    test_dataset = (
        tf.data.Dataset
        .from_tensor_slices(test_encoded)
        .batch(BATCH_SIZE)
    )

    # Families are evaluated in the same order as before: second, third, first.
    y_kfold_second = _sum_member_predictions(
        [second_model_1, second_model_2, second_model_3, second_model_4, second_model_5],
        test_dataset,
    )
    y_kfold_third = _sum_member_predictions(
        [third_model_1, third_model_2, third_model_3, third_model_4, third_model_5],
        test_dataset,
    )
    y_kfold_first = _sum_member_predictions(
        [first_model_1, first_model_2, first_model_3, first_model_4, first_model_5],
        test_dataset,
    )

    # Dividing by 5 turns each family sum into a mean over its five models;
    # 0.39 / 0.38 / 0.23 are the fixed blend weights (they add up to 1.0).
    y_pred_all = (
        0.39 * y_kfold_first / 5
        + 0.38 * y_kfold_second / 5
        + 0.23 * y_kfold_third / 5
    )

    # Axis 1 is the class axis: pick the best class index for every row.
    preds = np.argmax(y_pred_all, 1)
    return [label_to_name[pred] for pred in preds]