File size: 6,012 Bytes
6275761
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d6f7ae
6275761
 
8d6f7ae
6275761
 
8d6f7ae
6275761
 
8d6f7ae
6275761
 
8d6f7ae
6275761
 
8d6f7ae
6275761
 
8d6f7ae
6275761
 
8d6f7ae
6275761
 
8d6f7ae
6275761
 
8d6f7ae
6275761
 
8d6f7ae
6275761
 
8d6f7ae
6275761
 
8d6f7ae
6275761
 
8d6f7ae
6275761
 
8d6f7ae
6275761
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
from transformers import ConvBertTokenizer, TFConvBertModel
import tensorflow as tf
import numpy as np
from tensorflow.keras import backend as K
from tensorflow.keras import regularizers
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from transformers import *
import os
from text_cleaning import clean_text
from huggingface_hub import hf_hub_download


# Hide every CUDA device from this process by setting the environment variable to '-1'.
# NOTE(review): this runs after `import tensorflow`; presumably it still takes
# effect because no GPU work has happened yet at this point — confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

# Disabled alternative to the line above: expose a single physical GPU
# (selected by index) to TensorFlow instead of hiding all of them.
# gpu_number = 1 #### GPU number 
# gpus = tf.config.experimental.list_physical_devices('GPU')
# if gpus:
#     tf.config.experimental.set_visible_devices(gpus[gpu_number], 'GPU') 
#     logical_gpus = tf.config.experimental.list_logical_devices('GPU')
#     print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")

# Token length that bert_encode pads/truncates every input to.
MAX_LENGTH = 32
# Batch size applied to the tf.data pipeline built in test_predict.
BATCH_SIZE = 256

# Hugging Face Hub identifier used to load the tokenizer below.
model_name = 'dbmdz/convbert-base-turkish-mc4-uncased'
tokenizer = ConvBertTokenizer.from_pretrained(model_name)
# NOTE(review): this only binds a Python variable named CUDA_VISIBLE_DEVICES; it
# does not touch os.environ, and nothing visible in this file reads it. Looks
# like a leftover — confirm before removing.
CUDA_VISIBLE_DEVICES=4

# Maps the class index chosen by argmax in test_predict to its label name.
label_to_name = {0:"INSULT", 
                 1:"OTHER", 
                 2:"PROFANITY", 
                 3:"RACIST", 
                 4:"SEXIST"}


# Passed as `custom_objects` to every tf.keras.models.load_model call below so
# that these names can be resolved while deserializing the .h5 files.
# NOTE(review): "K" is presumably referenced from inside the saved models
# (e.g. by a Lambda layer) — confirm.
custom_object = {"TFConvBertModel": TFConvBertModel, "K":K}

# Every checkpoint used by this script is stored in the same Hub repository.
_HUB_REPO_ID = "emirkocak/TRT_Data_Warriors_tackling_hate_speech"


def _fetch_and_load(checkpoint_name):
    """Download one checkpoint from the Hub and load it as a Keras model.

    Args:
        checkpoint_name: File name of the ``.h5`` checkpoint inside the repository.

    Returns:
        A ``(local_path, model)`` tuple: the path returned by ``hf_hub_download``
        and the model loaded from it with ``compile=False``.
    """
    local_path = hf_hub_download(repo_id=_HUB_REPO_ID, filename=checkpoint_name)
    loaded = tf.keras.models.load_model(local_path, custom_objects=custom_object, compile=False)
    return local_path, loaded


# "Second" model group: five checkpoints, file suffixes _0 ... _4.
second_model_1_path, second_model_1 = _fetch_and_load("2inci_model_mc4_emir_aug_data_dropout01_0.h5")
second_model_2_model_path, second_model_2 = _fetch_and_load("2inci_model_mc4_emir_aug_data_dropout01_1.h5")
second_model_3_model_path, second_model_3 = _fetch_and_load("2inci_model_mc4_emir_aug_data_dropout01_2.h5")
second_model_4_model_path, second_model_4 = _fetch_and_load("2inci_model_mc4_emir_aug_data_dropout01_3.h5")
second_model_5_model_path, second_model_5 = _fetch_and_load("2inci_model_mc4_emir_aug_data_dropout01_4.h5")

# "Third" model group: five checkpoints, file suffixes _0 ... _4.
third_model_1_path, third_model_1 = _fetch_and_load("3uncu_model_mc4_emir_aug_data_0.h5")
third_model_2_path, third_model_2 = _fetch_and_load("3uncu_model_mc4_emir_aug_data_1.h5")
third_model_3_path, third_model_3 = _fetch_and_load("3uncu_model_mc4_emir_aug_data_2.h5")
third_model_4_path, third_model_4 = _fetch_and_load("3uncu_model_mc4_emir_aug_data_3.h5")
third_model_5_path, third_model_5 = _fetch_and_load("3uncu_model_mc4_emir_aug_data_4.h5")

# "First" model group: five checkpoints, model0.h5 ... model4.h5.
model_path1, first_model_1 = _fetch_and_load("model0.h5")
model_path2, first_model_2 = _fetch_and_load("model1.h5")
model_path3, first_model_3 = _fetch_and_load("model2.h5")
model_path4, first_model_4 = _fetch_and_load("model3.h5")
model_path5, first_model_5 = _fetch_and_load("model4.h5")

def bert_encode(data):
    """Tokenize text into fixed-length input ids for the ConvBERT models.

    Args:
        data: A sequence of strings to encode, or a single string. A bare
            string is wrapped in a one-element list so that it is encoded as
            one sentence instead of being handed to the tokenizer as if it
            were a batch.

    Returns:
        A ``tf.Tensor`` built from the tokenizer's ``input_ids``; every row is
        padded or truncated to ``MAX_LENGTH`` tokens.
    """
    if isinstance(data, str):
        data = [data]

    tokens = tokenizer.batch_encode_plus(
        data,
        max_length=MAX_LENGTH,
        padding='max_length',
        truncation=True,
    )

    # Only the token ids are returned; the other tokenizer outputs are dropped.
    return tf.constant(tokens['input_ids'])


def _sum_fold_predictions(models, dataset):
    """Return the element-wise sum of ``model.predict(dataset)`` over *models*."""
    total = 0
    for model in models:
        total += model.predict(dataset)
    return total


def test_predict(text):
    """Classify texts with the weighted blend of the three model groups.

    Each group's predictions are averaged over its models, the three averages
    are combined with fixed weights, and the highest-scoring class per input
    is mapped to its name through ``label_to_name``.

    Args:
        text: Sequence of strings to classify; forwarded unchanged to
            ``bert_encode``.

    Returns:
        A list of label names, one per input. An empty list or tuple yields
        an empty list without invoking the tokenizer or any model.
    """
    # Nothing to classify: avoid running the tokenizer and predict() on an
    # empty batch.
    if isinstance(text, (list, tuple)) and not text:
        return []

    test_encoded = bert_encode(text)
    test_dataset = tf.data.Dataset.from_tensor_slices(test_encoded).batch(BATCH_SIZE)

    first_models = [first_model_1, first_model_2, first_model_3, first_model_4, first_model_5]
    second_models = [second_model_1, second_model_2, second_model_3, second_model_4, second_model_5]
    third_models = [third_model_1, third_model_2, third_model_3, third_model_4, third_model_5]

    y_kfold_second = _sum_fold_predictions(second_models, test_dataset)
    y_kfold_third = _sum_fold_predictions(third_models, test_dataset)
    y_kfold_first = _sum_fold_predictions(first_models, test_dataset)

    # Fixed blend weights (they sum to 1.0); how they were chosen is not
    # visible in this file. Dividing by the list length keeps the per-group
    # average tied to the model lists above.
    y_pred_all = (
        0.39 * y_kfold_first / len(first_models)
        + 0.38 * y_kfold_second / len(second_models)
        + 0.23 * y_kfold_third / len(third_models)
    )

    preds = np.argmax(y_pred_all, 1)

    return [label_to_name[pred] for pred in preds]