delarosajav95 commited on
Commit
6a6df83
·
verified ·
1 Parent(s): ec74d1a

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. checkpoint-1124/config.json +28 -0
  2. checkpoint-1124/model.safetensors +3 -0
  3. checkpoint-1124/optimizer.pt +3 -0
  4. checkpoint-1124/rng_state.pth +3 -0
  5. checkpoint-1124/scheduler.pt +3 -0
  6. checkpoint-1124/special_tokens_map.json +37 -0
  7. checkpoint-1124/tokenizer.json +0 -0
  8. checkpoint-1124/tokenizer_config.json +58 -0
  9. checkpoint-1124/trainer_state.json +922 -0
  10. checkpoint-1124/training_args.bin +3 -0
  11. checkpoint-1124/vocab.txt +0 -0
  12. checkpoint-281/config.json +28 -0
  13. checkpoint-281/model.safetensors +3 -0
  14. checkpoint-281/optimizer.pt +3 -0
  15. checkpoint-281/rng_state.pth +3 -0
  16. checkpoint-281/scheduler.pt +3 -0
  17. checkpoint-281/special_tokens_map.json +37 -0
  18. checkpoint-281/tokenizer.json +0 -0
  19. checkpoint-281/tokenizer_config.json +58 -0
  20. checkpoint-281/trainer_state.json +262 -0
  21. checkpoint-281/training_args.bin +3 -0
  22. checkpoint-281/vocab.txt +0 -0
  23. checkpoint-562/config.json +28 -0
  24. checkpoint-562/model.safetensors +3 -0
  25. checkpoint-562/optimizer.pt +3 -0
  26. checkpoint-562/rng_state.pth +3 -0
  27. checkpoint-562/scheduler.pt +3 -0
  28. checkpoint-562/special_tokens_map.json +37 -0
  29. checkpoint-562/tokenizer.json +0 -0
  30. checkpoint-562/tokenizer_config.json +58 -0
  31. checkpoint-562/trainer_state.json +482 -0
  32. checkpoint-562/training_args.bin +3 -0
  33. checkpoint-562/vocab.txt +0 -0
  34. checkpoint-843/config.json +28 -0
  35. checkpoint-843/model.safetensors +3 -0
  36. checkpoint-843/optimizer.pt +3 -0
  37. checkpoint-843/rng_state.pth +3 -0
  38. checkpoint-843/scheduler.pt +3 -0
  39. checkpoint-843/special_tokens_map.json +37 -0
  40. checkpoint-843/tokenizer.json +0 -0
  41. checkpoint-843/tokenizer_config.json +58 -0
  42. checkpoint-843/trainer_state.json +702 -0
  43. checkpoint-843/training_args.bin +3 -0
  44. checkpoint-843/vocab.txt +0 -0
  45. config.json +28 -0
  46. events.out.tfevents.1735166948.12b685f0bef2.317.0 +3 -0
  47. events.out.tfevents.1735167321.12b685f0bef2.317.1 +3 -0
  48. events.out.tfevents.1735167522.12b685f0bef2.317.2 +3 -0
  49. model.safetensors +3 -0
  50. special_tokens_map.json +37 -0
checkpoint-1124/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "dccuchile/bert-base-spanish-wwm-cased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "output_past": true,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "problem_type": "single_label_classification",
23
+ "torch_dtype": "float32",
24
+ "transformers_version": "4.47.1",
25
+ "type_vocab_size": 2,
26
+ "use_cache": true,
27
+ "vocab_size": 31002
28
+ }
checkpoint-1124/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e177e2a47fdac098a1f71d136199e251cd143f5f231b72b9c61a1ba37daf395b
3
+ size 439433208
checkpoint-1124/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bef0b4be91ff88def6f22ff783080da4a10d95afddb1821844e4e5509a56aeb0
3
+ size 878987514
checkpoint-1124/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:021dafcf09515589eb5b96af4bf26593ca1bfd3e97d9e2105bd5d30b530c14ef
3
+ size 14244
checkpoint-1124/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d70b37b1c5b5de28c487e573ab9b4f4517b10d5f7302c777cee3c4b45873974
3
+ size 1064
checkpoint-1124/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-1124/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1124/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[MASK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[PAD]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "3": {
20
+ "content": "[UNK]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "4": {
28
+ "content": "[CLS]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "5": {
36
+ "content": "[SEP]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": false,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": false,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
checkpoint-1124/trainer_state.json ADDED
@@ -0,0 +1,922 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.03607647866010666,
3
+ "best_model_checkpoint": "/content/drive/MyDrive/HateSpeech-BETO-cased/checkpoint-1124",
4
+ "epoch": 4.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1124,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03558718861209965,
13
+ "grad_norm": 7.08419132232666,
14
+ "learning_rate": 2.2271714922049e-07,
15
+ "loss": 0.4085,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.0711743772241993,
20
+ "grad_norm": 23.322711944580078,
21
+ "learning_rate": 4.4543429844098e-07,
22
+ "loss": 0.3565,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.10676156583629894,
27
+ "grad_norm": 8.696600914001465,
28
+ "learning_rate": 6.6815144766147e-07,
29
+ "loss": 0.3482,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.1423487544483986,
34
+ "grad_norm": 14.429851531982422,
35
+ "learning_rate": 8.9086859688196e-07,
36
+ "loss": 0.3711,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.17793594306049823,
41
+ "grad_norm": 9.41112995147705,
42
+ "learning_rate": 1.11358574610245e-06,
43
+ "loss": 0.337,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.21352313167259787,
48
+ "grad_norm": 12.487126350402832,
49
+ "learning_rate": 1.33630289532294e-06,
50
+ "loss": 0.3413,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.2491103202846975,
55
+ "grad_norm": 17.9010009765625,
56
+ "learning_rate": 1.55902004454343e-06,
57
+ "loss": 0.3154,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.2846975088967972,
62
+ "grad_norm": 17.09844207763672,
63
+ "learning_rate": 1.78173719376392e-06,
64
+ "loss": 0.2304,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.3202846975088968,
69
+ "grad_norm": 19.929523468017578,
70
+ "learning_rate": 2.00445434298441e-06,
71
+ "loss": 0.308,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.35587188612099646,
76
+ "grad_norm": 5.709442615509033,
77
+ "learning_rate": 2.2271714922049e-06,
78
+ "loss": 0.2692,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.3914590747330961,
83
+ "grad_norm": 19.828474044799805,
84
+ "learning_rate": 2.44988864142539e-06,
85
+ "loss": 0.2255,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.42704626334519574,
90
+ "grad_norm": 17.53233528137207,
91
+ "learning_rate": 2.67260579064588e-06,
92
+ "loss": 0.1917,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.4626334519572954,
97
+ "grad_norm": 14.940444946289062,
98
+ "learning_rate": 2.8953229398663702e-06,
99
+ "loss": 0.3019,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.498220640569395,
104
+ "grad_norm": 1.711176872253418,
105
+ "learning_rate": 3.11804008908686e-06,
106
+ "loss": 0.2032,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.5338078291814946,
111
+ "grad_norm": 7.716080665588379,
112
+ "learning_rate": 3.34075723830735e-06,
113
+ "loss": 0.2764,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.5693950177935944,
118
+ "grad_norm": 20.839887619018555,
119
+ "learning_rate": 3.56347438752784e-06,
120
+ "loss": 0.2067,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.604982206405694,
125
+ "grad_norm": 2.853170156478882,
126
+ "learning_rate": 3.78619153674833e-06,
127
+ "loss": 0.243,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.6405693950177936,
132
+ "grad_norm": 6.61458683013916,
133
+ "learning_rate": 4.00890868596882e-06,
134
+ "loss": 0.1886,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.6761565836298933,
139
+ "grad_norm": 3.499279022216797,
140
+ "learning_rate": 4.231625835189309e-06,
141
+ "loss": 0.1987,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.7117437722419929,
146
+ "grad_norm": 32.111114501953125,
147
+ "learning_rate": 4.4543429844098e-06,
148
+ "loss": 0.2239,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.7473309608540926,
153
+ "grad_norm": 3.83589506149292,
154
+ "learning_rate": 4.67706013363029e-06,
155
+ "loss": 0.1891,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.7829181494661922,
160
+ "grad_norm": 3.3233108520507812,
161
+ "learning_rate": 4.89977728285078e-06,
162
+ "loss": 0.1335,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.8185053380782918,
167
+ "grad_norm": 19.36582374572754,
168
+ "learning_rate": 5.12249443207127e-06,
169
+ "loss": 0.0513,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.8540925266903915,
174
+ "grad_norm": 0.7740408182144165,
175
+ "learning_rate": 5.34521158129176e-06,
176
+ "loss": 0.1499,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.8896797153024911,
181
+ "grad_norm": 0.18774119019508362,
182
+ "learning_rate": 5.5679287305122494e-06,
183
+ "loss": 0.1303,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 0.9252669039145908,
188
+ "grad_norm": 0.2991533875465393,
189
+ "learning_rate": 5.7906458797327404e-06,
190
+ "loss": 0.0695,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 0.9608540925266904,
195
+ "grad_norm": 0.08448722958564758,
196
+ "learning_rate": 6.01336302895323e-06,
197
+ "loss": 0.1109,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 0.99644128113879,
202
+ "grad_norm": 30.85651397705078,
203
+ "learning_rate": 6.23608017817372e-06,
204
+ "loss": 0.2155,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 1.0,
209
+ "eval_accuracy": 0.9559412550066756,
210
+ "eval_f1_per_label": [
211
+ 0.9255079006772009,
212
+ 0.9687203791469194
213
+ ],
214
+ "eval_f1_weighted": 0.9556239404117711,
215
+ "eval_loss": 0.17148031294345856,
216
+ "eval_precision_per_label": [
217
+ 0.9490740740740741,
218
+ 0.9587242026266416
219
+ ],
220
+ "eval_precision_weighted": 0.9557995308223255,
221
+ "eval_recall_per_label": [
222
+ 0.9030837004405287,
223
+ 0.9789272030651341
224
+ ],
225
+ "eval_recall_weighted": 0.9559412550066756,
226
+ "eval_runtime": 1.3508,
227
+ "eval_samples_per_second": 554.494,
228
+ "eval_steps_per_second": 69.589,
229
+ "step": 281
230
+ },
231
+ {
232
+ "epoch": 1.0320284697508897,
233
+ "grad_norm": 37.36682891845703,
234
+ "learning_rate": 6.458797327394209e-06,
235
+ "loss": 0.2318,
236
+ "step": 290
237
+ },
238
+ {
239
+ "epoch": 1.0676156583629894,
240
+ "grad_norm": 3.181389093399048,
241
+ "learning_rate": 6.6815144766147e-06,
242
+ "loss": 0.29,
243
+ "step": 300
244
+ },
245
+ {
246
+ "epoch": 1.103202846975089,
247
+ "grad_norm": 0.6792677640914917,
248
+ "learning_rate": 6.9042316258351895e-06,
249
+ "loss": 0.0621,
250
+ "step": 310
251
+ },
252
+ {
253
+ "epoch": 1.1387900355871885,
254
+ "grad_norm": 31.2163143157959,
255
+ "learning_rate": 7.12694877505568e-06,
256
+ "loss": 0.3279,
257
+ "step": 320
258
+ },
259
+ {
260
+ "epoch": 1.1743772241992882,
261
+ "grad_norm": 32.702980041503906,
262
+ "learning_rate": 7.349665924276171e-06,
263
+ "loss": 0.0851,
264
+ "step": 330
265
+ },
266
+ {
267
+ "epoch": 1.209964412811388,
268
+ "grad_norm": 76.18331146240234,
269
+ "learning_rate": 7.57238307349666e-06,
270
+ "loss": 0.3339,
271
+ "step": 340
272
+ },
273
+ {
274
+ "epoch": 1.2455516014234875,
275
+ "grad_norm": 76.69121551513672,
276
+ "learning_rate": 7.79510022271715e-06,
277
+ "loss": 0.0757,
278
+ "step": 350
279
+ },
280
+ {
281
+ "epoch": 1.281138790035587,
282
+ "grad_norm": 0.06500601023435593,
283
+ "learning_rate": 8.01781737193764e-06,
284
+ "loss": 0.188,
285
+ "step": 360
286
+ },
287
+ {
288
+ "epoch": 1.3167259786476868,
289
+ "grad_norm": 95.82825469970703,
290
+ "learning_rate": 8.24053452115813e-06,
291
+ "loss": 0.2278,
292
+ "step": 370
293
+ },
294
+ {
295
+ "epoch": 1.3523131672597866,
296
+ "grad_norm": 49.43801498413086,
297
+ "learning_rate": 8.463251670378619e-06,
298
+ "loss": 0.2501,
299
+ "step": 380
300
+ },
301
+ {
302
+ "epoch": 1.387900355871886,
303
+ "grad_norm": 40.980369567871094,
304
+ "learning_rate": 8.68596881959911e-06,
305
+ "loss": 0.1424,
306
+ "step": 390
307
+ },
308
+ {
309
+ "epoch": 1.4234875444839858,
310
+ "grad_norm": 1.2029680013656616,
311
+ "learning_rate": 8.9086859688196e-06,
312
+ "loss": 0.1464,
313
+ "step": 400
314
+ },
315
+ {
316
+ "epoch": 1.4590747330960854,
317
+ "grad_norm": 26.718364715576172,
318
+ "learning_rate": 9.13140311804009e-06,
319
+ "loss": 0.2213,
320
+ "step": 410
321
+ },
322
+ {
323
+ "epoch": 1.4946619217081851,
324
+ "grad_norm": 29.254457473754883,
325
+ "learning_rate": 9.35412026726058e-06,
326
+ "loss": 0.1796,
327
+ "step": 420
328
+ },
329
+ {
330
+ "epoch": 1.5302491103202847,
331
+ "grad_norm": 0.17349238693714142,
332
+ "learning_rate": 9.57683741648107e-06,
333
+ "loss": 0.1021,
334
+ "step": 430
335
+ },
336
+ {
337
+ "epoch": 1.5658362989323842,
338
+ "grad_norm": 0.27866968512535095,
339
+ "learning_rate": 9.79955456570156e-06,
340
+ "loss": 0.2656,
341
+ "step": 440
342
+ },
343
+ {
344
+ "epoch": 1.601423487544484,
345
+ "grad_norm": 0.02428502030670643,
346
+ "learning_rate": 9.985185185185185e-06,
347
+ "loss": 0.0504,
348
+ "step": 450
349
+ },
350
+ {
351
+ "epoch": 1.6370106761565837,
352
+ "grad_norm": 0.0974503755569458,
353
+ "learning_rate": 9.837037037037038e-06,
354
+ "loss": 0.062,
355
+ "step": 460
356
+ },
357
+ {
358
+ "epoch": 1.6725978647686834,
359
+ "grad_norm": 1.3685508966445923,
360
+ "learning_rate": 9.688888888888889e-06,
361
+ "loss": 0.0459,
362
+ "step": 470
363
+ },
364
+ {
365
+ "epoch": 1.708185053380783,
366
+ "grad_norm": 1.2251461744308472,
367
+ "learning_rate": 9.540740740740742e-06,
368
+ "loss": 0.1173,
369
+ "step": 480
370
+ },
371
+ {
372
+ "epoch": 1.7437722419928825,
373
+ "grad_norm": 62.15313720703125,
374
+ "learning_rate": 9.392592592592593e-06,
375
+ "loss": 0.1621,
376
+ "step": 490
377
+ },
378
+ {
379
+ "epoch": 1.7793594306049823,
380
+ "grad_norm": 0.03026748262345791,
381
+ "learning_rate": 9.244444444444445e-06,
382
+ "loss": 0.1064,
383
+ "step": 500
384
+ },
385
+ {
386
+ "epoch": 1.814946619217082,
387
+ "grad_norm": 0.4413605332374573,
388
+ "learning_rate": 9.096296296296298e-06,
389
+ "loss": 0.1794,
390
+ "step": 510
391
+ },
392
+ {
393
+ "epoch": 1.8505338078291815,
394
+ "grad_norm": 0.08637289702892303,
395
+ "learning_rate": 8.948148148148149e-06,
396
+ "loss": 0.0915,
397
+ "step": 520
398
+ },
399
+ {
400
+ "epoch": 1.886120996441281,
401
+ "grad_norm": 0.09529650211334229,
402
+ "learning_rate": 8.8e-06,
403
+ "loss": 0.052,
404
+ "step": 530
405
+ },
406
+ {
407
+ "epoch": 1.9217081850533808,
408
+ "grad_norm": 107.46017456054688,
409
+ "learning_rate": 8.651851851851852e-06,
410
+ "loss": 0.1408,
411
+ "step": 540
412
+ },
413
+ {
414
+ "epoch": 1.9572953736654806,
415
+ "grad_norm": 26.60268211364746,
416
+ "learning_rate": 8.503703703703705e-06,
417
+ "loss": 0.1605,
418
+ "step": 550
419
+ },
420
+ {
421
+ "epoch": 1.99288256227758,
422
+ "grad_norm": 0.018591415137052536,
423
+ "learning_rate": 8.355555555555556e-06,
424
+ "loss": 0.0914,
425
+ "step": 560
426
+ },
427
+ {
428
+ "epoch": 2.0,
429
+ "eval_accuracy": 0.9732977303070761,
430
+ "eval_f1_per_label": [
431
+ 0.9541284403669725,
432
+ 0.9811676082862524
433
+ ],
434
+ "eval_f1_weighted": 0.9729728270877522,
435
+ "eval_loss": 0.1353287547826767,
436
+ "eval_precision_per_label": [
437
+ 0.9952153110047847,
438
+ 0.9648148148148148
439
+ ],
440
+ "eval_precision_weighted": 0.9740283163303329,
441
+ "eval_recall_per_label": [
442
+ 0.9162995594713657,
443
+ 0.9980842911877394
444
+ ],
445
+ "eval_recall_weighted": 0.9732977303070761,
446
+ "eval_runtime": 1.309,
447
+ "eval_samples_per_second": 572.195,
448
+ "eval_steps_per_second": 71.811,
449
+ "step": 562
450
+ },
451
+ {
452
+ "epoch": 2.0284697508896796,
453
+ "grad_norm": 0.015944845974445343,
454
+ "learning_rate": 8.207407407407409e-06,
455
+ "loss": 0.0011,
456
+ "step": 570
457
+ },
458
+ {
459
+ "epoch": 2.0640569395017794,
460
+ "grad_norm": 0.03662079572677612,
461
+ "learning_rate": 8.05925925925926e-06,
462
+ "loss": 0.0475,
463
+ "step": 580
464
+ },
465
+ {
466
+ "epoch": 2.099644128113879,
467
+ "grad_norm": 0.03478744253516197,
468
+ "learning_rate": 7.911111111111112e-06,
469
+ "loss": 0.0017,
470
+ "step": 590
471
+ },
472
+ {
473
+ "epoch": 2.135231316725979,
474
+ "grad_norm": 0.03390470892190933,
475
+ "learning_rate": 7.762962962962963e-06,
476
+ "loss": 0.0054,
477
+ "step": 600
478
+ },
479
+ {
480
+ "epoch": 2.170818505338078,
481
+ "grad_norm": 0.007953139953315258,
482
+ "learning_rate": 7.614814814814816e-06,
483
+ "loss": 0.0096,
484
+ "step": 610
485
+ },
486
+ {
487
+ "epoch": 2.206405693950178,
488
+ "grad_norm": 45.853023529052734,
489
+ "learning_rate": 7.4666666666666675e-06,
490
+ "loss": 0.0893,
491
+ "step": 620
492
+ },
493
+ {
494
+ "epoch": 2.2419928825622777,
495
+ "grad_norm": 0.03887489438056946,
496
+ "learning_rate": 7.31851851851852e-06,
497
+ "loss": 0.0004,
498
+ "step": 630
499
+ },
500
+ {
501
+ "epoch": 2.277580071174377,
502
+ "grad_norm": 0.04745912551879883,
503
+ "learning_rate": 7.170370370370371e-06,
504
+ "loss": 0.1963,
505
+ "step": 640
506
+ },
507
+ {
508
+ "epoch": 2.3131672597864767,
509
+ "grad_norm": 0.05072787404060364,
510
+ "learning_rate": 7.022222222222222e-06,
511
+ "loss": 0.0231,
512
+ "step": 650
513
+ },
514
+ {
515
+ "epoch": 2.3487544483985765,
516
+ "grad_norm": 0.026582635939121246,
517
+ "learning_rate": 6.8740740740740745e-06,
518
+ "loss": 0.0149,
519
+ "step": 660
520
+ },
521
+ {
522
+ "epoch": 2.3843416370106763,
523
+ "grad_norm": 0.014482633210718632,
524
+ "learning_rate": 6.725925925925927e-06,
525
+ "loss": 0.0994,
526
+ "step": 670
527
+ },
528
+ {
529
+ "epoch": 2.419928825622776,
530
+ "grad_norm": 0.025208059698343277,
531
+ "learning_rate": 6.577777777777779e-06,
532
+ "loss": 0.0006,
533
+ "step": 680
534
+ },
535
+ {
536
+ "epoch": 2.4555160142348753,
537
+ "grad_norm": 0.01709064655005932,
538
+ "learning_rate": 6.42962962962963e-06,
539
+ "loss": 0.1219,
540
+ "step": 690
541
+ },
542
+ {
543
+ "epoch": 2.491103202846975,
544
+ "grad_norm": 0.1339588463306427,
545
+ "learning_rate": 6.2814814814814814e-06,
546
+ "loss": 0.0006,
547
+ "step": 700
548
+ },
549
+ {
550
+ "epoch": 2.526690391459075,
551
+ "grad_norm": 24.798721313476562,
552
+ "learning_rate": 6.133333333333334e-06,
553
+ "loss": 0.0029,
554
+ "step": 710
555
+ },
556
+ {
557
+ "epoch": 2.562277580071174,
558
+ "grad_norm": 1.3540862798690796,
559
+ "learning_rate": 5.985185185185186e-06,
560
+ "loss": 0.081,
561
+ "step": 720
562
+ },
563
+ {
564
+ "epoch": 2.597864768683274,
565
+ "grad_norm": 0.04109741002321243,
566
+ "learning_rate": 5.837037037037038e-06,
567
+ "loss": 0.0006,
568
+ "step": 730
569
+ },
570
+ {
571
+ "epoch": 2.6334519572953736,
572
+ "grad_norm": 0.010597619228065014,
573
+ "learning_rate": 5.688888888888889e-06,
574
+ "loss": 0.013,
575
+ "step": 740
576
+ },
577
+ {
578
+ "epoch": 2.6690391459074734,
579
+ "grad_norm": 0.02383551187813282,
580
+ "learning_rate": 5.540740740740741e-06,
581
+ "loss": 0.0008,
582
+ "step": 750
583
+ },
584
+ {
585
+ "epoch": 2.704626334519573,
586
+ "grad_norm": 1.2794164419174194,
587
+ "learning_rate": 5.392592592592593e-06,
588
+ "loss": 0.1707,
589
+ "step": 760
590
+ },
591
+ {
592
+ "epoch": 2.7402135231316724,
593
+ "grad_norm": 0.014402506873011589,
594
+ "learning_rate": 5.244444444444445e-06,
595
+ "loss": 0.0864,
596
+ "step": 770
597
+ },
598
+ {
599
+ "epoch": 2.775800711743772,
600
+ "grad_norm": 0.013125807978212833,
601
+ "learning_rate": 5.096296296296297e-06,
602
+ "loss": 0.0205,
603
+ "step": 780
604
+ },
605
+ {
606
+ "epoch": 2.811387900355872,
607
+ "grad_norm": 0.2091672271490097,
608
+ "learning_rate": 4.9481481481481485e-06,
609
+ "loss": 0.0027,
610
+ "step": 790
611
+ },
612
+ {
613
+ "epoch": 2.8469750889679717,
614
+ "grad_norm": 0.13218224048614502,
615
+ "learning_rate": 4.800000000000001e-06,
616
+ "loss": 0.0004,
617
+ "step": 800
618
+ },
619
+ {
620
+ "epoch": 2.882562277580071,
621
+ "grad_norm": 2.1445350646972656,
622
+ "learning_rate": 4.651851851851853e-06,
623
+ "loss": 0.1045,
624
+ "step": 810
625
+ },
626
+ {
627
+ "epoch": 2.9181494661921707,
628
+ "grad_norm": 0.016628708690404892,
629
+ "learning_rate": 4.503703703703704e-06,
630
+ "loss": 0.0005,
631
+ "step": 820
632
+ },
633
+ {
634
+ "epoch": 2.9537366548042705,
635
+ "grad_norm": 0.04098201170563698,
636
+ "learning_rate": 4.3555555555555555e-06,
637
+ "loss": 0.0569,
638
+ "step": 830
639
+ },
640
+ {
641
+ "epoch": 2.9893238434163703,
642
+ "grad_norm": 0.7140023708343506,
643
+ "learning_rate": 4.207407407407408e-06,
644
+ "loss": 0.0014,
645
+ "step": 840
646
+ },
647
+ {
648
+ "epoch": 3.0,
649
+ "eval_accuracy": 0.9893190921228304,
650
+ "eval_f1_per_label": [
651
+ 0.9821428571428571,
652
+ 0.9923809523809524
653
+ ],
654
+ "eval_f1_weighted": 0.989278085065802,
655
+ "eval_loss": 0.050094157457351685,
656
+ "eval_precision_per_label": [
657
+ 0.995475113122172,
658
+ 0.9867424242424242
659
+ ],
660
+ "eval_precision_weighted": 0.9893890469069138,
661
+ "eval_recall_per_label": [
662
+ 0.9691629955947136,
663
+ 0.9980842911877394
664
+ ],
665
+ "eval_recall_weighted": 0.9893190921228304,
666
+ "eval_runtime": 1.3422,
667
+ "eval_samples_per_second": 558.027,
668
+ "eval_steps_per_second": 70.033,
669
+ "step": 843
670
+ },
671
+ {
672
+ "epoch": 3.0249110320284696,
673
+ "grad_norm": 0.009055254980921745,
674
+ "learning_rate": 4.05925925925926e-06,
675
+ "loss": 0.124,
676
+ "step": 850
677
+ },
678
+ {
679
+ "epoch": 3.0604982206405693,
680
+ "grad_norm": 0.005354477558284998,
681
+ "learning_rate": 3.911111111111112e-06,
682
+ "loss": 0.0008,
683
+ "step": 860
684
+ },
685
+ {
686
+ "epoch": 3.096085409252669,
687
+ "grad_norm": 0.006784161552786827,
688
+ "learning_rate": 3.7629629629629633e-06,
689
+ "loss": 0.0736,
690
+ "step": 870
691
+ },
692
+ {
693
+ "epoch": 3.131672597864769,
694
+ "grad_norm": 0.007448482792824507,
695
+ "learning_rate": 3.614814814814815e-06,
696
+ "loss": 0.0047,
697
+ "step": 880
698
+ },
699
+ {
700
+ "epoch": 3.167259786476868,
701
+ "grad_norm": 0.0065169306471943855,
702
+ "learning_rate": 3.4666666666666672e-06,
703
+ "loss": 0.0003,
704
+ "step": 890
705
+ },
706
+ {
707
+ "epoch": 3.202846975088968,
708
+ "grad_norm": 0.00644827401265502,
709
+ "learning_rate": 3.3185185185185185e-06,
710
+ "loss": 0.0007,
711
+ "step": 900
712
+ },
713
+ {
714
+ "epoch": 3.2384341637010676,
715
+ "grad_norm": 12.251897811889648,
716
+ "learning_rate": 3.1703703703703707e-06,
717
+ "loss": 0.0016,
718
+ "step": 910
719
+ },
720
+ {
721
+ "epoch": 3.2740213523131674,
722
+ "grad_norm": 0.010300640016794205,
723
+ "learning_rate": 3.0222222222222225e-06,
724
+ "loss": 0.0002,
725
+ "step": 920
726
+ },
727
+ {
728
+ "epoch": 3.309608540925267,
729
+ "grad_norm": 0.006541989278048277,
730
+ "learning_rate": 2.874074074074074e-06,
731
+ "loss": 0.0054,
732
+ "step": 930
733
+ },
734
+ {
735
+ "epoch": 3.3451957295373664,
736
+ "grad_norm": 0.08283556252717972,
737
+ "learning_rate": 2.7259259259259264e-06,
738
+ "loss": 0.0006,
739
+ "step": 940
740
+ },
741
+ {
742
+ "epoch": 3.380782918149466,
743
+ "grad_norm": 0.005921730771660805,
744
+ "learning_rate": 2.577777777777778e-06,
745
+ "loss": 0.0003,
746
+ "step": 950
747
+ },
748
+ {
749
+ "epoch": 3.416370106761566,
750
+ "grad_norm": 0.004639245569705963,
751
+ "learning_rate": 2.42962962962963e-06,
752
+ "loss": 0.0003,
753
+ "step": 960
754
+ },
755
+ {
756
+ "epoch": 3.4519572953736652,
757
+ "grad_norm": 0.16998454928398132,
758
+ "learning_rate": 2.2814814814814816e-06,
759
+ "loss": 0.0007,
760
+ "step": 970
761
+ },
762
+ {
763
+ "epoch": 3.487544483985765,
764
+ "grad_norm": 0.005660816095769405,
765
+ "learning_rate": 2.133333333333334e-06,
766
+ "loss": 0.0241,
767
+ "step": 980
768
+ },
769
+ {
770
+ "epoch": 3.5231316725978647,
771
+ "grad_norm": 0.01572972722351551,
772
+ "learning_rate": 1.985185185185185e-06,
773
+ "loss": 0.0022,
774
+ "step": 990
775
+ },
776
+ {
777
+ "epoch": 3.5587188612099645,
778
+ "grad_norm": 0.01506053563207388,
779
+ "learning_rate": 1.837037037037037e-06,
780
+ "loss": 0.0017,
781
+ "step": 1000
782
+ },
783
+ {
784
+ "epoch": 3.5943060498220643,
785
+ "grad_norm": 0.06420188397169113,
786
+ "learning_rate": 1.688888888888889e-06,
787
+ "loss": 0.0981,
788
+ "step": 1010
789
+ },
790
+ {
791
+ "epoch": 3.6298932384341636,
792
+ "grad_norm": 0.050418056547641754,
793
+ "learning_rate": 1.540740740740741e-06,
794
+ "loss": 0.0002,
795
+ "step": 1020
796
+ },
797
+ {
798
+ "epoch": 3.6654804270462633,
799
+ "grad_norm": 0.1331329494714737,
800
+ "learning_rate": 1.3925925925925925e-06,
801
+ "loss": 0.0002,
802
+ "step": 1030
803
+ },
804
+ {
805
+ "epoch": 3.701067615658363,
806
+ "grad_norm": 0.0047861747443675995,
807
+ "learning_rate": 1.2444444444444445e-06,
808
+ "loss": 0.0007,
809
+ "step": 1040
810
+ },
811
+ {
812
+ "epoch": 3.7366548042704624,
813
+ "grad_norm": 0.004302954766899347,
814
+ "learning_rate": 1.0962962962962965e-06,
815
+ "loss": 0.0002,
816
+ "step": 1050
817
+ },
818
+ {
819
+ "epoch": 3.772241992882562,
820
+ "grad_norm": 0.0052270120941102505,
821
+ "learning_rate": 9.481481481481482e-07,
822
+ "loss": 0.0003,
823
+ "step": 1060
824
+ },
825
+ {
826
+ "epoch": 3.807829181494662,
827
+ "grad_norm": 0.006781002506613731,
828
+ "learning_rate": 8.000000000000001e-07,
829
+ "loss": 0.0003,
830
+ "step": 1070
831
+ },
832
+ {
833
+ "epoch": 3.8434163701067616,
834
+ "grad_norm": 0.006440193857997656,
835
+ "learning_rate": 6.518518518518518e-07,
836
+ "loss": 0.0003,
837
+ "step": 1080
838
+ },
839
+ {
840
+ "epoch": 3.8790035587188614,
841
+ "grad_norm": 0.009248029440641403,
842
+ "learning_rate": 5.037037037037038e-07,
843
+ "loss": 0.0061,
844
+ "step": 1090
845
+ },
846
+ {
847
+ "epoch": 3.914590747330961,
848
+ "grad_norm": 0.011829700320959091,
849
+ "learning_rate": 3.555555555555556e-07,
850
+ "loss": 0.0003,
851
+ "step": 1100
852
+ },
853
+ {
854
+ "epoch": 3.9501779359430604,
855
+ "grad_norm": 0.024425974115729332,
856
+ "learning_rate": 2.074074074074074e-07,
857
+ "loss": 0.0676,
858
+ "step": 1110
859
+ },
860
+ {
861
+ "epoch": 3.98576512455516,
862
+ "grad_norm": 0.00512115890160203,
863
+ "learning_rate": 5.9259259259259263e-08,
864
+ "loss": 0.0002,
865
+ "step": 1120
866
+ },
867
+ {
868
+ "epoch": 4.0,
869
+ "eval_accuracy": 0.9933244325767691,
870
+ "eval_f1_per_label": [
871
+ 0.9888641425389755,
872
+ 0.9952335557673975
873
+ ],
874
+ "eval_f1_weighted": 0.9933031728530427,
875
+ "eval_loss": 0.03607647866010666,
876
+ "eval_precision_per_label": [
877
+ 1.0,
878
+ 0.9905123339658444
879
+ ],
880
+ "eval_precision_weighted": 0.9933877681310691,
881
+ "eval_recall_per_label": [
882
+ 0.9779735682819384,
883
+ 1.0
884
+ ],
885
+ "eval_recall_weighted": 0.9933244325767691,
886
+ "eval_runtime": 5.013,
887
+ "eval_samples_per_second": 149.411,
888
+ "eval_steps_per_second": 18.751,
889
+ "step": 1124
890
+ }
891
+ ],
892
+ "logging_steps": 10,
893
+ "max_steps": 1124,
894
+ "num_input_tokens_seen": 0,
895
+ "num_train_epochs": 4,
896
+ "save_steps": 500,
897
+ "stateful_callbacks": {
898
+ "EarlyStoppingCallback": {
899
+ "args": {
900
+ "early_stopping_patience": 3,
901
+ "early_stopping_threshold": 0.0
902
+ },
903
+ "attributes": {
904
+ "early_stopping_patience_counter": 0
905
+ }
906
+ },
907
+ "TrainerControl": {
908
+ "args": {
909
+ "should_epoch_stop": false,
910
+ "should_evaluate": false,
911
+ "should_log": false,
912
+ "should_save": true,
913
+ "should_training_stop": true
914
+ },
915
+ "attributes": {}
916
+ }
917
+ },
918
+ "total_flos": 87830786944920.0,
919
+ "train_batch_size": 8,
920
+ "trial_name": null,
921
+ "trial_params": null
922
+ }
checkpoint-1124/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b507d5295c042031ea8738de84bfa33f52abd9440ecb04319aeef48c60251cc
3
+ size 5368
checkpoint-1124/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-281/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "dccuchile/bert-base-spanish-wwm-cased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "output_past": true,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "problem_type": "single_label_classification",
23
+ "torch_dtype": "float32",
24
+ "transformers_version": "4.47.1",
25
+ "type_vocab_size": 2,
26
+ "use_cache": true,
27
+ "vocab_size": 31002
28
+ }
checkpoint-281/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efdb993d49b41c4c1fcbbb39dbc455ee9d162c89a4c768e8a7a5ccae5440c764
3
+ size 439433208
checkpoint-281/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:771a327041481181f4909d2f2cd4e95738535c6e48b1d36c9ad4850215e49913
3
+ size 878987514
checkpoint-281/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b5a76b935800a43630c09c75898ccd728dcc94793a2964832f19ec46670522f
3
+ size 14244
checkpoint-281/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6cfc827ec3fd27bbfd0b5cb5e18a8d936dac58d028a6d7a8b604d7c97cc3381
3
+ size 1064
checkpoint-281/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-281/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-281/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[MASK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[PAD]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "3": {
20
+ "content": "[UNK]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "4": {
28
+ "content": "[CLS]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "5": {
36
+ "content": "[SEP]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": false,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": false,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
checkpoint-281/trainer_state.json ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.17148031294345856,
3
+ "best_model_checkpoint": "/content/drive/MyDrive/HateSpeech-BETO-cased/checkpoint-281",
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 281,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03558718861209965,
13
+ "grad_norm": 7.08419132232666,
14
+ "learning_rate": 2.2271714922049e-07,
15
+ "loss": 0.4085,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.0711743772241993,
20
+ "grad_norm": 23.322711944580078,
21
+ "learning_rate": 4.4543429844098e-07,
22
+ "loss": 0.3565,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.10676156583629894,
27
+ "grad_norm": 8.696600914001465,
28
+ "learning_rate": 6.6815144766147e-07,
29
+ "loss": 0.3482,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.1423487544483986,
34
+ "grad_norm": 14.429851531982422,
35
+ "learning_rate": 8.9086859688196e-07,
36
+ "loss": 0.3711,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.17793594306049823,
41
+ "grad_norm": 9.41112995147705,
42
+ "learning_rate": 1.11358574610245e-06,
43
+ "loss": 0.337,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.21352313167259787,
48
+ "grad_norm": 12.487126350402832,
49
+ "learning_rate": 1.33630289532294e-06,
50
+ "loss": 0.3413,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.2491103202846975,
55
+ "grad_norm": 17.9010009765625,
56
+ "learning_rate": 1.55902004454343e-06,
57
+ "loss": 0.3154,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.2846975088967972,
62
+ "grad_norm": 17.09844207763672,
63
+ "learning_rate": 1.78173719376392e-06,
64
+ "loss": 0.2304,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.3202846975088968,
69
+ "grad_norm": 19.929523468017578,
70
+ "learning_rate": 2.00445434298441e-06,
71
+ "loss": 0.308,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.35587188612099646,
76
+ "grad_norm": 5.709442615509033,
77
+ "learning_rate": 2.2271714922049e-06,
78
+ "loss": 0.2692,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.3914590747330961,
83
+ "grad_norm": 19.828474044799805,
84
+ "learning_rate": 2.44988864142539e-06,
85
+ "loss": 0.2255,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.42704626334519574,
90
+ "grad_norm": 17.53233528137207,
91
+ "learning_rate": 2.67260579064588e-06,
92
+ "loss": 0.1917,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.4626334519572954,
97
+ "grad_norm": 14.940444946289062,
98
+ "learning_rate": 2.8953229398663702e-06,
99
+ "loss": 0.3019,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.498220640569395,
104
+ "grad_norm": 1.711176872253418,
105
+ "learning_rate": 3.11804008908686e-06,
106
+ "loss": 0.2032,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.5338078291814946,
111
+ "grad_norm": 7.716080665588379,
112
+ "learning_rate": 3.34075723830735e-06,
113
+ "loss": 0.2764,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.5693950177935944,
118
+ "grad_norm": 20.839887619018555,
119
+ "learning_rate": 3.56347438752784e-06,
120
+ "loss": 0.2067,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.604982206405694,
125
+ "grad_norm": 2.853170156478882,
126
+ "learning_rate": 3.78619153674833e-06,
127
+ "loss": 0.243,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.6405693950177936,
132
+ "grad_norm": 6.61458683013916,
133
+ "learning_rate": 4.00890868596882e-06,
134
+ "loss": 0.1886,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.6761565836298933,
139
+ "grad_norm": 3.499279022216797,
140
+ "learning_rate": 4.231625835189309e-06,
141
+ "loss": 0.1987,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.7117437722419929,
146
+ "grad_norm": 32.111114501953125,
147
+ "learning_rate": 4.4543429844098e-06,
148
+ "loss": 0.2239,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.7473309608540926,
153
+ "grad_norm": 3.83589506149292,
154
+ "learning_rate": 4.67706013363029e-06,
155
+ "loss": 0.1891,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.7829181494661922,
160
+ "grad_norm": 3.3233108520507812,
161
+ "learning_rate": 4.89977728285078e-06,
162
+ "loss": 0.1335,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.8185053380782918,
167
+ "grad_norm": 19.36582374572754,
168
+ "learning_rate": 5.12249443207127e-06,
169
+ "loss": 0.0513,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.8540925266903915,
174
+ "grad_norm": 0.7740408182144165,
175
+ "learning_rate": 5.34521158129176e-06,
176
+ "loss": 0.1499,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.8896797153024911,
181
+ "grad_norm": 0.18774119019508362,
182
+ "learning_rate": 5.5679287305122494e-06,
183
+ "loss": 0.1303,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 0.9252669039145908,
188
+ "grad_norm": 0.2991533875465393,
189
+ "learning_rate": 5.7906458797327404e-06,
190
+ "loss": 0.0695,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 0.9608540925266904,
195
+ "grad_norm": 0.08448722958564758,
196
+ "learning_rate": 6.01336302895323e-06,
197
+ "loss": 0.1109,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 0.99644128113879,
202
+ "grad_norm": 30.85651397705078,
203
+ "learning_rate": 6.23608017817372e-06,
204
+ "loss": 0.2155,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 1.0,
209
+ "eval_accuracy": 0.9559412550066756,
210
+ "eval_f1_per_label": [
211
+ 0.9255079006772009,
212
+ 0.9687203791469194
213
+ ],
214
+ "eval_f1_weighted": 0.9556239404117711,
215
+ "eval_loss": 0.17148031294345856,
216
+ "eval_precision_per_label": [
217
+ 0.9490740740740741,
218
+ 0.9587242026266416
219
+ ],
220
+ "eval_precision_weighted": 0.9557995308223255,
221
+ "eval_recall_per_label": [
222
+ 0.9030837004405287,
223
+ 0.9789272030651341
224
+ ],
225
+ "eval_recall_weighted": 0.9559412550066756,
226
+ "eval_runtime": 1.3508,
227
+ "eval_samples_per_second": 554.494,
228
+ "eval_steps_per_second": 69.589,
229
+ "step": 281
230
+ }
231
+ ],
232
+ "logging_steps": 10,
233
+ "max_steps": 1124,
234
+ "num_input_tokens_seen": 0,
235
+ "num_train_epochs": 4,
236
+ "save_steps": 500,
237
+ "stateful_callbacks": {
238
+ "EarlyStoppingCallback": {
239
+ "args": {
240
+ "early_stopping_patience": 3,
241
+ "early_stopping_threshold": 0.0
242
+ },
243
+ "attributes": {
244
+ "early_stopping_patience_counter": 0
245
+ }
246
+ },
247
+ "TrainerControl": {
248
+ "args": {
249
+ "should_epoch_stop": false,
250
+ "should_evaluate": false,
251
+ "should_log": false,
252
+ "should_save": true,
253
+ "should_training_stop": false
254
+ },
255
+ "attributes": {}
256
+ }
257
+ },
258
+ "total_flos": 21962578679640.0,
259
+ "train_batch_size": 8,
260
+ "trial_name": null,
261
+ "trial_params": null
262
+ }
checkpoint-281/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b507d5295c042031ea8738de84bfa33f52abd9440ecb04319aeef48c60251cc
3
+ size 5368
checkpoint-281/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-562/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "dccuchile/bert-base-spanish-wwm-cased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "output_past": true,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "problem_type": "single_label_classification",
23
+ "torch_dtype": "float32",
24
+ "transformers_version": "4.47.1",
25
+ "type_vocab_size": 2,
26
+ "use_cache": true,
27
+ "vocab_size": 31002
28
+ }
checkpoint-562/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b19f9c00971c19b2edf8e1d8be4d704e00d162677a0ad1b622d5e94f0f6b8ce5
3
+ size 439433208
checkpoint-562/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2d2e4a01719d785dc02d76374f1a32373d4892ed92e45cb81ba5f0507333a0c
3
+ size 878987514
checkpoint-562/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4320cf79393d41a5c3747b22ed1eb9ca5f29d5f3ce7110a2a19afe7013a6e8c
3
+ size 14244
checkpoint-562/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b061e09194d1c509c3f2204af9e5d068a8b8fffa0a8816fafb4a31f7f39f8e56
3
+ size 1064
checkpoint-562/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-562/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-562/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[MASK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[PAD]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "3": {
20
+ "content": "[UNK]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "4": {
28
+ "content": "[CLS]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "5": {
36
+ "content": "[SEP]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": false,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": false,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
checkpoint-562/trainer_state.json ADDED
@@ -0,0 +1,482 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.1353287547826767,
3
+ "best_model_checkpoint": "/content/drive/MyDrive/HateSpeech-BETO-cased/checkpoint-562",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 562,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03558718861209965,
13
+ "grad_norm": 7.08419132232666,
14
+ "learning_rate": 2.2271714922049e-07,
15
+ "loss": 0.4085,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.0711743772241993,
20
+ "grad_norm": 23.322711944580078,
21
+ "learning_rate": 4.4543429844098e-07,
22
+ "loss": 0.3565,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.10676156583629894,
27
+ "grad_norm": 8.696600914001465,
28
+ "learning_rate": 6.6815144766147e-07,
29
+ "loss": 0.3482,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.1423487544483986,
34
+ "grad_norm": 14.429851531982422,
35
+ "learning_rate": 8.9086859688196e-07,
36
+ "loss": 0.3711,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.17793594306049823,
41
+ "grad_norm": 9.41112995147705,
42
+ "learning_rate": 1.11358574610245e-06,
43
+ "loss": 0.337,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.21352313167259787,
48
+ "grad_norm": 12.487126350402832,
49
+ "learning_rate": 1.33630289532294e-06,
50
+ "loss": 0.3413,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.2491103202846975,
55
+ "grad_norm": 17.9010009765625,
56
+ "learning_rate": 1.55902004454343e-06,
57
+ "loss": 0.3154,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.2846975088967972,
62
+ "grad_norm": 17.09844207763672,
63
+ "learning_rate": 1.78173719376392e-06,
64
+ "loss": 0.2304,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.3202846975088968,
69
+ "grad_norm": 19.929523468017578,
70
+ "learning_rate": 2.00445434298441e-06,
71
+ "loss": 0.308,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.35587188612099646,
76
+ "grad_norm": 5.709442615509033,
77
+ "learning_rate": 2.2271714922049e-06,
78
+ "loss": 0.2692,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.3914590747330961,
83
+ "grad_norm": 19.828474044799805,
84
+ "learning_rate": 2.44988864142539e-06,
85
+ "loss": 0.2255,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.42704626334519574,
90
+ "grad_norm": 17.53233528137207,
91
+ "learning_rate": 2.67260579064588e-06,
92
+ "loss": 0.1917,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.4626334519572954,
97
+ "grad_norm": 14.940444946289062,
98
+ "learning_rate": 2.8953229398663702e-06,
99
+ "loss": 0.3019,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.498220640569395,
104
+ "grad_norm": 1.711176872253418,
105
+ "learning_rate": 3.11804008908686e-06,
106
+ "loss": 0.2032,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.5338078291814946,
111
+ "grad_norm": 7.716080665588379,
112
+ "learning_rate": 3.34075723830735e-06,
113
+ "loss": 0.2764,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.5693950177935944,
118
+ "grad_norm": 20.839887619018555,
119
+ "learning_rate": 3.56347438752784e-06,
120
+ "loss": 0.2067,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.604982206405694,
125
+ "grad_norm": 2.853170156478882,
126
+ "learning_rate": 3.78619153674833e-06,
127
+ "loss": 0.243,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.6405693950177936,
132
+ "grad_norm": 6.61458683013916,
133
+ "learning_rate": 4.00890868596882e-06,
134
+ "loss": 0.1886,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.6761565836298933,
139
+ "grad_norm": 3.499279022216797,
140
+ "learning_rate": 4.231625835189309e-06,
141
+ "loss": 0.1987,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.7117437722419929,
146
+ "grad_norm": 32.111114501953125,
147
+ "learning_rate": 4.4543429844098e-06,
148
+ "loss": 0.2239,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.7473309608540926,
153
+ "grad_norm": 3.83589506149292,
154
+ "learning_rate": 4.67706013363029e-06,
155
+ "loss": 0.1891,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.7829181494661922,
160
+ "grad_norm": 3.3233108520507812,
161
+ "learning_rate": 4.89977728285078e-06,
162
+ "loss": 0.1335,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.8185053380782918,
167
+ "grad_norm": 19.36582374572754,
168
+ "learning_rate": 5.12249443207127e-06,
169
+ "loss": 0.0513,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.8540925266903915,
174
+ "grad_norm": 0.7740408182144165,
175
+ "learning_rate": 5.34521158129176e-06,
176
+ "loss": 0.1499,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.8896797153024911,
181
+ "grad_norm": 0.18774119019508362,
182
+ "learning_rate": 5.5679287305122494e-06,
183
+ "loss": 0.1303,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 0.9252669039145908,
188
+ "grad_norm": 0.2991533875465393,
189
+ "learning_rate": 5.7906458797327404e-06,
190
+ "loss": 0.0695,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 0.9608540925266904,
195
+ "grad_norm": 0.08448722958564758,
196
+ "learning_rate": 6.01336302895323e-06,
197
+ "loss": 0.1109,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 0.99644128113879,
202
+ "grad_norm": 30.85651397705078,
203
+ "learning_rate": 6.23608017817372e-06,
204
+ "loss": 0.2155,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 1.0,
209
+ "eval_accuracy": 0.9559412550066756,
210
+ "eval_f1_per_label": [
211
+ 0.9255079006772009,
212
+ 0.9687203791469194
213
+ ],
214
+ "eval_f1_weighted": 0.9556239404117711,
215
+ "eval_loss": 0.17148031294345856,
216
+ "eval_precision_per_label": [
217
+ 0.9490740740740741,
218
+ 0.9587242026266416
219
+ ],
220
+ "eval_precision_weighted": 0.9557995308223255,
221
+ "eval_recall_per_label": [
222
+ 0.9030837004405287,
223
+ 0.9789272030651341
224
+ ],
225
+ "eval_recall_weighted": 0.9559412550066756,
226
+ "eval_runtime": 1.3508,
227
+ "eval_samples_per_second": 554.494,
228
+ "eval_steps_per_second": 69.589,
229
+ "step": 281
230
+ },
231
+ {
232
+ "epoch": 1.0320284697508897,
233
+ "grad_norm": 37.36682891845703,
234
+ "learning_rate": 6.458797327394209e-06,
235
+ "loss": 0.2318,
236
+ "step": 290
237
+ },
238
+ {
239
+ "epoch": 1.0676156583629894,
240
+ "grad_norm": 3.181389093399048,
241
+ "learning_rate": 6.6815144766147e-06,
242
+ "loss": 0.29,
243
+ "step": 300
244
+ },
245
+ {
246
+ "epoch": 1.103202846975089,
247
+ "grad_norm": 0.6792677640914917,
248
+ "learning_rate": 6.9042316258351895e-06,
249
+ "loss": 0.0621,
250
+ "step": 310
251
+ },
252
+ {
253
+ "epoch": 1.1387900355871885,
254
+ "grad_norm": 31.2163143157959,
255
+ "learning_rate": 7.12694877505568e-06,
256
+ "loss": 0.3279,
257
+ "step": 320
258
+ },
259
+ {
260
+ "epoch": 1.1743772241992882,
261
+ "grad_norm": 32.702980041503906,
262
+ "learning_rate": 7.349665924276171e-06,
263
+ "loss": 0.0851,
264
+ "step": 330
265
+ },
266
+ {
267
+ "epoch": 1.209964412811388,
268
+ "grad_norm": 76.18331146240234,
269
+ "learning_rate": 7.57238307349666e-06,
270
+ "loss": 0.3339,
271
+ "step": 340
272
+ },
273
+ {
274
+ "epoch": 1.2455516014234875,
275
+ "grad_norm": 76.69121551513672,
276
+ "learning_rate": 7.79510022271715e-06,
277
+ "loss": 0.0757,
278
+ "step": 350
279
+ },
280
+ {
281
+ "epoch": 1.281138790035587,
282
+ "grad_norm": 0.06500601023435593,
283
+ "learning_rate": 8.01781737193764e-06,
284
+ "loss": 0.188,
285
+ "step": 360
286
+ },
287
+ {
288
+ "epoch": 1.3167259786476868,
289
+ "grad_norm": 95.82825469970703,
290
+ "learning_rate": 8.24053452115813e-06,
291
+ "loss": 0.2278,
292
+ "step": 370
293
+ },
294
+ {
295
+ "epoch": 1.3523131672597866,
296
+ "grad_norm": 49.43801498413086,
297
+ "learning_rate": 8.463251670378619e-06,
298
+ "loss": 0.2501,
299
+ "step": 380
300
+ },
301
+ {
302
+ "epoch": 1.387900355871886,
303
+ "grad_norm": 40.980369567871094,
304
+ "learning_rate": 8.68596881959911e-06,
305
+ "loss": 0.1424,
306
+ "step": 390
307
+ },
308
+ {
309
+ "epoch": 1.4234875444839858,
310
+ "grad_norm": 1.2029680013656616,
311
+ "learning_rate": 8.9086859688196e-06,
312
+ "loss": 0.1464,
313
+ "step": 400
314
+ },
315
+ {
316
+ "epoch": 1.4590747330960854,
317
+ "grad_norm": 26.718364715576172,
318
+ "learning_rate": 9.13140311804009e-06,
319
+ "loss": 0.2213,
320
+ "step": 410
321
+ },
322
+ {
323
+ "epoch": 1.4946619217081851,
324
+ "grad_norm": 29.254457473754883,
325
+ "learning_rate": 9.35412026726058e-06,
326
+ "loss": 0.1796,
327
+ "step": 420
328
+ },
329
+ {
330
+ "epoch": 1.5302491103202847,
331
+ "grad_norm": 0.17349238693714142,
332
+ "learning_rate": 9.57683741648107e-06,
333
+ "loss": 0.1021,
334
+ "step": 430
335
+ },
336
+ {
337
+ "epoch": 1.5658362989323842,
338
+ "grad_norm": 0.27866968512535095,
339
+ "learning_rate": 9.79955456570156e-06,
340
+ "loss": 0.2656,
341
+ "step": 440
342
+ },
343
+ {
344
+ "epoch": 1.601423487544484,
345
+ "grad_norm": 0.02428502030670643,
346
+ "learning_rate": 9.985185185185185e-06,
347
+ "loss": 0.0504,
348
+ "step": 450
349
+ },
350
+ {
351
+ "epoch": 1.6370106761565837,
352
+ "grad_norm": 0.0974503755569458,
353
+ "learning_rate": 9.837037037037038e-06,
354
+ "loss": 0.062,
355
+ "step": 460
356
+ },
357
+ {
358
+ "epoch": 1.6725978647686834,
359
+ "grad_norm": 1.3685508966445923,
360
+ "learning_rate": 9.688888888888889e-06,
361
+ "loss": 0.0459,
362
+ "step": 470
363
+ },
364
+ {
365
+ "epoch": 1.708185053380783,
366
+ "grad_norm": 1.2251461744308472,
367
+ "learning_rate": 9.540740740740742e-06,
368
+ "loss": 0.1173,
369
+ "step": 480
370
+ },
371
+ {
372
+ "epoch": 1.7437722419928825,
373
+ "grad_norm": 62.15313720703125,
374
+ "learning_rate": 9.392592592592593e-06,
375
+ "loss": 0.1621,
376
+ "step": 490
377
+ },
378
+ {
379
+ "epoch": 1.7793594306049823,
380
+ "grad_norm": 0.03026748262345791,
381
+ "learning_rate": 9.244444444444445e-06,
382
+ "loss": 0.1064,
383
+ "step": 500
384
+ },
385
+ {
386
+ "epoch": 1.814946619217082,
387
+ "grad_norm": 0.4413605332374573,
388
+ "learning_rate": 9.096296296296298e-06,
389
+ "loss": 0.1794,
390
+ "step": 510
391
+ },
392
+ {
393
+ "epoch": 1.8505338078291815,
394
+ "grad_norm": 0.08637289702892303,
395
+ "learning_rate": 8.948148148148149e-06,
396
+ "loss": 0.0915,
397
+ "step": 520
398
+ },
399
+ {
400
+ "epoch": 1.886120996441281,
401
+ "grad_norm": 0.09529650211334229,
402
+ "learning_rate": 8.8e-06,
403
+ "loss": 0.052,
404
+ "step": 530
405
+ },
406
+ {
407
+ "epoch": 1.9217081850533808,
408
+ "grad_norm": 107.46017456054688,
409
+ "learning_rate": 8.651851851851852e-06,
410
+ "loss": 0.1408,
411
+ "step": 540
412
+ },
413
+ {
414
+ "epoch": 1.9572953736654806,
415
+ "grad_norm": 26.60268211364746,
416
+ "learning_rate": 8.503703703703705e-06,
417
+ "loss": 0.1605,
418
+ "step": 550
419
+ },
420
+ {
421
+ "epoch": 1.99288256227758,
422
+ "grad_norm": 0.018591415137052536,
423
+ "learning_rate": 8.355555555555556e-06,
424
+ "loss": 0.0914,
425
+ "step": 560
426
+ },
427
+ {
428
+ "epoch": 2.0,
429
+ "eval_accuracy": 0.9732977303070761,
430
+ "eval_f1_per_label": [
431
+ 0.9541284403669725,
432
+ 0.9811676082862524
433
+ ],
434
+ "eval_f1_weighted": 0.9729728270877522,
435
+ "eval_loss": 0.1353287547826767,
436
+ "eval_precision_per_label": [
437
+ 0.9952153110047847,
438
+ 0.9648148148148148
439
+ ],
440
+ "eval_precision_weighted": 0.9740283163303329,
441
+ "eval_recall_per_label": [
442
+ 0.9162995594713657,
443
+ 0.9980842911877394
444
+ ],
445
+ "eval_recall_weighted": 0.9732977303070761,
446
+ "eval_runtime": 1.309,
447
+ "eval_samples_per_second": 572.195,
448
+ "eval_steps_per_second": 71.811,
449
+ "step": 562
450
+ }
451
+ ],
452
+ "logging_steps": 10,
453
+ "max_steps": 1124,
454
+ "num_input_tokens_seen": 0,
455
+ "num_train_epochs": 4,
456
+ "save_steps": 500,
457
+ "stateful_callbacks": {
458
+ "EarlyStoppingCallback": {
459
+ "args": {
460
+ "early_stopping_patience": 3,
461
+ "early_stopping_threshold": 0.0
462
+ },
463
+ "attributes": {
464
+ "early_stopping_patience_counter": 0
465
+ }
466
+ },
467
+ "TrainerControl": {
468
+ "args": {
469
+ "should_epoch_stop": false,
470
+ "should_evaluate": false,
471
+ "should_log": false,
472
+ "should_save": true,
473
+ "should_training_stop": false
474
+ },
475
+ "attributes": {}
476
+ }
477
+ },
478
+ "total_flos": 43858865706660.0,
479
+ "train_batch_size": 8,
480
+ "trial_name": null,
481
+ "trial_params": null
482
+ }
checkpoint-562/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b507d5295c042031ea8738de84bfa33f52abd9440ecb04319aeef48c60251cc
3
+ size 5368
checkpoint-562/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-843/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "dccuchile/bert-base-spanish-wwm-cased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "output_past": true,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "problem_type": "single_label_classification",
23
+ "torch_dtype": "float32",
24
+ "transformers_version": "4.47.1",
25
+ "type_vocab_size": 2,
26
+ "use_cache": true,
27
+ "vocab_size": 31002
28
+ }
checkpoint-843/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0956304b7ff12d2dfee5adad24464b7ae977468deed34bd98521a2984d4f1735
3
+ size 439433208
checkpoint-843/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef750725ca960bf0392c7ac97b92dc6719a0ea1dabae966d2ea046a2c16a4774
3
+ size 878987514
checkpoint-843/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:899371b0d2e8ffb9842a0871af4aa027406ec3d0f4861c7f41740823936b6822
3
+ size 14244
checkpoint-843/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae2e18e385abc3535d6a780402aa1cd9a5a9884f425ffb90f1a8d74382d5897a
3
+ size 1064
checkpoint-843/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-843/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-843/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[MASK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[PAD]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "3": {
20
+ "content": "[UNK]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "4": {
28
+ "content": "[CLS]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "5": {
36
+ "content": "[SEP]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": false,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": false,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
checkpoint-843/trainer_state.json ADDED
@@ -0,0 +1,702 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.050094157457351685,
3
+ "best_model_checkpoint": "/content/drive/MyDrive/HateSpeech-BETO-cased/checkpoint-843",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 843,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03558718861209965,
13
+ "grad_norm": 7.08419132232666,
14
+ "learning_rate": 2.2271714922049e-07,
15
+ "loss": 0.4085,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.0711743772241993,
20
+ "grad_norm": 23.322711944580078,
21
+ "learning_rate": 4.4543429844098e-07,
22
+ "loss": 0.3565,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.10676156583629894,
27
+ "grad_norm": 8.696600914001465,
28
+ "learning_rate": 6.6815144766147e-07,
29
+ "loss": 0.3482,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.1423487544483986,
34
+ "grad_norm": 14.429851531982422,
35
+ "learning_rate": 8.9086859688196e-07,
36
+ "loss": 0.3711,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.17793594306049823,
41
+ "grad_norm": 9.41112995147705,
42
+ "learning_rate": 1.11358574610245e-06,
43
+ "loss": 0.337,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.21352313167259787,
48
+ "grad_norm": 12.487126350402832,
49
+ "learning_rate": 1.33630289532294e-06,
50
+ "loss": 0.3413,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.2491103202846975,
55
+ "grad_norm": 17.9010009765625,
56
+ "learning_rate": 1.55902004454343e-06,
57
+ "loss": 0.3154,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.2846975088967972,
62
+ "grad_norm": 17.09844207763672,
63
+ "learning_rate": 1.78173719376392e-06,
64
+ "loss": 0.2304,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.3202846975088968,
69
+ "grad_norm": 19.929523468017578,
70
+ "learning_rate": 2.00445434298441e-06,
71
+ "loss": 0.308,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.35587188612099646,
76
+ "grad_norm": 5.709442615509033,
77
+ "learning_rate": 2.2271714922049e-06,
78
+ "loss": 0.2692,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.3914590747330961,
83
+ "grad_norm": 19.828474044799805,
84
+ "learning_rate": 2.44988864142539e-06,
85
+ "loss": 0.2255,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.42704626334519574,
90
+ "grad_norm": 17.53233528137207,
91
+ "learning_rate": 2.67260579064588e-06,
92
+ "loss": 0.1917,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.4626334519572954,
97
+ "grad_norm": 14.940444946289062,
98
+ "learning_rate": 2.8953229398663702e-06,
99
+ "loss": 0.3019,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.498220640569395,
104
+ "grad_norm": 1.711176872253418,
105
+ "learning_rate": 3.11804008908686e-06,
106
+ "loss": 0.2032,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.5338078291814946,
111
+ "grad_norm": 7.716080665588379,
112
+ "learning_rate": 3.34075723830735e-06,
113
+ "loss": 0.2764,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.5693950177935944,
118
+ "grad_norm": 20.839887619018555,
119
+ "learning_rate": 3.56347438752784e-06,
120
+ "loss": 0.2067,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.604982206405694,
125
+ "grad_norm": 2.853170156478882,
126
+ "learning_rate": 3.78619153674833e-06,
127
+ "loss": 0.243,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.6405693950177936,
132
+ "grad_norm": 6.61458683013916,
133
+ "learning_rate": 4.00890868596882e-06,
134
+ "loss": 0.1886,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.6761565836298933,
139
+ "grad_norm": 3.499279022216797,
140
+ "learning_rate": 4.231625835189309e-06,
141
+ "loss": 0.1987,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.7117437722419929,
146
+ "grad_norm": 32.111114501953125,
147
+ "learning_rate": 4.4543429844098e-06,
148
+ "loss": 0.2239,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.7473309608540926,
153
+ "grad_norm": 3.83589506149292,
154
+ "learning_rate": 4.67706013363029e-06,
155
+ "loss": 0.1891,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.7829181494661922,
160
+ "grad_norm": 3.3233108520507812,
161
+ "learning_rate": 4.89977728285078e-06,
162
+ "loss": 0.1335,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.8185053380782918,
167
+ "grad_norm": 19.36582374572754,
168
+ "learning_rate": 5.12249443207127e-06,
169
+ "loss": 0.0513,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.8540925266903915,
174
+ "grad_norm": 0.7740408182144165,
175
+ "learning_rate": 5.34521158129176e-06,
176
+ "loss": 0.1499,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.8896797153024911,
181
+ "grad_norm": 0.18774119019508362,
182
+ "learning_rate": 5.5679287305122494e-06,
183
+ "loss": 0.1303,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 0.9252669039145908,
188
+ "grad_norm": 0.2991533875465393,
189
+ "learning_rate": 5.7906458797327404e-06,
190
+ "loss": 0.0695,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 0.9608540925266904,
195
+ "grad_norm": 0.08448722958564758,
196
+ "learning_rate": 6.01336302895323e-06,
197
+ "loss": 0.1109,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 0.99644128113879,
202
+ "grad_norm": 30.85651397705078,
203
+ "learning_rate": 6.23608017817372e-06,
204
+ "loss": 0.2155,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 1.0,
209
+ "eval_accuracy": 0.9559412550066756,
210
+ "eval_f1_per_label": [
211
+ 0.9255079006772009,
212
+ 0.9687203791469194
213
+ ],
214
+ "eval_f1_weighted": 0.9556239404117711,
215
+ "eval_loss": 0.17148031294345856,
216
+ "eval_precision_per_label": [
217
+ 0.9490740740740741,
218
+ 0.9587242026266416
219
+ ],
220
+ "eval_precision_weighted": 0.9557995308223255,
221
+ "eval_recall_per_label": [
222
+ 0.9030837004405287,
223
+ 0.9789272030651341
224
+ ],
225
+ "eval_recall_weighted": 0.9559412550066756,
226
+ "eval_runtime": 1.3508,
227
+ "eval_samples_per_second": 554.494,
228
+ "eval_steps_per_second": 69.589,
229
+ "step": 281
230
+ },
231
+ {
232
+ "epoch": 1.0320284697508897,
233
+ "grad_norm": 37.36682891845703,
234
+ "learning_rate": 6.458797327394209e-06,
235
+ "loss": 0.2318,
236
+ "step": 290
237
+ },
238
+ {
239
+ "epoch": 1.0676156583629894,
240
+ "grad_norm": 3.181389093399048,
241
+ "learning_rate": 6.6815144766147e-06,
242
+ "loss": 0.29,
243
+ "step": 300
244
+ },
245
+ {
246
+ "epoch": 1.103202846975089,
247
+ "grad_norm": 0.6792677640914917,
248
+ "learning_rate": 6.9042316258351895e-06,
249
+ "loss": 0.0621,
250
+ "step": 310
251
+ },
252
+ {
253
+ "epoch": 1.1387900355871885,
254
+ "grad_norm": 31.2163143157959,
255
+ "learning_rate": 7.12694877505568e-06,
256
+ "loss": 0.3279,
257
+ "step": 320
258
+ },
259
+ {
260
+ "epoch": 1.1743772241992882,
261
+ "grad_norm": 32.702980041503906,
262
+ "learning_rate": 7.349665924276171e-06,
263
+ "loss": 0.0851,
264
+ "step": 330
265
+ },
266
+ {
267
+ "epoch": 1.209964412811388,
268
+ "grad_norm": 76.18331146240234,
269
+ "learning_rate": 7.57238307349666e-06,
270
+ "loss": 0.3339,
271
+ "step": 340
272
+ },
273
+ {
274
+ "epoch": 1.2455516014234875,
275
+ "grad_norm": 76.69121551513672,
276
+ "learning_rate": 7.79510022271715e-06,
277
+ "loss": 0.0757,
278
+ "step": 350
279
+ },
280
+ {
281
+ "epoch": 1.281138790035587,
282
+ "grad_norm": 0.06500601023435593,
283
+ "learning_rate": 8.01781737193764e-06,
284
+ "loss": 0.188,
285
+ "step": 360
286
+ },
287
+ {
288
+ "epoch": 1.3167259786476868,
289
+ "grad_norm": 95.82825469970703,
290
+ "learning_rate": 8.24053452115813e-06,
291
+ "loss": 0.2278,
292
+ "step": 370
293
+ },
294
+ {
295
+ "epoch": 1.3523131672597866,
296
+ "grad_norm": 49.43801498413086,
297
+ "learning_rate": 8.463251670378619e-06,
298
+ "loss": 0.2501,
299
+ "step": 380
300
+ },
301
+ {
302
+ "epoch": 1.387900355871886,
303
+ "grad_norm": 40.980369567871094,
304
+ "learning_rate": 8.68596881959911e-06,
305
+ "loss": 0.1424,
306
+ "step": 390
307
+ },
308
+ {
309
+ "epoch": 1.4234875444839858,
310
+ "grad_norm": 1.2029680013656616,
311
+ "learning_rate": 8.9086859688196e-06,
312
+ "loss": 0.1464,
313
+ "step": 400
314
+ },
315
+ {
316
+ "epoch": 1.4590747330960854,
317
+ "grad_norm": 26.718364715576172,
318
+ "learning_rate": 9.13140311804009e-06,
319
+ "loss": 0.2213,
320
+ "step": 410
321
+ },
322
+ {
323
+ "epoch": 1.4946619217081851,
324
+ "grad_norm": 29.254457473754883,
325
+ "learning_rate": 9.35412026726058e-06,
326
+ "loss": 0.1796,
327
+ "step": 420
328
+ },
329
+ {
330
+ "epoch": 1.5302491103202847,
331
+ "grad_norm": 0.17349238693714142,
332
+ "learning_rate": 9.57683741648107e-06,
333
+ "loss": 0.1021,
334
+ "step": 430
335
+ },
336
+ {
337
+ "epoch": 1.5658362989323842,
338
+ "grad_norm": 0.27866968512535095,
339
+ "learning_rate": 9.79955456570156e-06,
340
+ "loss": 0.2656,
341
+ "step": 440
342
+ },
343
+ {
344
+ "epoch": 1.601423487544484,
345
+ "grad_norm": 0.02428502030670643,
346
+ "learning_rate": 9.985185185185185e-06,
347
+ "loss": 0.0504,
348
+ "step": 450
349
+ },
350
+ {
351
+ "epoch": 1.6370106761565837,
352
+ "grad_norm": 0.0974503755569458,
353
+ "learning_rate": 9.837037037037038e-06,
354
+ "loss": 0.062,
355
+ "step": 460
356
+ },
357
+ {
358
+ "epoch": 1.6725978647686834,
359
+ "grad_norm": 1.3685508966445923,
360
+ "learning_rate": 9.688888888888889e-06,
361
+ "loss": 0.0459,
362
+ "step": 470
363
+ },
364
+ {
365
+ "epoch": 1.708185053380783,
366
+ "grad_norm": 1.2251461744308472,
367
+ "learning_rate": 9.540740740740742e-06,
368
+ "loss": 0.1173,
369
+ "step": 480
370
+ },
371
+ {
372
+ "epoch": 1.7437722419928825,
373
+ "grad_norm": 62.15313720703125,
374
+ "learning_rate": 9.392592592592593e-06,
375
+ "loss": 0.1621,
376
+ "step": 490
377
+ },
378
+ {
379
+ "epoch": 1.7793594306049823,
380
+ "grad_norm": 0.03026748262345791,
381
+ "learning_rate": 9.244444444444445e-06,
382
+ "loss": 0.1064,
383
+ "step": 500
384
+ },
385
+ {
386
+ "epoch": 1.814946619217082,
387
+ "grad_norm": 0.4413605332374573,
388
+ "learning_rate": 9.096296296296298e-06,
389
+ "loss": 0.1794,
390
+ "step": 510
391
+ },
392
+ {
393
+ "epoch": 1.8505338078291815,
394
+ "grad_norm": 0.08637289702892303,
395
+ "learning_rate": 8.948148148148149e-06,
396
+ "loss": 0.0915,
397
+ "step": 520
398
+ },
399
+ {
400
+ "epoch": 1.886120996441281,
401
+ "grad_norm": 0.09529650211334229,
402
+ "learning_rate": 8.8e-06,
403
+ "loss": 0.052,
404
+ "step": 530
405
+ },
406
+ {
407
+ "epoch": 1.9217081850533808,
408
+ "grad_norm": 107.46017456054688,
409
+ "learning_rate": 8.651851851851852e-06,
410
+ "loss": 0.1408,
411
+ "step": 540
412
+ },
413
+ {
414
+ "epoch": 1.9572953736654806,
415
+ "grad_norm": 26.60268211364746,
416
+ "learning_rate": 8.503703703703705e-06,
417
+ "loss": 0.1605,
418
+ "step": 550
419
+ },
420
+ {
421
+ "epoch": 1.99288256227758,
422
+ "grad_norm": 0.018591415137052536,
423
+ "learning_rate": 8.355555555555556e-06,
424
+ "loss": 0.0914,
425
+ "step": 560
426
+ },
427
+ {
428
+ "epoch": 2.0,
429
+ "eval_accuracy": 0.9732977303070761,
430
+ "eval_f1_per_label": [
431
+ 0.9541284403669725,
432
+ 0.9811676082862524
433
+ ],
434
+ "eval_f1_weighted": 0.9729728270877522,
435
+ "eval_loss": 0.1353287547826767,
436
+ "eval_precision_per_label": [
437
+ 0.9952153110047847,
438
+ 0.9648148148148148
439
+ ],
440
+ "eval_precision_weighted": 0.9740283163303329,
441
+ "eval_recall_per_label": [
442
+ 0.9162995594713657,
443
+ 0.9980842911877394
444
+ ],
445
+ "eval_recall_weighted": 0.9732977303070761,
446
+ "eval_runtime": 1.309,
447
+ "eval_samples_per_second": 572.195,
448
+ "eval_steps_per_second": 71.811,
449
+ "step": 562
450
+ },
451
+ {
452
+ "epoch": 2.0284697508896796,
453
+ "grad_norm": 0.015944845974445343,
454
+ "learning_rate": 8.207407407407409e-06,
455
+ "loss": 0.0011,
456
+ "step": 570
457
+ },
458
+ {
459
+ "epoch": 2.0640569395017794,
460
+ "grad_norm": 0.03662079572677612,
461
+ "learning_rate": 8.05925925925926e-06,
462
+ "loss": 0.0475,
463
+ "step": 580
464
+ },
465
+ {
466
+ "epoch": 2.099644128113879,
467
+ "grad_norm": 0.03478744253516197,
468
+ "learning_rate": 7.911111111111112e-06,
469
+ "loss": 0.0017,
470
+ "step": 590
471
+ },
472
+ {
473
+ "epoch": 2.135231316725979,
474
+ "grad_norm": 0.03390470892190933,
475
+ "learning_rate": 7.762962962962963e-06,
476
+ "loss": 0.0054,
477
+ "step": 600
478
+ },
479
+ {
480
+ "epoch": 2.170818505338078,
481
+ "grad_norm": 0.007953139953315258,
482
+ "learning_rate": 7.614814814814816e-06,
483
+ "loss": 0.0096,
484
+ "step": 610
485
+ },
486
+ {
487
+ "epoch": 2.206405693950178,
488
+ "grad_norm": 45.853023529052734,
489
+ "learning_rate": 7.4666666666666675e-06,
490
+ "loss": 0.0893,
491
+ "step": 620
492
+ },
493
+ {
494
+ "epoch": 2.2419928825622777,
495
+ "grad_norm": 0.03887489438056946,
496
+ "learning_rate": 7.31851851851852e-06,
497
+ "loss": 0.0004,
498
+ "step": 630
499
+ },
500
+ {
501
+ "epoch": 2.277580071174377,
502
+ "grad_norm": 0.04745912551879883,
503
+ "learning_rate": 7.170370370370371e-06,
504
+ "loss": 0.1963,
505
+ "step": 640
506
+ },
507
+ {
508
+ "epoch": 2.3131672597864767,
509
+ "grad_norm": 0.05072787404060364,
510
+ "learning_rate": 7.022222222222222e-06,
511
+ "loss": 0.0231,
512
+ "step": 650
513
+ },
514
+ {
515
+ "epoch": 2.3487544483985765,
516
+ "grad_norm": 0.026582635939121246,
517
+ "learning_rate": 6.8740740740740745e-06,
518
+ "loss": 0.0149,
519
+ "step": 660
520
+ },
521
+ {
522
+ "epoch": 2.3843416370106763,
523
+ "grad_norm": 0.014482633210718632,
524
+ "learning_rate": 6.725925925925927e-06,
525
+ "loss": 0.0994,
526
+ "step": 670
527
+ },
528
+ {
529
+ "epoch": 2.419928825622776,
530
+ "grad_norm": 0.025208059698343277,
531
+ "learning_rate": 6.577777777777779e-06,
532
+ "loss": 0.0006,
533
+ "step": 680
534
+ },
535
+ {
536
+ "epoch": 2.4555160142348753,
537
+ "grad_norm": 0.01709064655005932,
538
+ "learning_rate": 6.42962962962963e-06,
539
+ "loss": 0.1219,
540
+ "step": 690
541
+ },
542
+ {
543
+ "epoch": 2.491103202846975,
544
+ "grad_norm": 0.1339588463306427,
545
+ "learning_rate": 6.2814814814814814e-06,
546
+ "loss": 0.0006,
547
+ "step": 700
548
+ },
549
+ {
550
+ "epoch": 2.526690391459075,
551
+ "grad_norm": 24.798721313476562,
552
+ "learning_rate": 6.133333333333334e-06,
553
+ "loss": 0.0029,
554
+ "step": 710
555
+ },
556
+ {
557
+ "epoch": 2.562277580071174,
558
+ "grad_norm": 1.3540862798690796,
559
+ "learning_rate": 5.985185185185186e-06,
560
+ "loss": 0.081,
561
+ "step": 720
562
+ },
563
+ {
564
+ "epoch": 2.597864768683274,
565
+ "grad_norm": 0.04109741002321243,
566
+ "learning_rate": 5.837037037037038e-06,
567
+ "loss": 0.0006,
568
+ "step": 730
569
+ },
570
+ {
571
+ "epoch": 2.6334519572953736,
572
+ "grad_norm": 0.010597619228065014,
573
+ "learning_rate": 5.688888888888889e-06,
574
+ "loss": 0.013,
575
+ "step": 740
576
+ },
577
+ {
578
+ "epoch": 2.6690391459074734,
579
+ "grad_norm": 0.02383551187813282,
580
+ "learning_rate": 5.540740740740741e-06,
581
+ "loss": 0.0008,
582
+ "step": 750
583
+ },
584
+ {
585
+ "epoch": 2.704626334519573,
586
+ "grad_norm": 1.2794164419174194,
587
+ "learning_rate": 5.392592592592593e-06,
588
+ "loss": 0.1707,
589
+ "step": 760
590
+ },
591
+ {
592
+ "epoch": 2.7402135231316724,
593
+ "grad_norm": 0.014402506873011589,
594
+ "learning_rate": 5.244444444444445e-06,
595
+ "loss": 0.0864,
596
+ "step": 770
597
+ },
598
+ {
599
+ "epoch": 2.775800711743772,
600
+ "grad_norm": 0.013125807978212833,
601
+ "learning_rate": 5.096296296296297e-06,
602
+ "loss": 0.0205,
603
+ "step": 780
604
+ },
605
+ {
606
+ "epoch": 2.811387900355872,
607
+ "grad_norm": 0.2091672271490097,
608
+ "learning_rate": 4.9481481481481485e-06,
609
+ "loss": 0.0027,
610
+ "step": 790
611
+ },
612
+ {
613
+ "epoch": 2.8469750889679717,
614
+ "grad_norm": 0.13218224048614502,
615
+ "learning_rate": 4.800000000000001e-06,
616
+ "loss": 0.0004,
617
+ "step": 800
618
+ },
619
+ {
620
+ "epoch": 2.882562277580071,
621
+ "grad_norm": 2.1445350646972656,
622
+ "learning_rate": 4.651851851851853e-06,
623
+ "loss": 0.1045,
624
+ "step": 810
625
+ },
626
+ {
627
+ "epoch": 2.9181494661921707,
628
+ "grad_norm": 0.016628708690404892,
629
+ "learning_rate": 4.503703703703704e-06,
630
+ "loss": 0.0005,
631
+ "step": 820
632
+ },
633
+ {
634
+ "epoch": 2.9537366548042705,
635
+ "grad_norm": 0.04098201170563698,
636
+ "learning_rate": 4.3555555555555555e-06,
637
+ "loss": 0.0569,
638
+ "step": 830
639
+ },
640
+ {
641
+ "epoch": 2.9893238434163703,
642
+ "grad_norm": 0.7140023708343506,
643
+ "learning_rate": 4.207407407407408e-06,
644
+ "loss": 0.0014,
645
+ "step": 840
646
+ },
647
+ {
648
+ "epoch": 3.0,
649
+ "eval_accuracy": 0.9893190921228304,
650
+ "eval_f1_per_label": [
651
+ 0.9821428571428571,
652
+ 0.9923809523809524
653
+ ],
654
+ "eval_f1_weighted": 0.989278085065802,
655
+ "eval_loss": 0.050094157457351685,
656
+ "eval_precision_per_label": [
657
+ 0.995475113122172,
658
+ 0.9867424242424242
659
+ ],
660
+ "eval_precision_weighted": 0.9893890469069138,
661
+ "eval_recall_per_label": [
662
+ 0.9691629955947136,
663
+ 0.9980842911877394
664
+ ],
665
+ "eval_recall_weighted": 0.9893190921228304,
666
+ "eval_runtime": 1.3422,
667
+ "eval_samples_per_second": 558.027,
668
+ "eval_steps_per_second": 70.033,
669
+ "step": 843
670
+ }
671
+ ],
672
+ "logging_steps": 10,
673
+ "max_steps": 1124,
674
+ "num_input_tokens_seen": 0,
675
+ "num_train_epochs": 4,
676
+ "save_steps": 500,
677
+ "stateful_callbacks": {
678
+ "EarlyStoppingCallback": {
679
+ "args": {
680
+ "early_stopping_patience": 3,
681
+ "early_stopping_threshold": 0.0
682
+ },
683
+ "attributes": {
684
+ "early_stopping_patience_counter": 0
685
+ }
686
+ },
687
+ "TrainerControl": {
688
+ "args": {
689
+ "should_epoch_stop": false,
690
+ "should_evaluate": false,
691
+ "should_log": false,
692
+ "should_save": true,
693
+ "should_training_stop": false
694
+ },
695
+ "attributes": {}
696
+ }
697
+ },
698
+ "total_flos": 65856902712120.0,
699
+ "train_batch_size": 8,
700
+ "trial_name": null,
701
+ "trial_params": null
702
+ }
checkpoint-843/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b507d5295c042031ea8738de84bfa33f52abd9440ecb04319aeef48c60251cc
3
+ size 5368
checkpoint-843/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "dccuchile/bert-base-spanish-wwm-cased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "output_past": true,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "problem_type": "single_label_classification",
23
+ "torch_dtype": "float32",
24
+ "transformers_version": "4.47.1",
25
+ "type_vocab_size": 2,
26
+ "use_cache": true,
27
+ "vocab_size": 31002
28
+ }
events.out.tfevents.1735166948.12b685f0bef2.317.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff91e417d90b57e5fa0a3824672bd41da0d9670c92dcfd54df562d1a9f836b30
3
+ size 11045
events.out.tfevents.1735167321.12b685f0bef2.317.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f45c584ea0ccbe8d9495fdcc132559045a571b8a2082ef60d3bd0399ded2b887
3
+ size 31168
events.out.tfevents.1735167522.12b685f0bef2.317.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0172fd6d57dbc3c59c6e4699a0f9a0a40f70d0704844e1adee60f8440f076b2
3
+ size 1086
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e177e2a47fdac098a1f71d136199e251cd143f5f231b72b9c61a1ba37daf395b
3
+ size 439433208
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }