kafikani commited on
Commit
e84b4f8
1 Parent(s): 59b9a83

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ tags:
4
+ - autotrain
5
+ - text-classification
6
+ base_model: allenai/longformer-base-4096
7
+ widget:
8
+ - text: "I love AutoTrain"
9
+ ---
10
+
11
+ # Model Trained Using AutoTrain
12
+
13
+ - Problem type: Text Classification
14
+
15
+ ## Validation Metrics
16
+ loss: 1.307055115699768
17
+
18
+ f1_macro: 0.5244016249451032
19
+
20
+ f1_micro: 0.7504835589941973
21
+
22
+ f1_weighted: 0.714761195760481
23
+
24
+ precision_macro: 0.5012229210342417
25
+
26
+ precision_micro: 0.7504835589941973
27
+
28
+ precision_weighted: 0.6860840439724423
29
+
30
+ recall_macro: 0.5532259049014222
31
+
32
+ recall_micro: 0.7504835589941973
33
+
34
+ recall_weighted: 0.7504835589941973
35
+
36
+ accuracy: 0.7504835589941973
checkpoint-6201/config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "allenai/longformer-base-4096",
3
+ "_num_labels": 3,
4
+ "architectures": [
5
+ "LongformerForSequenceClassification"
6
+ ],
7
+ "attention_mode": "longformer",
8
+ "attention_probs_dropout_prob": 0.1,
9
+ "attention_window": [
10
+ 512,
11
+ 512,
12
+ 512,
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512
22
+ ],
23
+ "bos_token_id": 0,
24
+ "eos_token_id": 2,
25
+ "gradient_checkpointing": false,
26
+ "hidden_act": "gelu",
27
+ "hidden_dropout_prob": 0.1,
28
+ "hidden_size": 768,
29
+ "id2label": {
30
+ "0": "negative",
31
+ "1": "neutral",
32
+ "2": "positive"
33
+ },
34
+ "ignore_attention_mask": false,
35
+ "initializer_range": 0.02,
36
+ "intermediate_size": 3072,
37
+ "label2id": {
38
+ "negative": 0,
39
+ "neutral": 1,
40
+ "positive": 2
41
+ },
42
+ "layer_norm_eps": 1e-05,
43
+ "max_position_embeddings": 4098,
44
+ "model_type": "longformer",
45
+ "num_attention_heads": 12,
46
+ "num_hidden_layers": 12,
47
+ "onnx_export": false,
48
+ "pad_token_id": 1,
49
+ "problem_type": "single_label_classification",
50
+ "sep_token_id": 2,
51
+ "torch_dtype": "float32",
52
+ "transformers_version": "4.45.0",
53
+ "type_vocab_size": 1,
54
+ "vocab_size": 50265
55
+ }
checkpoint-6201/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:322dae133d3ce467a820df6105d8c7973c40d913fb2ce1117801d5c7b57c139f
3
+ size 594681260
checkpoint-6201/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b69385ae3f0d8b25a998fc24255da1a205051f30b2e71450956ea059fb08b4e
3
+ size 1189514810
checkpoint-6201/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66988915a0a37f08074f2bbf10041158cae43ca697efd6a7f943e09e0e6bd7e4
3
+ size 13990
checkpoint-6201/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8311d9af8bc0939f4f57ff934d5efeb067fdfe2d024c64e6f9d34a9e095e315b
3
+ size 1064
checkpoint-6201/trainer_state.json ADDED
@@ -0,0 +1,1832 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.307055115699768,
3
+ "best_model_checkpoint": "autotrain-iinjh-0wh75/checkpoint-6201",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 6201,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.012094823415578132,
13
+ "grad_norm": 10.670669555664062,
14
+ "learning_rate": 2.012882447665056e-06,
15
+ "loss": 1.0613,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.024189646831156264,
20
+ "grad_norm": 17.839488983154297,
21
+ "learning_rate": 4.025764895330112e-06,
22
+ "loss": 1.0539,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.036284470246734396,
27
+ "grad_norm": 9.838277816772461,
28
+ "learning_rate": 6.038647342995169e-06,
29
+ "loss": 1.0026,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.04837929366231253,
34
+ "grad_norm": 18.464771270751953,
35
+ "learning_rate": 8.051529790660225e-06,
36
+ "loss": 0.9234,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.06047411707789066,
41
+ "grad_norm": 14.502671241760254,
42
+ "learning_rate": 1.0064412238325282e-05,
43
+ "loss": 1.2115,
44
+ "step": 125
45
+ },
46
+ {
47
+ "epoch": 0.07256894049346879,
48
+ "grad_norm": 13.285282135009766,
49
+ "learning_rate": 1.2077294685990338e-05,
50
+ "loss": 1.0436,
51
+ "step": 150
52
+ },
53
+ {
54
+ "epoch": 0.08466376390904692,
55
+ "grad_norm": 10.353692054748535,
56
+ "learning_rate": 1.4090177133655394e-05,
57
+ "loss": 0.9726,
58
+ "step": 175
59
+ },
60
+ {
61
+ "epoch": 0.09675858732462506,
62
+ "grad_norm": 8.703958511352539,
63
+ "learning_rate": 1.610305958132045e-05,
64
+ "loss": 0.8265,
65
+ "step": 200
66
+ },
67
+ {
68
+ "epoch": 0.10885341074020319,
69
+ "grad_norm": 13.100640296936035,
70
+ "learning_rate": 1.8115942028985507e-05,
71
+ "loss": 1.2616,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 0.12094823415578132,
76
+ "grad_norm": 20.051851272583008,
77
+ "learning_rate": 2.0128824476650564e-05,
78
+ "loss": 1.0757,
79
+ "step": 250
80
+ },
81
+ {
82
+ "epoch": 0.13304305757135945,
83
+ "grad_norm": 10.080187797546387,
84
+ "learning_rate": 2.214170692431562e-05,
85
+ "loss": 1.1058,
86
+ "step": 275
87
+ },
88
+ {
89
+ "epoch": 0.14513788098693758,
90
+ "grad_norm": 1.0888252258300781,
91
+ "learning_rate": 2.4154589371980676e-05,
92
+ "loss": 1.0442,
93
+ "step": 300
94
+ },
95
+ {
96
+ "epoch": 0.15723270440251572,
97
+ "grad_norm": 216.78359985351562,
98
+ "learning_rate": 2.6167471819645733e-05,
99
+ "loss": 0.4581,
100
+ "step": 325
101
+ },
102
+ {
103
+ "epoch": 0.16932752781809385,
104
+ "grad_norm": 3.2515599727630615,
105
+ "learning_rate": 2.8180354267310787e-05,
106
+ "loss": 1.7329,
107
+ "step": 350
108
+ },
109
+ {
110
+ "epoch": 0.18142235123367198,
111
+ "grad_norm": 4.606472969055176,
112
+ "learning_rate": 3.0193236714975848e-05,
113
+ "loss": 1.8193,
114
+ "step": 375
115
+ },
116
+ {
117
+ "epoch": 0.1935171746492501,
118
+ "grad_norm": 4.541548252105713,
119
+ "learning_rate": 3.22061191626409e-05,
120
+ "loss": 1.9815,
121
+ "step": 400
122
+ },
123
+ {
124
+ "epoch": 0.20561199806482824,
125
+ "grad_norm": 0.2636414170265198,
126
+ "learning_rate": 3.421900161030596e-05,
127
+ "loss": 1.1492,
128
+ "step": 425
129
+ },
130
+ {
131
+ "epoch": 0.21770682148040638,
132
+ "grad_norm": 5.543747425079346,
133
+ "learning_rate": 3.6231884057971014e-05,
134
+ "loss": 1.4995,
135
+ "step": 450
136
+ },
137
+ {
138
+ "epoch": 0.2298016448959845,
139
+ "grad_norm": 29.730493545532227,
140
+ "learning_rate": 3.824476650563607e-05,
141
+ "loss": 1.736,
142
+ "step": 475
143
+ },
144
+ {
145
+ "epoch": 0.24189646831156264,
146
+ "grad_norm": 0.5111683011054993,
147
+ "learning_rate": 4.025764895330113e-05,
148
+ "loss": 1.7303,
149
+ "step": 500
150
+ },
151
+ {
152
+ "epoch": 0.2539912917271408,
153
+ "grad_norm": 1.9221380949020386,
154
+ "learning_rate": 4.2270531400966186e-05,
155
+ "loss": 1.3904,
156
+ "step": 525
157
+ },
158
+ {
159
+ "epoch": 0.2660861151427189,
160
+ "grad_norm": 0.305082768201828,
161
+ "learning_rate": 4.428341384863124e-05,
162
+ "loss": 1.6028,
163
+ "step": 550
164
+ },
165
+ {
166
+ "epoch": 0.27818093855829706,
167
+ "grad_norm": 49.53160858154297,
168
+ "learning_rate": 4.62962962962963e-05,
169
+ "loss": 1.5488,
170
+ "step": 575
171
+ },
172
+ {
173
+ "epoch": 0.29027576197387517,
174
+ "grad_norm": 0.5875459313392639,
175
+ "learning_rate": 4.830917874396135e-05,
176
+ "loss": 2.5069,
177
+ "step": 600
178
+ },
179
+ {
180
+ "epoch": 0.30237058538945333,
181
+ "grad_norm": 0.1444765031337738,
182
+ "learning_rate": 4.996415770609319e-05,
183
+ "loss": 1.1429,
184
+ "step": 625
185
+ },
186
+ {
187
+ "epoch": 0.31446540880503143,
188
+ "grad_norm": 6.752070903778076,
189
+ "learning_rate": 4.974014336917563e-05,
190
+ "loss": 1.1868,
191
+ "step": 650
192
+ },
193
+ {
194
+ "epoch": 0.3265602322206096,
195
+ "grad_norm": 2.889754056930542,
196
+ "learning_rate": 4.951612903225807e-05,
197
+ "loss": 1.2563,
198
+ "step": 675
199
+ },
200
+ {
201
+ "epoch": 0.3386550556361877,
202
+ "grad_norm": 0.23045390844345093,
203
+ "learning_rate": 4.92921146953405e-05,
204
+ "loss": 1.5123,
205
+ "step": 700
206
+ },
207
+ {
208
+ "epoch": 0.35074987905176586,
209
+ "grad_norm": 0.20135998725891113,
210
+ "learning_rate": 4.906810035842294e-05,
211
+ "loss": 1.7428,
212
+ "step": 725
213
+ },
214
+ {
215
+ "epoch": 0.36284470246734396,
216
+ "grad_norm": 0.06913596391677856,
217
+ "learning_rate": 4.884408602150538e-05,
218
+ "loss": 1.2342,
219
+ "step": 750
220
+ },
221
+ {
222
+ "epoch": 0.3749395258829221,
223
+ "grad_norm": 2.32104754447937,
224
+ "learning_rate": 4.8620071684587816e-05,
225
+ "loss": 1.4426,
226
+ "step": 775
227
+ },
228
+ {
229
+ "epoch": 0.3870343492985002,
230
+ "grad_norm": 1.5299639701843262,
231
+ "learning_rate": 4.8396057347670255e-05,
232
+ "loss": 1.3624,
233
+ "step": 800
234
+ },
235
+ {
236
+ "epoch": 0.3991291727140784,
237
+ "grad_norm": 0.35129979252815247,
238
+ "learning_rate": 4.8172043010752693e-05,
239
+ "loss": 1.9102,
240
+ "step": 825
241
+ },
242
+ {
243
+ "epoch": 0.4112239961296565,
244
+ "grad_norm": 11.400070190429688,
245
+ "learning_rate": 4.7948028673835125e-05,
246
+ "loss": 1.5198,
247
+ "step": 850
248
+ },
249
+ {
250
+ "epoch": 0.42331881954523465,
251
+ "grad_norm": 13.278685569763184,
252
+ "learning_rate": 4.7724014336917564e-05,
253
+ "loss": 1.146,
254
+ "step": 875
255
+ },
256
+ {
257
+ "epoch": 0.43541364296081275,
258
+ "grad_norm": 14.47873592376709,
259
+ "learning_rate": 4.75e-05,
260
+ "loss": 0.8003,
261
+ "step": 900
262
+ },
263
+ {
264
+ "epoch": 0.4475084663763909,
265
+ "grad_norm": 13.323905944824219,
266
+ "learning_rate": 4.727598566308244e-05,
267
+ "loss": 1.0161,
268
+ "step": 925
269
+ },
270
+ {
271
+ "epoch": 0.459603289791969,
272
+ "grad_norm": 29.149803161621094,
273
+ "learning_rate": 4.705197132616488e-05,
274
+ "loss": 2.1808,
275
+ "step": 950
276
+ },
277
+ {
278
+ "epoch": 0.4716981132075472,
279
+ "grad_norm": 14.055343627929688,
280
+ "learning_rate": 4.682795698924731e-05,
281
+ "loss": 1.9727,
282
+ "step": 975
283
+ },
284
+ {
285
+ "epoch": 0.4837929366231253,
286
+ "grad_norm": 13.733134269714355,
287
+ "learning_rate": 4.660394265232975e-05,
288
+ "loss": 1.3969,
289
+ "step": 1000
290
+ },
291
+ {
292
+ "epoch": 0.49588776003870344,
293
+ "grad_norm": 12.515105247497559,
294
+ "learning_rate": 4.637992831541219e-05,
295
+ "loss": 1.1516,
296
+ "step": 1025
297
+ },
298
+ {
299
+ "epoch": 0.5079825834542816,
300
+ "grad_norm": 20.534067153930664,
301
+ "learning_rate": 4.615591397849463e-05,
302
+ "loss": 2.2554,
303
+ "step": 1050
304
+ },
305
+ {
306
+ "epoch": 0.5200774068698597,
307
+ "grad_norm": 16.800443649291992,
308
+ "learning_rate": 4.5931899641577066e-05,
309
+ "loss": 1.6032,
310
+ "step": 1075
311
+ },
312
+ {
313
+ "epoch": 0.5321722302854378,
314
+ "grad_norm": 2.256089448928833,
315
+ "learning_rate": 4.57078853046595e-05,
316
+ "loss": 1.7488,
317
+ "step": 1100
318
+ },
319
+ {
320
+ "epoch": 0.5442670537010159,
321
+ "grad_norm": 13.562662124633789,
322
+ "learning_rate": 4.548387096774194e-05,
323
+ "loss": 1.2443,
324
+ "step": 1125
325
+ },
326
+ {
327
+ "epoch": 0.5563618771165941,
328
+ "grad_norm": 12.777020454406738,
329
+ "learning_rate": 4.5259856630824375e-05,
330
+ "loss": 1.3996,
331
+ "step": 1150
332
+ },
333
+ {
334
+ "epoch": 0.5684567005321722,
335
+ "grad_norm": 11.443547248840332,
336
+ "learning_rate": 4.503584229390681e-05,
337
+ "loss": 1.2757,
338
+ "step": 1175
339
+ },
340
+ {
341
+ "epoch": 0.5805515239477503,
342
+ "grad_norm": 3.251011848449707,
343
+ "learning_rate": 4.481182795698925e-05,
344
+ "loss": 1.0835,
345
+ "step": 1200
346
+ },
347
+ {
348
+ "epoch": 0.5926463473633284,
349
+ "grad_norm": 12.59429931640625,
350
+ "learning_rate": 4.458781362007169e-05,
351
+ "loss": 1.4228,
352
+ "step": 1225
353
+ },
354
+ {
355
+ "epoch": 0.6047411707789067,
356
+ "grad_norm": 1.7253248691558838,
357
+ "learning_rate": 4.436379928315412e-05,
358
+ "loss": 1.7177,
359
+ "step": 1250
360
+ },
361
+ {
362
+ "epoch": 0.6168359941944848,
363
+ "grad_norm": 15.0354642868042,
364
+ "learning_rate": 4.413978494623656e-05,
365
+ "loss": 1.7989,
366
+ "step": 1275
367
+ },
368
+ {
369
+ "epoch": 0.6289308176100629,
370
+ "grad_norm": 13.84261417388916,
371
+ "learning_rate": 4.3915770609318994e-05,
372
+ "loss": 1.4853,
373
+ "step": 1300
374
+ },
375
+ {
376
+ "epoch": 0.6410256410256411,
377
+ "grad_norm": 13.903498649597168,
378
+ "learning_rate": 4.369175627240143e-05,
379
+ "loss": 1.1268,
380
+ "step": 1325
381
+ },
382
+ {
383
+ "epoch": 0.6531204644412192,
384
+ "grad_norm": 1.7544844150543213,
385
+ "learning_rate": 4.346774193548388e-05,
386
+ "loss": 1.3478,
387
+ "step": 1350
388
+ },
389
+ {
390
+ "epoch": 0.6652152878567973,
391
+ "grad_norm": 16.601131439208984,
392
+ "learning_rate": 4.324372759856631e-05,
393
+ "loss": 1.5037,
394
+ "step": 1375
395
+ },
396
+ {
397
+ "epoch": 0.6773101112723754,
398
+ "grad_norm": 0.15384919941425323,
399
+ "learning_rate": 4.301971326164875e-05,
400
+ "loss": 1.8697,
401
+ "step": 1400
402
+ },
403
+ {
404
+ "epoch": 0.6894049346879536,
405
+ "grad_norm": 16.029470443725586,
406
+ "learning_rate": 4.279569892473119e-05,
407
+ "loss": 1.7013,
408
+ "step": 1425
409
+ },
410
+ {
411
+ "epoch": 0.7014997581035317,
412
+ "grad_norm": 1.9329349994659424,
413
+ "learning_rate": 4.257168458781362e-05,
414
+ "loss": 1.852,
415
+ "step": 1450
416
+ },
417
+ {
418
+ "epoch": 0.7135945815191098,
419
+ "grad_norm": 34.523712158203125,
420
+ "learning_rate": 4.234767025089606e-05,
421
+ "loss": 1.1739,
422
+ "step": 1475
423
+ },
424
+ {
425
+ "epoch": 0.7256894049346879,
426
+ "grad_norm": 18.080400466918945,
427
+ "learning_rate": 4.2123655913978496e-05,
428
+ "loss": 1.1161,
429
+ "step": 1500
430
+ },
431
+ {
432
+ "epoch": 0.7377842283502661,
433
+ "grad_norm": 13.704357147216797,
434
+ "learning_rate": 4.1899641577060935e-05,
435
+ "loss": 1.1861,
436
+ "step": 1525
437
+ },
438
+ {
439
+ "epoch": 0.7498790517658442,
440
+ "grad_norm": 10.594738006591797,
441
+ "learning_rate": 4.167562724014337e-05,
442
+ "loss": 1.2265,
443
+ "step": 1550
444
+ },
445
+ {
446
+ "epoch": 0.7619738751814223,
447
+ "grad_norm": 1.4613639116287231,
448
+ "learning_rate": 4.1451612903225805e-05,
449
+ "loss": 1.173,
450
+ "step": 1575
451
+ },
452
+ {
453
+ "epoch": 0.7740686985970004,
454
+ "grad_norm": 0.7370879054069519,
455
+ "learning_rate": 4.1227598566308244e-05,
456
+ "loss": 1.9062,
457
+ "step": 1600
458
+ },
459
+ {
460
+ "epoch": 0.7861635220125787,
461
+ "grad_norm": 1.4284896850585938,
462
+ "learning_rate": 4.100358422939068e-05,
463
+ "loss": 1.9307,
464
+ "step": 1625
465
+ },
466
+ {
467
+ "epoch": 0.7982583454281568,
468
+ "grad_norm": 3.1330041885375977,
469
+ "learning_rate": 4.077956989247312e-05,
470
+ "loss": 1.4191,
471
+ "step": 1650
472
+ },
473
+ {
474
+ "epoch": 0.8103531688437349,
475
+ "grad_norm": 25.01806640625,
476
+ "learning_rate": 4.055555555555556e-05,
477
+ "loss": 1.8695,
478
+ "step": 1675
479
+ },
480
+ {
481
+ "epoch": 0.822447992259313,
482
+ "grad_norm": 24.93403434753418,
483
+ "learning_rate": 4.0331541218638e-05,
484
+ "loss": 1.0304,
485
+ "step": 1700
486
+ },
487
+ {
488
+ "epoch": 0.8345428156748912,
489
+ "grad_norm": 0.39635589718818665,
490
+ "learning_rate": 4.010752688172043e-05,
491
+ "loss": 1.5612,
492
+ "step": 1725
493
+ },
494
+ {
495
+ "epoch": 0.8466376390904693,
496
+ "grad_norm": 0.25706031918525696,
497
+ "learning_rate": 3.988351254480287e-05,
498
+ "loss": 1.6167,
499
+ "step": 1750
500
+ },
501
+ {
502
+ "epoch": 0.8587324625060474,
503
+ "grad_norm": 204.7973175048828,
504
+ "learning_rate": 3.965949820788531e-05,
505
+ "loss": 1.033,
506
+ "step": 1775
507
+ },
508
+ {
509
+ "epoch": 0.8708272859216255,
510
+ "grad_norm": 0.1547696739435196,
511
+ "learning_rate": 3.9435483870967746e-05,
512
+ "loss": 0.9405,
513
+ "step": 1800
514
+ },
515
+ {
516
+ "epoch": 0.8829221093372037,
517
+ "grad_norm": 25.602577209472656,
518
+ "learning_rate": 3.9211469534050185e-05,
519
+ "loss": 1.2914,
520
+ "step": 1825
521
+ },
522
+ {
523
+ "epoch": 0.8950169327527818,
524
+ "grad_norm": 25.09197425842285,
525
+ "learning_rate": 3.8987455197132616e-05,
526
+ "loss": 0.8189,
527
+ "step": 1850
528
+ },
529
+ {
530
+ "epoch": 0.9071117561683599,
531
+ "grad_norm": 0.08802329748868942,
532
+ "learning_rate": 3.8763440860215055e-05,
533
+ "loss": 1.7206,
534
+ "step": 1875
535
+ },
536
+ {
537
+ "epoch": 0.919206579583938,
538
+ "grad_norm": 23.29674530029297,
539
+ "learning_rate": 3.8539426523297494e-05,
540
+ "loss": 2.5356,
541
+ "step": 1900
542
+ },
543
+ {
544
+ "epoch": 0.9313014029995162,
545
+ "grad_norm": 24.580432891845703,
546
+ "learning_rate": 3.8315412186379926e-05,
547
+ "loss": 1.6752,
548
+ "step": 1925
549
+ },
550
+ {
551
+ "epoch": 0.9433962264150944,
552
+ "grad_norm": 2.269275665283203,
553
+ "learning_rate": 3.809139784946237e-05,
554
+ "loss": 1.9433,
555
+ "step": 1950
556
+ },
557
+ {
558
+ "epoch": 0.9554910498306725,
559
+ "grad_norm": 3.1146695613861084,
560
+ "learning_rate": 3.786738351254481e-05,
561
+ "loss": 1.562,
562
+ "step": 1975
563
+ },
564
+ {
565
+ "epoch": 0.9675858732462506,
566
+ "grad_norm": 0.2862231135368347,
567
+ "learning_rate": 3.764336917562724e-05,
568
+ "loss": 0.6273,
569
+ "step": 2000
570
+ },
571
+ {
572
+ "epoch": 0.9796806966618288,
573
+ "grad_norm": 0.39030689001083374,
574
+ "learning_rate": 3.741935483870968e-05,
575
+ "loss": 1.7117,
576
+ "step": 2025
577
+ },
578
+ {
579
+ "epoch": 0.9917755200774069,
580
+ "grad_norm": 25.142972946166992,
581
+ "learning_rate": 3.719534050179211e-05,
582
+ "loss": 1.8362,
583
+ "step": 2050
584
+ },
585
+ {
586
+ "epoch": 1.0,
587
+ "eval_accuracy": 0.6054158607350096,
588
+ "eval_f1_macro": 0.41404572439055193,
589
+ "eval_f1_micro": 0.6054158607350096,
590
+ "eval_f1_weighted": 0.5589758737227555,
591
+ "eval_loss": 1.5838946104049683,
592
+ "eval_precision_macro": 0.46238977618202576,
593
+ "eval_precision_micro": 0.6054158607350096,
594
+ "eval_precision_weighted": 0.6418784559670553,
595
+ "eval_recall_macro": 0.46003784410669596,
596
+ "eval_recall_micro": 0.6054158607350096,
597
+ "eval_recall_weighted": 0.6054158607350096,
598
+ "eval_runtime": 7116.055,
599
+ "eval_samples_per_second": 0.073,
600
+ "eval_steps_per_second": 0.036,
601
+ "step": 2067
602
+ },
603
+ {
604
+ "epoch": 1.003870343492985,
605
+ "grad_norm": 23.918968200683594,
606
+ "learning_rate": 3.697132616487455e-05,
607
+ "loss": 1.7229,
608
+ "step": 2075
609
+ },
610
+ {
611
+ "epoch": 1.0159651669085632,
612
+ "grad_norm": 24.551406860351562,
613
+ "learning_rate": 3.6747311827956996e-05,
614
+ "loss": 0.7897,
615
+ "step": 2100
616
+ },
617
+ {
618
+ "epoch": 1.0280599903241412,
619
+ "grad_norm": 0.6501819491386414,
620
+ "learning_rate": 3.652329749103943e-05,
621
+ "loss": 1.4349,
622
+ "step": 2125
623
+ },
624
+ {
625
+ "epoch": 1.0401548137397194,
626
+ "grad_norm": 0.4270811676979065,
627
+ "learning_rate": 3.6299283154121866e-05,
628
+ "loss": 1.4771,
629
+ "step": 2150
630
+ },
631
+ {
632
+ "epoch": 1.0522496371552976,
633
+ "grad_norm": 25.46353530883789,
634
+ "learning_rate": 3.6075268817204305e-05,
635
+ "loss": 1.6769,
636
+ "step": 2175
637
+ },
638
+ {
639
+ "epoch": 1.0643444605708756,
640
+ "grad_norm": 24.238040924072266,
641
+ "learning_rate": 3.585125448028674e-05,
642
+ "loss": 2.2981,
643
+ "step": 2200
644
+ },
645
+ {
646
+ "epoch": 1.0764392839864538,
647
+ "grad_norm": 24.645816802978516,
648
+ "learning_rate": 3.5627240143369176e-05,
649
+ "loss": 1.9655,
650
+ "step": 2225
651
+ },
652
+ {
653
+ "epoch": 1.0885341074020318,
654
+ "grad_norm": 0.16327662765979767,
655
+ "learning_rate": 3.5403225806451614e-05,
656
+ "loss": 1.2296,
657
+ "step": 2250
658
+ },
659
+ {
660
+ "epoch": 1.10062893081761,
661
+ "grad_norm": 3.4786908626556396,
662
+ "learning_rate": 3.517921146953405e-05,
663
+ "loss": 1.3128,
664
+ "step": 2275
665
+ },
666
+ {
667
+ "epoch": 1.1127237542331883,
668
+ "grad_norm": 0.18501241505146027,
669
+ "learning_rate": 3.495519713261649e-05,
670
+ "loss": 1.1883,
671
+ "step": 2300
672
+ },
673
+ {
674
+ "epoch": 1.1248185776487662,
675
+ "grad_norm": 0.35296130180358887,
676
+ "learning_rate": 3.473118279569892e-05,
677
+ "loss": 1.5325,
678
+ "step": 2325
679
+ },
680
+ {
681
+ "epoch": 1.1369134010643445,
682
+ "grad_norm": 0.24219243228435516,
683
+ "learning_rate": 3.450716845878136e-05,
684
+ "loss": 0.8073,
685
+ "step": 2350
686
+ },
687
+ {
688
+ "epoch": 1.1490082244799227,
689
+ "grad_norm": 0.16509315371513367,
690
+ "learning_rate": 3.42831541218638e-05,
691
+ "loss": 0.4453,
692
+ "step": 2375
693
+ },
694
+ {
695
+ "epoch": 1.1611030478955007,
696
+ "grad_norm": 25.64842987060547,
697
+ "learning_rate": 3.405913978494624e-05,
698
+ "loss": 1.8413,
699
+ "step": 2400
700
+ },
701
+ {
702
+ "epoch": 1.1731978713110789,
703
+ "grad_norm": 0.2987683117389679,
704
+ "learning_rate": 3.383512544802868e-05,
705
+ "loss": 1.3856,
706
+ "step": 2425
707
+ },
708
+ {
709
+ "epoch": 1.185292694726657,
710
+ "grad_norm": 0.1390484720468521,
711
+ "learning_rate": 3.3611111111111116e-05,
712
+ "loss": 1.0936,
713
+ "step": 2450
714
+ },
715
+ {
716
+ "epoch": 1.197387518142235,
717
+ "grad_norm": 152.2466278076172,
718
+ "learning_rate": 3.338709677419355e-05,
719
+ "loss": 1.29,
720
+ "step": 2475
721
+ },
722
+ {
723
+ "epoch": 1.2094823415578133,
724
+ "grad_norm": 0.5964694619178772,
725
+ "learning_rate": 3.316308243727599e-05,
726
+ "loss": 2.0098,
727
+ "step": 2500
728
+ },
729
+ {
730
+ "epoch": 1.2215771649733913,
731
+ "grad_norm": 0.7620899677276611,
732
+ "learning_rate": 3.2939068100358426e-05,
733
+ "loss": 0.9353,
734
+ "step": 2525
735
+ },
736
+ {
737
+ "epoch": 1.2336719883889695,
738
+ "grad_norm": 0.4900791347026825,
739
+ "learning_rate": 3.2715053763440864e-05,
740
+ "loss": 1.1481,
741
+ "step": 2550
742
+ },
743
+ {
744
+ "epoch": 1.2457668118045477,
745
+ "grad_norm": 0.8056408762931824,
746
+ "learning_rate": 3.24910394265233e-05,
747
+ "loss": 1.6924,
748
+ "step": 2575
749
+ },
750
+ {
751
+ "epoch": 1.2578616352201257,
752
+ "grad_norm": 0.34215426445007324,
753
+ "learning_rate": 3.2267025089605735e-05,
754
+ "loss": 1.5408,
755
+ "step": 2600
756
+ },
757
+ {
758
+ "epoch": 1.269956458635704,
759
+ "grad_norm": 0.32340100407600403,
760
+ "learning_rate": 3.204301075268817e-05,
761
+ "loss": 1.9354,
762
+ "step": 2625
763
+ },
764
+ {
765
+ "epoch": 1.282051282051282,
766
+ "grad_norm": 24.62626075744629,
767
+ "learning_rate": 3.181899641577061e-05,
768
+ "loss": 1.9551,
769
+ "step": 2650
770
+ },
771
+ {
772
+ "epoch": 1.2941461054668602,
773
+ "grad_norm": 1.058280348777771,
774
+ "learning_rate": 3.1594982078853044e-05,
775
+ "loss": 1.3507,
776
+ "step": 2675
777
+ },
778
+ {
779
+ "epoch": 1.3062409288824384,
780
+ "grad_norm": 1.5900629758834839,
781
+ "learning_rate": 3.137096774193549e-05,
782
+ "loss": 2.1607,
783
+ "step": 2700
784
+ },
785
+ {
786
+ "epoch": 1.3183357522980166,
787
+ "grad_norm": 24.93858528137207,
788
+ "learning_rate": 3.114695340501792e-05,
789
+ "loss": 1.328,
790
+ "step": 2725
791
+ },
792
+ {
793
+ "epoch": 1.3304305757135946,
794
+ "grad_norm": 24.877849578857422,
795
+ "learning_rate": 3.092293906810036e-05,
796
+ "loss": 1.6651,
797
+ "step": 2750
798
+ },
799
+ {
800
+ "epoch": 1.3425253991291728,
801
+ "grad_norm": 0.48590317368507385,
802
+ "learning_rate": 3.06989247311828e-05,
803
+ "loss": 0.6887,
804
+ "step": 2775
805
+ },
806
+ {
807
+ "epoch": 1.3546202225447508,
808
+ "grad_norm": 0.09108546376228333,
809
+ "learning_rate": 3.0474910394265234e-05,
810
+ "loss": 0.9513,
811
+ "step": 2800
812
+ },
813
+ {
814
+ "epoch": 1.366715045960329,
815
+ "grad_norm": 0.23763756453990936,
816
+ "learning_rate": 3.0250896057347672e-05,
817
+ "loss": 1.5187,
818
+ "step": 2825
819
+ },
820
+ {
821
+ "epoch": 1.3788098693759072,
822
+ "grad_norm": 0.2408967763185501,
823
+ "learning_rate": 3.002688172043011e-05,
824
+ "loss": 1.3683,
825
+ "step": 2850
826
+ },
827
+ {
828
+ "epoch": 1.3909046927914852,
829
+ "grad_norm": 2.7443923950195312,
830
+ "learning_rate": 2.9802867383512546e-05,
831
+ "loss": 1.7777,
832
+ "step": 2875
833
+ },
834
+ {
835
+ "epoch": 1.4029995162070634,
836
+ "grad_norm": 26.244659423828125,
837
+ "learning_rate": 2.9578853046594985e-05,
838
+ "loss": 2.1378,
839
+ "step": 2900
840
+ },
841
+ {
842
+ "epoch": 1.4150943396226414,
843
+ "grad_norm": 158.08462524414062,
844
+ "learning_rate": 2.9354838709677417e-05,
845
+ "loss": 1.28,
846
+ "step": 2925
847
+ },
848
+ {
849
+ "epoch": 1.4271891630382196,
850
+ "grad_norm": 27.04939842224121,
851
+ "learning_rate": 2.913082437275986e-05,
852
+ "loss": 0.8604,
853
+ "step": 2950
854
+ },
855
+ {
856
+ "epoch": 1.4392839864537978,
857
+ "grad_norm": 0.1699460744857788,
858
+ "learning_rate": 2.8906810035842297e-05,
859
+ "loss": 2.7105,
860
+ "step": 2975
861
+ },
862
+ {
863
+ "epoch": 1.4513788098693758,
864
+ "grad_norm": 23.923328399658203,
865
+ "learning_rate": 2.868279569892473e-05,
866
+ "loss": 1.3738,
867
+ "step": 3000
868
+ },
869
+ {
870
+ "epoch": 1.463473633284954,
871
+ "grad_norm": 23.95648765563965,
872
+ "learning_rate": 2.845878136200717e-05,
873
+ "loss": 1.4941,
874
+ "step": 3025
875
+ },
876
+ {
877
+ "epoch": 1.475568456700532,
878
+ "grad_norm": 0.05188923701643944,
879
+ "learning_rate": 2.823476702508961e-05,
880
+ "loss": 0.9538,
881
+ "step": 3050
882
+ },
883
+ {
884
+ "epoch": 1.4876632801161103,
885
+ "grad_norm": 2.9171254634857178,
886
+ "learning_rate": 2.801075268817204e-05,
887
+ "loss": 1.7475,
888
+ "step": 3075
889
+ },
890
+ {
891
+ "epoch": 1.4997581035316885,
892
+ "grad_norm": 6.571992874145508,
893
+ "learning_rate": 2.7786738351254484e-05,
894
+ "loss": 1.1107,
895
+ "step": 3100
896
+ },
897
+ {
898
+ "epoch": 1.5118529269472667,
899
+ "grad_norm": 26.21474838256836,
900
+ "learning_rate": 2.7562724014336922e-05,
901
+ "loss": 1.1843,
902
+ "step": 3125
903
+ },
904
+ {
905
+ "epoch": 1.5239477503628447,
906
+ "grad_norm": 0.16156063973903656,
907
+ "learning_rate": 2.7338709677419354e-05,
908
+ "loss": 1.5585,
909
+ "step": 3150
910
+ },
911
+ {
912
+ "epoch": 1.5360425737784227,
913
+ "grad_norm": 0.13359041512012482,
914
+ "learning_rate": 2.7114695340501796e-05,
915
+ "loss": 1.1763,
916
+ "step": 3175
917
+ },
918
+ {
919
+ "epoch": 1.548137397194001,
920
+ "grad_norm": 0.2726369798183441,
921
+ "learning_rate": 2.6890681003584228e-05,
922
+ "loss": 1.6273,
923
+ "step": 3200
924
+ },
925
+ {
926
+ "epoch": 1.5602322206095791,
927
+ "grad_norm": 0.41835281252861023,
928
+ "learning_rate": 2.6666666666666667e-05,
929
+ "loss": 1.3093,
930
+ "step": 3225
931
+ },
932
+ {
933
+ "epoch": 1.5723270440251573,
934
+ "grad_norm": 0.2902648448944092,
935
+ "learning_rate": 2.6442652329749105e-05,
936
+ "loss": 0.8197,
937
+ "step": 3250
938
+ },
939
+ {
940
+ "epoch": 1.5844218674407353,
941
+ "grad_norm": 0.3506704866886139,
942
+ "learning_rate": 2.621863799283154e-05,
943
+ "loss": 0.8084,
944
+ "step": 3275
945
+ },
946
+ {
947
+ "epoch": 1.5965166908563135,
948
+ "grad_norm": 0.07854276150465012,
949
+ "learning_rate": 2.599462365591398e-05,
950
+ "loss": 0.6503,
951
+ "step": 3300
952
+ },
953
+ {
954
+ "epoch": 1.6086115142718915,
955
+ "grad_norm": 0.24219046533107758,
956
+ "learning_rate": 2.5770609318996418e-05,
957
+ "loss": 0.9614,
958
+ "step": 3325
959
+ },
960
+ {
961
+ "epoch": 1.6207063376874697,
962
+ "grad_norm": 26.107500076293945,
963
+ "learning_rate": 2.5546594982078853e-05,
964
+ "loss": 1.6199,
965
+ "step": 3350
966
+ },
967
+ {
968
+ "epoch": 1.632801161103048,
969
+ "grad_norm": 26.309776306152344,
970
+ "learning_rate": 2.532258064516129e-05,
971
+ "loss": 1.1816,
972
+ "step": 3375
973
+ },
974
+ {
975
+ "epoch": 1.6448959845186262,
976
+ "grad_norm": 0.2523309886455536,
977
+ "learning_rate": 2.5098566308243727e-05,
978
+ "loss": 1.1721,
979
+ "step": 3400
980
+ },
981
+ {
982
+ "epoch": 1.6569908079342042,
983
+ "grad_norm": 26.82816505432129,
984
+ "learning_rate": 2.4874551971326165e-05,
985
+ "loss": 0.696,
986
+ "step": 3425
987
+ },
988
+ {
989
+ "epoch": 1.6690856313497822,
990
+ "grad_norm": 0.15189094841480255,
991
+ "learning_rate": 2.46505376344086e-05,
992
+ "loss": 1.1234,
993
+ "step": 3450
994
+ },
995
+ {
996
+ "epoch": 1.6811804547653604,
997
+ "grad_norm": 0.19785544276237488,
998
+ "learning_rate": 2.4426523297491043e-05,
999
+ "loss": 1.6574,
1000
+ "step": 3475
1001
+ },
1002
+ {
1003
+ "epoch": 1.6932752781809386,
1004
+ "grad_norm": 0.66933673620224,
1005
+ "learning_rate": 2.4202508960573478e-05,
1006
+ "loss": 2.0303,
1007
+ "step": 3500
1008
+ },
1009
+ {
1010
+ "epoch": 1.7053701015965168,
1011
+ "grad_norm": 23.572277069091797,
1012
+ "learning_rate": 2.3978494623655913e-05,
1013
+ "loss": 2.2247,
1014
+ "step": 3525
1015
+ },
1016
+ {
1017
+ "epoch": 1.7174649250120948,
1018
+ "grad_norm": 0.5169154405593872,
1019
+ "learning_rate": 2.3754480286738355e-05,
1020
+ "loss": 0.7378,
1021
+ "step": 3550
1022
+ },
1023
+ {
1024
+ "epoch": 1.7295597484276728,
1025
+ "grad_norm": 0.28006285429000854,
1026
+ "learning_rate": 2.353046594982079e-05,
1027
+ "loss": 1.823,
1028
+ "step": 3575
1029
+ },
1030
+ {
1031
+ "epoch": 1.741654571843251,
1032
+ "grad_norm": 0.47209933400154114,
1033
+ "learning_rate": 2.3306451612903226e-05,
1034
+ "loss": 2.0323,
1035
+ "step": 3600
1036
+ },
1037
+ {
1038
+ "epoch": 1.7537493952588292,
1039
+ "grad_norm": 0.23533566296100616,
1040
+ "learning_rate": 2.3082437275985664e-05,
1041
+ "loss": 0.5909,
1042
+ "step": 3625
1043
+ },
1044
+ {
1045
+ "epoch": 1.7658442186744074,
1046
+ "grad_norm": 0.17464753985404968,
1047
+ "learning_rate": 2.2858422939068103e-05,
1048
+ "loss": 1.0246,
1049
+ "step": 3650
1050
+ },
1051
+ {
1052
+ "epoch": 1.7779390420899854,
1053
+ "grad_norm": 0.18633712828159332,
1054
+ "learning_rate": 2.2634408602150538e-05,
1055
+ "loss": 0.7297,
1056
+ "step": 3675
1057
+ },
1058
+ {
1059
+ "epoch": 1.7900338655055636,
1060
+ "grad_norm": 0.19488303363323212,
1061
+ "learning_rate": 2.2410394265232977e-05,
1062
+ "loss": 1.0616,
1063
+ "step": 3700
1064
+ },
1065
+ {
1066
+ "epoch": 1.8021286889211416,
1067
+ "grad_norm": 0.21784909069538116,
1068
+ "learning_rate": 2.2186379928315412e-05,
1069
+ "loss": 1.5585,
1070
+ "step": 3725
1071
+ },
1072
+ {
1073
+ "epoch": 1.8142235123367199,
1074
+ "grad_norm": 0.2788207530975342,
1075
+ "learning_rate": 2.196236559139785e-05,
1076
+ "loss": 1.201,
1077
+ "step": 3750
1078
+ },
1079
+ {
1080
+ "epoch": 1.826318335752298,
1081
+ "grad_norm": 24.099077224731445,
1082
+ "learning_rate": 2.173835125448029e-05,
1083
+ "loss": 2.076,
1084
+ "step": 3775
1085
+ },
1086
+ {
1087
+ "epoch": 1.8384131591678763,
1088
+ "grad_norm": 0.18294040858745575,
1089
+ "learning_rate": 2.1514336917562725e-05,
1090
+ "loss": 1.384,
1091
+ "step": 3800
1092
+ },
1093
+ {
1094
+ "epoch": 1.8505079825834543,
1095
+ "grad_norm": 24.9930477142334,
1096
+ "learning_rate": 2.129032258064516e-05,
1097
+ "loss": 1.0263,
1098
+ "step": 3825
1099
+ },
1100
+ {
1101
+ "epoch": 1.8626028059990323,
1102
+ "grad_norm": 0.19692493975162506,
1103
+ "learning_rate": 2.1066308243727602e-05,
1104
+ "loss": 1.4213,
1105
+ "step": 3850
1106
+ },
1107
+ {
1108
+ "epoch": 1.8746976294146105,
1109
+ "grad_norm": 0.32238996028900146,
1110
+ "learning_rate": 2.0842293906810037e-05,
1111
+ "loss": 0.9261,
1112
+ "step": 3875
1113
+ },
1114
+ {
1115
+ "epoch": 1.8867924528301887,
1116
+ "grad_norm": 0.16958071291446686,
1117
+ "learning_rate": 2.0618279569892472e-05,
1118
+ "loss": 0.4504,
1119
+ "step": 3900
1120
+ },
1121
+ {
1122
+ "epoch": 1.898887276245767,
1123
+ "grad_norm": 0.187217578291893,
1124
+ "learning_rate": 2.039426523297491e-05,
1125
+ "loss": 1.5949,
1126
+ "step": 3925
1127
+ },
1128
+ {
1129
+ "epoch": 1.910982099661345,
1130
+ "grad_norm": 0.644129753112793,
1131
+ "learning_rate": 2.017025089605735e-05,
1132
+ "loss": 2.5123,
1133
+ "step": 3950
1134
+ },
1135
+ {
1136
+ "epoch": 1.9230769230769231,
1137
+ "grad_norm": 0.2620396018028259,
1138
+ "learning_rate": 1.9946236559139785e-05,
1139
+ "loss": 1.0002,
1140
+ "step": 3975
1141
+ },
1142
+ {
1143
+ "epoch": 1.9351717464925011,
1144
+ "grad_norm": 0.2608689069747925,
1145
+ "learning_rate": 1.9722222222222224e-05,
1146
+ "loss": 1.3737,
1147
+ "step": 4000
1148
+ },
1149
+ {
1150
+ "epoch": 1.9472665699080793,
1151
+ "grad_norm": 24.944629669189453,
1152
+ "learning_rate": 1.9498207885304662e-05,
1153
+ "loss": 1.1471,
1154
+ "step": 4025
1155
+ },
1156
+ {
1157
+ "epoch": 1.9593613933236576,
1158
+ "grad_norm": 23.796247482299805,
1159
+ "learning_rate": 1.9274193548387097e-05,
1160
+ "loss": 2.678,
1161
+ "step": 4050
1162
+ },
1163
+ {
1164
+ "epoch": 1.9714562167392358,
1165
+ "grad_norm": 0.3903225362300873,
1166
+ "learning_rate": 1.9050179211469536e-05,
1167
+ "loss": 1.7349,
1168
+ "step": 4075
1169
+ },
1170
+ {
1171
+ "epoch": 1.9835510401548138,
1172
+ "grad_norm": 23.73769760131836,
1173
+ "learning_rate": 1.882616487455197e-05,
1174
+ "loss": 1.1653,
1175
+ "step": 4100
1176
+ },
1177
+ {
1178
+ "epoch": 1.9956458635703918,
1179
+ "grad_norm": 22.70951271057129,
1180
+ "learning_rate": 1.860215053763441e-05,
1181
+ "loss": 1.9917,
1182
+ "step": 4125
1183
+ },
1184
+ {
1185
+ "epoch": 2.0,
1186
+ "eval_accuracy": 0.6731141199226306,
1187
+ "eval_f1_macro": 0.4615303615303616,
1188
+ "eval_f1_micro": 0.6731141199226306,
1189
+ "eval_f1_weighted": 0.6321285489564019,
1190
+ "eval_loss": 1.3909244537353516,
1191
+ "eval_precision_macro": 0.4655498128675815,
1192
+ "eval_precision_micro": 0.6731141199226306,
1193
+ "eval_precision_weighted": 0.6287401060434273,
1194
+ "eval_recall_macro": 0.4847830067753159,
1195
+ "eval_recall_micro": 0.6731141199226306,
1196
+ "eval_recall_weighted": 0.6731141199226306,
1197
+ "eval_runtime": 6650.8901,
1198
+ "eval_samples_per_second": 0.078,
1199
+ "eval_steps_per_second": 0.039,
1200
+ "step": 4134
1201
+ },
1202
+ {
1203
+ "epoch": 2.00774068698597,
1204
+ "grad_norm": 0.9518312811851501,
1205
+ "learning_rate": 1.837813620071685e-05,
1206
+ "loss": 0.8235,
1207
+ "step": 4150
1208
+ },
1209
+ {
1210
+ "epoch": 2.019835510401548,
1211
+ "grad_norm": 0.2822599411010742,
1212
+ "learning_rate": 1.8154121863799284e-05,
1213
+ "loss": 0.7381,
1214
+ "step": 4175
1215
+ },
1216
+ {
1217
+ "epoch": 2.0319303338171264,
1218
+ "grad_norm": 0.2420988231897354,
1219
+ "learning_rate": 1.793010752688172e-05,
1220
+ "loss": 1.0998,
1221
+ "step": 4200
1222
+ },
1223
+ {
1224
+ "epoch": 2.0440251572327046,
1225
+ "grad_norm": 0.23866595327854156,
1226
+ "learning_rate": 1.770609318996416e-05,
1227
+ "loss": 1.5128,
1228
+ "step": 4225
1229
+ },
1230
+ {
1231
+ "epoch": 2.0561199806482824,
1232
+ "grad_norm": 0.20324940979480743,
1233
+ "learning_rate": 1.7482078853046596e-05,
1234
+ "loss": 0.6954,
1235
+ "step": 4250
1236
+ },
1237
+ {
1238
+ "epoch": 2.0682148040638606,
1239
+ "grad_norm": 0.23257912695407867,
1240
+ "learning_rate": 1.725806451612903e-05,
1241
+ "loss": 2.1005,
1242
+ "step": 4275
1243
+ },
1244
+ {
1245
+ "epoch": 2.080309627479439,
1246
+ "grad_norm": 0.28216153383255005,
1247
+ "learning_rate": 1.703405017921147e-05,
1248
+ "loss": 0.6184,
1249
+ "step": 4300
1250
+ },
1251
+ {
1252
+ "epoch": 2.092404450895017,
1253
+ "grad_norm": 0.2500057518482208,
1254
+ "learning_rate": 1.681003584229391e-05,
1255
+ "loss": 1.82,
1256
+ "step": 4325
1257
+ },
1258
+ {
1259
+ "epoch": 2.1044992743105952,
1260
+ "grad_norm": 0.442717045545578,
1261
+ "learning_rate": 1.6586021505376344e-05,
1262
+ "loss": 2.2307,
1263
+ "step": 4350
1264
+ },
1265
+ {
1266
+ "epoch": 2.116594097726173,
1267
+ "grad_norm": 44.982303619384766,
1268
+ "learning_rate": 1.6362007168458783e-05,
1269
+ "loss": 1.16,
1270
+ "step": 4375
1271
+ },
1272
+ {
1273
+ "epoch": 2.1286889211417512,
1274
+ "grad_norm": 34.185569763183594,
1275
+ "learning_rate": 1.6137992831541218e-05,
1276
+ "loss": 1.8138,
1277
+ "step": 4400
1278
+ },
1279
+ {
1280
+ "epoch": 2.1407837445573294,
1281
+ "grad_norm": 0.5167245864868164,
1282
+ "learning_rate": 1.5913978494623657e-05,
1283
+ "loss": 1.5199,
1284
+ "step": 4425
1285
+ },
1286
+ {
1287
+ "epoch": 2.1528785679729077,
1288
+ "grad_norm": 25.86376190185547,
1289
+ "learning_rate": 1.5689964157706095e-05,
1290
+ "loss": 1.1301,
1291
+ "step": 4450
1292
+ },
1293
+ {
1294
+ "epoch": 2.164973391388486,
1295
+ "grad_norm": 0.7654205560684204,
1296
+ "learning_rate": 1.546594982078853e-05,
1297
+ "loss": 0.9802,
1298
+ "step": 4475
1299
+ },
1300
+ {
1301
+ "epoch": 2.1770682148040637,
1302
+ "grad_norm": 0.22726771235466003,
1303
+ "learning_rate": 1.5241935483870967e-05,
1304
+ "loss": 1.3874,
1305
+ "step": 4500
1306
+ },
1307
+ {
1308
+ "epoch": 2.189163038219642,
1309
+ "grad_norm": 0.3605280816555023,
1310
+ "learning_rate": 1.5017921146953406e-05,
1311
+ "loss": 1.9075,
1312
+ "step": 4525
1313
+ },
1314
+ {
1315
+ "epoch": 2.20125786163522,
1316
+ "grad_norm": 0.28213346004486084,
1317
+ "learning_rate": 1.4793906810035843e-05,
1318
+ "loss": 1.0298,
1319
+ "step": 4550
1320
+ },
1321
+ {
1322
+ "epoch": 2.2133526850507983,
1323
+ "grad_norm": 0.18000219762325287,
1324
+ "learning_rate": 1.456989247311828e-05,
1325
+ "loss": 1.5587,
1326
+ "step": 4575
1327
+ },
1328
+ {
1329
+ "epoch": 2.2254475084663765,
1330
+ "grad_norm": 25.92272186279297,
1331
+ "learning_rate": 1.4345878136200718e-05,
1332
+ "loss": 1.1183,
1333
+ "step": 4600
1334
+ },
1335
+ {
1336
+ "epoch": 2.2375423318819547,
1337
+ "grad_norm": 0.14410781860351562,
1338
+ "learning_rate": 1.4121863799283155e-05,
1339
+ "loss": 1.6175,
1340
+ "step": 4625
1341
+ },
1342
+ {
1343
+ "epoch": 2.2496371552975325,
1344
+ "grad_norm": 0.16350312530994415,
1345
+ "learning_rate": 1.3897849462365592e-05,
1346
+ "loss": 1.2121,
1347
+ "step": 4650
1348
+ },
1349
+ {
1350
+ "epoch": 2.2617319787131107,
1351
+ "grad_norm": 48.49122619628906,
1352
+ "learning_rate": 1.367383512544803e-05,
1353
+ "loss": 1.9733,
1354
+ "step": 4675
1355
+ },
1356
+ {
1357
+ "epoch": 2.273826802128689,
1358
+ "grad_norm": 0.27995195984840393,
1359
+ "learning_rate": 1.3449820788530468e-05,
1360
+ "loss": 0.8415,
1361
+ "step": 4700
1362
+ },
1363
+ {
1364
+ "epoch": 2.285921625544267,
1365
+ "grad_norm": 0.35172316431999207,
1366
+ "learning_rate": 1.3225806451612905e-05,
1367
+ "loss": 1.1809,
1368
+ "step": 4725
1369
+ },
1370
+ {
1371
+ "epoch": 2.2980164489598454,
1372
+ "grad_norm": 0.33857014775276184,
1373
+ "learning_rate": 1.300179211469534e-05,
1374
+ "loss": 0.992,
1375
+ "step": 4750
1376
+ },
1377
+ {
1378
+ "epoch": 2.310111272375423,
1379
+ "grad_norm": 0.9367401003837585,
1380
+ "learning_rate": 1.2777777777777777e-05,
1381
+ "loss": 1.4335,
1382
+ "step": 4775
1383
+ },
1384
+ {
1385
+ "epoch": 2.3222060957910013,
1386
+ "grad_norm": 0.2820034921169281,
1387
+ "learning_rate": 1.2553763440860217e-05,
1388
+ "loss": 0.6884,
1389
+ "step": 4800
1390
+ },
1391
+ {
1392
+ "epoch": 2.3343009192065796,
1393
+ "grad_norm": 0.37094831466674805,
1394
+ "learning_rate": 1.2329749103942653e-05,
1395
+ "loss": 1.672,
1396
+ "step": 4825
1397
+ },
1398
+ {
1399
+ "epoch": 2.3463957426221578,
1400
+ "grad_norm": 0.32186359167099,
1401
+ "learning_rate": 1.210573476702509e-05,
1402
+ "loss": 1.2877,
1403
+ "step": 4850
1404
+ },
1405
+ {
1406
+ "epoch": 2.358490566037736,
1407
+ "grad_norm": 54.59464645385742,
1408
+ "learning_rate": 1.1881720430107528e-05,
1409
+ "loss": 1.83,
1410
+ "step": 4875
1411
+ },
1412
+ {
1413
+ "epoch": 2.370585389453314,
1414
+ "grad_norm": 0.36730891466140747,
1415
+ "learning_rate": 1.1657706093189963e-05,
1416
+ "loss": 1.6348,
1417
+ "step": 4900
1418
+ },
1419
+ {
1420
+ "epoch": 2.382680212868892,
1421
+ "grad_norm": 0.23865163326263428,
1422
+ "learning_rate": 1.1433691756272402e-05,
1423
+ "loss": 1.2163,
1424
+ "step": 4925
1425
+ },
1426
+ {
1427
+ "epoch": 2.39477503628447,
1428
+ "grad_norm": 27.761423110961914,
1429
+ "learning_rate": 1.1209677419354839e-05,
1430
+ "loss": 0.821,
1431
+ "step": 4950
1432
+ },
1433
+ {
1434
+ "epoch": 2.4068698597000484,
1435
+ "grad_norm": 0.162750706076622,
1436
+ "learning_rate": 1.0985663082437276e-05,
1437
+ "loss": 0.7309,
1438
+ "step": 4975
1439
+ },
1440
+ {
1441
+ "epoch": 2.4189646831156266,
1442
+ "grad_norm": 25.199321746826172,
1443
+ "learning_rate": 1.0761648745519713e-05,
1444
+ "loss": 0.7073,
1445
+ "step": 5000
1446
+ },
1447
+ {
1448
+ "epoch": 2.431059506531205,
1449
+ "grad_norm": 0.1526053547859192,
1450
+ "learning_rate": 1.0537634408602151e-05,
1451
+ "loss": 1.2978,
1452
+ "step": 5025
1453
+ },
1454
+ {
1455
+ "epoch": 2.4431543299467826,
1456
+ "grad_norm": 0.1368321031332016,
1457
+ "learning_rate": 1.0313620071684588e-05,
1458
+ "loss": 1.6805,
1459
+ "step": 5050
1460
+ },
1461
+ {
1462
+ "epoch": 2.455249153362361,
1463
+ "grad_norm": 0.23694339394569397,
1464
+ "learning_rate": 1.0089605734767025e-05,
1465
+ "loss": 1.2977,
1466
+ "step": 5075
1467
+ },
1468
+ {
1469
+ "epoch": 2.467343976777939,
1470
+ "grad_norm": 0.7356523871421814,
1471
+ "learning_rate": 9.865591397849464e-06,
1472
+ "loss": 1.8933,
1473
+ "step": 5100
1474
+ },
1475
+ {
1476
+ "epoch": 2.4794388001935173,
1477
+ "grad_norm": 0.3766566812992096,
1478
+ "learning_rate": 9.6415770609319e-06,
1479
+ "loss": 1.2868,
1480
+ "step": 5125
1481
+ },
1482
+ {
1483
+ "epoch": 2.4915336236090955,
1484
+ "grad_norm": 0.14354300498962402,
1485
+ "learning_rate": 9.417562724014338e-06,
1486
+ "loss": 1.5716,
1487
+ "step": 5150
1488
+ },
1489
+ {
1490
+ "epoch": 2.5036284470246732,
1491
+ "grad_norm": 0.2801443934440613,
1492
+ "learning_rate": 9.193548387096775e-06,
1493
+ "loss": 1.1805,
1494
+ "step": 5175
1495
+ },
1496
+ {
1497
+ "epoch": 2.5157232704402515,
1498
+ "grad_norm": 26.191604614257812,
1499
+ "learning_rate": 8.969534050179212e-06,
1500
+ "loss": 1.5799,
1501
+ "step": 5200
1502
+ },
1503
+ {
1504
+ "epoch": 2.5278180938558297,
1505
+ "grad_norm": 0.29439038038253784,
1506
+ "learning_rate": 8.745519713261649e-06,
1507
+ "loss": 0.7503,
1508
+ "step": 5225
1509
+ },
1510
+ {
1511
+ "epoch": 2.539912917271408,
1512
+ "grad_norm": 0.25986921787261963,
1513
+ "learning_rate": 8.521505376344087e-06,
1514
+ "loss": 1.5983,
1515
+ "step": 5250
1516
+ },
1517
+ {
1518
+ "epoch": 2.552007740686986,
1519
+ "grad_norm": 0.37119609117507935,
1520
+ "learning_rate": 8.297491039426524e-06,
1521
+ "loss": 1.3133,
1522
+ "step": 5275
1523
+ },
1524
+ {
1525
+ "epoch": 2.564102564102564,
1526
+ "grad_norm": 0.29802656173706055,
1527
+ "learning_rate": 8.073476702508961e-06,
1528
+ "loss": 1.2844,
1529
+ "step": 5300
1530
+ },
1531
+ {
1532
+ "epoch": 2.576197387518142,
1533
+ "grad_norm": 0.22544977068901062,
1534
+ "learning_rate": 7.849462365591398e-06,
1535
+ "loss": 1.5217,
1536
+ "step": 5325
1537
+ },
1538
+ {
1539
+ "epoch": 2.5882922109337203,
1540
+ "grad_norm": 0.2867962718009949,
1541
+ "learning_rate": 7.625448028673836e-06,
1542
+ "loss": 0.637,
1543
+ "step": 5350
1544
+ },
1545
+ {
1546
+ "epoch": 2.6003870343492985,
1547
+ "grad_norm": 0.25557562708854675,
1548
+ "learning_rate": 7.401433691756272e-06,
1549
+ "loss": 1.1052,
1550
+ "step": 5375
1551
+ },
1552
+ {
1553
+ "epoch": 2.6124818577648767,
1554
+ "grad_norm": 25.903554916381836,
1555
+ "learning_rate": 7.177419354838711e-06,
1556
+ "loss": 1.2246,
1557
+ "step": 5400
1558
+ },
1559
+ {
1560
+ "epoch": 2.6245766811804545,
1561
+ "grad_norm": 24.891738891601562,
1562
+ "learning_rate": 6.953405017921147e-06,
1563
+ "loss": 1.3789,
1564
+ "step": 5425
1565
+ },
1566
+ {
1567
+ "epoch": 2.636671504596033,
1568
+ "grad_norm": 0.23630130290985107,
1569
+ "learning_rate": 6.7293906810035845e-06,
1570
+ "loss": 1.1069,
1571
+ "step": 5450
1572
+ },
1573
+ {
1574
+ "epoch": 2.648766328011611,
1575
+ "grad_norm": 0.27478015422821045,
1576
+ "learning_rate": 6.5053763440860214e-06,
1577
+ "loss": 0.5998,
1578
+ "step": 5475
1579
+ },
1580
+ {
1581
+ "epoch": 2.660861151427189,
1582
+ "grad_norm": 25.27565574645996,
1583
+ "learning_rate": 6.281362007168459e-06,
1584
+ "loss": 1.2044,
1585
+ "step": 5500
1586
+ },
1587
+ {
1588
+ "epoch": 2.6729559748427674,
1589
+ "grad_norm": 25.68202781677246,
1590
+ "learning_rate": 6.057347670250896e-06,
1591
+ "loss": 1.0829,
1592
+ "step": 5525
1593
+ },
1594
+ {
1595
+ "epoch": 2.6850507982583456,
1596
+ "grad_norm": 0.17175991833209991,
1597
+ "learning_rate": 5.833333333333334e-06,
1598
+ "loss": 1.8226,
1599
+ "step": 5550
1600
+ },
1601
+ {
1602
+ "epoch": 2.697145621673924,
1603
+ "grad_norm": 0.19636030495166779,
1604
+ "learning_rate": 5.609318996415771e-06,
1605
+ "loss": 0.8312,
1606
+ "step": 5575
1607
+ },
1608
+ {
1609
+ "epoch": 2.7092404450895016,
1610
+ "grad_norm": 0.16146661341190338,
1611
+ "learning_rate": 5.385304659498208e-06,
1612
+ "loss": 0.8461,
1613
+ "step": 5600
1614
+ },
1615
+ {
1616
+ "epoch": 2.72133526850508,
1617
+ "grad_norm": 0.17099538445472717,
1618
+ "learning_rate": 5.161290322580646e-06,
1619
+ "loss": 0.667,
1620
+ "step": 5625
1621
+ },
1622
+ {
1623
+ "epoch": 2.733430091920658,
1624
+ "grad_norm": 0.48382991552352905,
1625
+ "learning_rate": 4.9372759856630825e-06,
1626
+ "loss": 0.6893,
1627
+ "step": 5650
1628
+ },
1629
+ {
1630
+ "epoch": 2.745524915336236,
1631
+ "grad_norm": 25.40802574157715,
1632
+ "learning_rate": 4.7132616487455195e-06,
1633
+ "loss": 1.7727,
1634
+ "step": 5675
1635
+ },
1636
+ {
1637
+ "epoch": 2.7576197387518144,
1638
+ "grad_norm": 45.407470703125,
1639
+ "learning_rate": 4.489247311827957e-06,
1640
+ "loss": 1.7406,
1641
+ "step": 5700
1642
+ },
1643
+ {
1644
+ "epoch": 2.769714562167392,
1645
+ "grad_norm": 0.16706956923007965,
1646
+ "learning_rate": 4.265232974910394e-06,
1647
+ "loss": 1.0508,
1648
+ "step": 5725
1649
+ },
1650
+ {
1651
+ "epoch": 2.7818093855829704,
1652
+ "grad_norm": 0.14837704598903656,
1653
+ "learning_rate": 4.041218637992832e-06,
1654
+ "loss": 0.9122,
1655
+ "step": 5750
1656
+ },
1657
+ {
1658
+ "epoch": 2.7939042089985486,
1659
+ "grad_norm": 0.19028626382350922,
1660
+ "learning_rate": 3.817204301075269e-06,
1661
+ "loss": 0.9756,
1662
+ "step": 5775
1663
+ },
1664
+ {
1665
+ "epoch": 2.805999032414127,
1666
+ "grad_norm": 29.7191162109375,
1667
+ "learning_rate": 3.593189964157706e-06,
1668
+ "loss": 1.4132,
1669
+ "step": 5800
1670
+ },
1671
+ {
1672
+ "epoch": 2.818093855829705,
1673
+ "grad_norm": 25.76336097717285,
1674
+ "learning_rate": 3.3691756272401432e-06,
1675
+ "loss": 1.9683,
1676
+ "step": 5825
1677
+ },
1678
+ {
1679
+ "epoch": 2.830188679245283,
1680
+ "grad_norm": 0.4251428544521332,
1681
+ "learning_rate": 3.1451612903225806e-06,
1682
+ "loss": 0.6718,
1683
+ "step": 5850
1684
+ },
1685
+ {
1686
+ "epoch": 2.842283502660861,
1687
+ "grad_norm": 0.26770126819610596,
1688
+ "learning_rate": 2.921146953405018e-06,
1689
+ "loss": 0.6973,
1690
+ "step": 5875
1691
+ },
1692
+ {
1693
+ "epoch": 2.8543783260764393,
1694
+ "grad_norm": 0.7866289615631104,
1695
+ "learning_rate": 2.6971326164874553e-06,
1696
+ "loss": 0.7933,
1697
+ "step": 5900
1698
+ },
1699
+ {
1700
+ "epoch": 2.8664731494920175,
1701
+ "grad_norm": 46.420658111572266,
1702
+ "learning_rate": 2.4731182795698927e-06,
1703
+ "loss": 2.0253,
1704
+ "step": 5925
1705
+ },
1706
+ {
1707
+ "epoch": 2.8785679729075957,
1708
+ "grad_norm": 0.2426643818616867,
1709
+ "learning_rate": 2.2491039426523296e-06,
1710
+ "loss": 1.0285,
1711
+ "step": 5950
1712
+ },
1713
+ {
1714
+ "epoch": 2.8906627963231735,
1715
+ "grad_norm": 34.91679382324219,
1716
+ "learning_rate": 2.025089605734767e-06,
1717
+ "loss": 1.3188,
1718
+ "step": 5975
1719
+ },
1720
+ {
1721
+ "epoch": 2.9027576197387517,
1722
+ "grad_norm": 0.22075743973255157,
1723
+ "learning_rate": 1.8010752688172043e-06,
1724
+ "loss": 1.6779,
1725
+ "step": 6000
1726
+ },
1727
+ {
1728
+ "epoch": 2.91485244315433,
1729
+ "grad_norm": 25.831626892089844,
1730
+ "learning_rate": 1.577060931899642e-06,
1731
+ "loss": 1.2721,
1732
+ "step": 6025
1733
+ },
1734
+ {
1735
+ "epoch": 2.926947266569908,
1736
+ "grad_norm": 0.1505293846130371,
1737
+ "learning_rate": 1.3530465949820788e-06,
1738
+ "loss": 2.161,
1739
+ "step": 6050
1740
+ },
1741
+ {
1742
+ "epoch": 2.9390420899854863,
1743
+ "grad_norm": 0.18153157830238342,
1744
+ "learning_rate": 1.1290322580645162e-06,
1745
+ "loss": 1.1947,
1746
+ "step": 6075
1747
+ },
1748
+ {
1749
+ "epoch": 2.951136913401064,
1750
+ "grad_norm": 25.609390258789062,
1751
+ "learning_rate": 9.050179211469536e-07,
1752
+ "loss": 0.6296,
1753
+ "step": 6100
1754
+ },
1755
+ {
1756
+ "epoch": 2.9632317368166423,
1757
+ "grad_norm": 0.3544562757015228,
1758
+ "learning_rate": 6.810035842293907e-07,
1759
+ "loss": 1.2337,
1760
+ "step": 6125
1761
+ },
1762
+ {
1763
+ "epoch": 2.9753265602322205,
1764
+ "grad_norm": 25.893449783325195,
1765
+ "learning_rate": 4.5698924731182797e-07,
1766
+ "loss": 1.7994,
1767
+ "step": 6150
1768
+ },
1769
+ {
1770
+ "epoch": 2.9874213836477987,
1771
+ "grad_norm": 1.0282210111618042,
1772
+ "learning_rate": 2.3297491039426527e-07,
1773
+ "loss": 1.2547,
1774
+ "step": 6175
1775
+ },
1776
+ {
1777
+ "epoch": 2.999516207063377,
1778
+ "grad_norm": 0.2006077915430069,
1779
+ "learning_rate": 8.960573476702509e-09,
1780
+ "loss": 0.87,
1781
+ "step": 6200
1782
+ },
1783
+ {
1784
+ "epoch": 3.0,
1785
+ "eval_accuracy": 0.7504835589941973,
1786
+ "eval_f1_macro": 0.5244016249451032,
1787
+ "eval_f1_micro": 0.7504835589941973,
1788
+ "eval_f1_weighted": 0.714761195760481,
1789
+ "eval_loss": 1.307055115699768,
1790
+ "eval_precision_macro": 0.5012229210342417,
1791
+ "eval_precision_micro": 0.7504835589941973,
1792
+ "eval_precision_weighted": 0.6860840439724423,
1793
+ "eval_recall_macro": 0.5532259049014222,
1794
+ "eval_recall_micro": 0.7504835589941973,
1795
+ "eval_recall_weighted": 0.7504835589941973,
1796
+ "eval_runtime": 6673.8059,
1797
+ "eval_samples_per_second": 0.077,
1798
+ "eval_steps_per_second": 0.039,
1799
+ "step": 6201
1800
+ }
1801
+ ],
1802
+ "logging_steps": 25,
1803
+ "max_steps": 6201,
1804
+ "num_input_tokens_seen": 0,
1805
+ "num_train_epochs": 3,
1806
+ "save_steps": 500,
1807
+ "stateful_callbacks": {
1808
+ "EarlyStoppingCallback": {
1809
+ "args": {
1810
+ "early_stopping_patience": 5,
1811
+ "early_stopping_threshold": 0.01
1812
+ },
1813
+ "attributes": {
1814
+ "early_stopping_patience_counter": 0
1815
+ }
1816
+ },
1817
+ "TrainerControl": {
1818
+ "args": {
1819
+ "should_epoch_stop": false,
1820
+ "should_evaluate": false,
1821
+ "should_log": false,
1822
+ "should_save": true,
1823
+ "should_training_stop": true
1824
+ },
1825
+ "attributes": {}
1826
+ }
1827
+ },
1828
+ "total_flos": 1.6292664567668736e+16,
1829
+ "train_batch_size": 1,
1830
+ "trial_name": null,
1831
+ "trial_params": null
1832
+ }
checkpoint-6201/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cab6e4694c69fe840eda2a160ba8d0e45613204efc679c3084b6aa54b4cd418c
3
+ size 5240
config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "allenai/longformer-base-4096",
3
+ "_num_labels": 3,
4
+ "architectures": [
5
+ "LongformerForSequenceClassification"
6
+ ],
7
+ "attention_mode": "longformer",
8
+ "attention_probs_dropout_prob": 0.1,
9
+ "attention_window": [
10
+ 512,
11
+ 512,
12
+ 512,
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512
22
+ ],
23
+ "bos_token_id": 0,
24
+ "eos_token_id": 2,
25
+ "gradient_checkpointing": false,
26
+ "hidden_act": "gelu",
27
+ "hidden_dropout_prob": 0.1,
28
+ "hidden_size": 768,
29
+ "id2label": {
30
+ "0": "negative",
31
+ "1": "neutral",
32
+ "2": "positive"
33
+ },
34
+ "ignore_attention_mask": false,
35
+ "initializer_range": 0.02,
36
+ "intermediate_size": 3072,
37
+ "label2id": {
38
+ "negative": 0,
39
+ "neutral": 1,
40
+ "positive": 2
41
+ },
42
+ "layer_norm_eps": 1e-05,
43
+ "max_position_embeddings": 4098,
44
+ "model_type": "longformer",
45
+ "num_attention_heads": 12,
46
+ "num_hidden_layers": 12,
47
+ "onnx_export": false,
48
+ "pad_token_id": 1,
49
+ "problem_type": "single_label_classification",
50
+ "sep_token_id": 2,
51
+ "torch_dtype": "float32",
52
+ "transformers_version": "4.45.0",
53
+ "type_vocab_size": 1,
54
+ "vocab_size": 50265
55
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:322dae133d3ce467a820df6105d8c7973c40d913fb2ce1117801d5c7b57c139f
3
+ size 594681260
runs/Nov01_08-49-04_r-kafikani-longformer-va66dsc0-0e056-fjgin/events.out.tfevents.1730450948.r-kafikani-longformer-va66dsc0-0e056-fjgin.465.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5e51598b89ad0ea04ec1934e8c888cb1bcd479588893d4ba3ee21ab8784db3e
3
- size 59074
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfdf7e559ab1ee1a97c3a4c242ada4f77098d01eb43f82c4493442ef596eef4a
3
+ size 60472
runs/Nov01_08-49-04_r-kafikani-longformer-va66dsc0-0e056-fjgin/events.out.tfevents.1730870444.r-kafikani-longformer-va66dsc0-0e056-fjgin.465.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:232870b490734f0c789391c0d5c8c5bdeca5087a951488bf94a57efb56d76b58
3
+ size 921
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": true,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "mask_token": "<mask>",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "pad_token": "<pad>",
53
+ "sep_token": "</s>",
54
+ "tokenizer_class": "LongformerTokenizer",
55
+ "trim_offsets": true,
56
+ "unk_token": "<unk>"
57
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cab6e4694c69fe840eda2a160ba8d0e45613204efc679c3084b6aa54b4cd418c
3
+ size 5240
training_params.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "autotrain-iinjh-0wh75/autotrain-data",
3
+ "model": "allenai/longformer-base-4096",
4
+ "lr": 5e-05,
5
+ "epochs": 3,
6
+ "max_seq_length": 4096,
7
+ "batch_size": 1,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 1,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "linear",
12
+ "weight_decay": 0.0,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": "validation",
17
+ "text_column": "autotrain_text",
18
+ "target_column": "autotrain_label",
19
+ "logging_steps": -1,
20
+ "project_name": "autotrain-iinjh-0wh75",
21
+ "auto_find_batch_size": false,
22
+ "mixed_precision": "fp16",
23
+ "save_total_limit": 1,
24
+ "push_to_hub": true,
25
+ "eval_strategy": "epoch",
26
+ "username": "kafikani",
27
+ "log": "tensorboard",
28
+ "early_stopping_patience": 5,
29
+ "early_stopping_threshold": 0.01
30
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff