File size: 10,985 Bytes
603d94f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 20.0,
  "eval_steps": 500,
  "global_step": 2120,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 4.331399440765381,
      "learning_rate": 4.75e-05,
      "loss": 0.3182,
      "step": 106
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9571745015998031,
      "eval_f1": 0.7968127490039841,
      "eval_loss": 0.12841814756393433,
      "eval_precision": 0.746268656716418,
      "eval_recall": 0.8547008547008547,
      "eval_runtime": 0.966,
      "eval_samples_per_second": 193.575,
      "eval_steps_per_second": 3.105,
      "step": 106
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.366861581802368,
      "learning_rate": 4.5e-05,
      "loss": 0.1137,
      "step": 212
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9561900073837066,
      "eval_f1": 0.7927927927927929,
      "eval_loss": 0.13024382293224335,
      "eval_precision": 0.7230046948356808,
      "eval_recall": 0.8774928774928775,
      "eval_runtime": 0.9793,
      "eval_samples_per_second": 190.957,
      "eval_steps_per_second": 3.063,
      "step": 212
    },
    {
      "epoch": 3.0,
      "grad_norm": 1.3602290153503418,
      "learning_rate": 4.25e-05,
      "loss": 0.0683,
      "step": 318
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9606202313561408,
      "eval_f1": 0.8174386920980926,
      "eval_loss": 0.12490106374025345,
      "eval_precision": 0.783289817232376,
      "eval_recall": 0.8547008547008547,
      "eval_runtime": 0.9992,
      "eval_samples_per_second": 187.145,
      "eval_steps_per_second": 3.002,
      "step": 318
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.569642961025238,
      "learning_rate": 4e-05,
      "loss": 0.0454,
      "step": 424
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9591434900319961,
      "eval_f1": 0.8233731739707835,
      "eval_loss": 0.1463680863380432,
      "eval_precision": 0.7711442786069652,
      "eval_recall": 0.8831908831908832,
      "eval_runtime": 0.9458,
      "eval_samples_per_second": 197.714,
      "eval_steps_per_second": 3.172,
      "step": 424
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.9764404892921448,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.0325,
      "step": 530
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9640659611124784,
      "eval_f1": 0.8401084010840107,
      "eval_loss": 0.15568402409553528,
      "eval_precision": 0.8010335917312662,
      "eval_recall": 0.8831908831908832,
      "eval_runtime": 0.9702,
      "eval_samples_per_second": 192.747,
      "eval_steps_per_second": 3.092,
      "step": 530
    },
    {
      "epoch": 6.0,
      "grad_norm": 0.7467350363731384,
      "learning_rate": 3.5e-05,
      "loss": 0.0211,
      "step": 636
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.9598818606940684,
      "eval_f1": 0.8411214953271028,
      "eval_loss": 0.2112356424331665,
      "eval_precision": 0.7914572864321608,
      "eval_recall": 0.8974358974358975,
      "eval_runtime": 0.9492,
      "eval_samples_per_second": 196.999,
      "eval_steps_per_second": 3.16,
      "step": 636
    },
    {
      "epoch": 7.0,
      "grad_norm": 2.3930013179779053,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 0.015,
      "step": 742
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.9606202313561408,
      "eval_f1": 0.8295904887714664,
      "eval_loss": 0.19438758492469788,
      "eval_precision": 0.7733990147783252,
      "eval_recall": 0.8945868945868946,
      "eval_runtime": 0.9407,
      "eval_samples_per_second": 198.787,
      "eval_steps_per_second": 3.189,
      "step": 742
    },
    {
      "epoch": 8.0,
      "grad_norm": 0.5287325978279114,
      "learning_rate": 3e-05,
      "loss": 0.0113,
      "step": 848
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9665271966527197,
      "eval_f1": 0.8536585365853658,
      "eval_loss": 0.21514002978801727,
      "eval_precision": 0.813953488372093,
      "eval_recall": 0.8974358974358975,
      "eval_runtime": 0.9327,
      "eval_samples_per_second": 200.498,
      "eval_steps_per_second": 3.217,
      "step": 848
    },
    {
      "epoch": 9.0,
      "grad_norm": 0.272814005613327,
      "learning_rate": 2.7500000000000004e-05,
      "loss": 0.0075,
      "step": 954
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.9684961850849126,
      "eval_f1": 0.8536585365853658,
      "eval_loss": 0.199550062417984,
      "eval_precision": 0.813953488372093,
      "eval_recall": 0.8974358974358975,
      "eval_runtime": 0.9509,
      "eval_samples_per_second": 196.649,
      "eval_steps_per_second": 3.155,
      "step": 954
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.9517960548400879,
      "learning_rate": 2.5e-05,
      "loss": 0.0067,
      "step": 1060
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.9684961850849126,
      "eval_f1": 0.8647140864714087,
      "eval_loss": 0.2077295184135437,
      "eval_precision": 0.8469945355191257,
      "eval_recall": 0.8831908831908832,
      "eval_runtime": 1.0115,
      "eval_samples_per_second": 184.866,
      "eval_steps_per_second": 2.966,
      "step": 1060
    },
    {
      "epoch": 11.0,
      "grad_norm": 0.23543691635131836,
      "learning_rate": 2.25e-05,
      "loss": 0.0039,
      "step": 1166
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.9579128722618755,
      "eval_f1": 0.823841059602649,
      "eval_loss": 0.2609161138534546,
      "eval_precision": 0.7698019801980198,
      "eval_recall": 0.886039886039886,
      "eval_runtime": 0.9537,
      "eval_samples_per_second": 196.081,
      "eval_steps_per_second": 3.146,
      "step": 1166
    },
    {
      "epoch": 12.0,
      "grad_norm": 0.22459489107131958,
      "learning_rate": 2e-05,
      "loss": 0.0028,
      "step": 1272
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.9648043317745508,
      "eval_f1": 0.8590971272229823,
      "eval_loss": 0.24980689585208893,
      "eval_precision": 0.8263157894736842,
      "eval_recall": 0.8945868945868946,
      "eval_runtime": 0.9427,
      "eval_samples_per_second": 198.361,
      "eval_steps_per_second": 3.182,
      "step": 1272
    },
    {
      "epoch": 13.0,
      "grad_norm": 2.4156136512756348,
      "learning_rate": 1.75e-05,
      "loss": 0.0035,
      "step": 1378
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.9643120846665025,
      "eval_f1": 0.8493150684931507,
      "eval_loss": 0.24070732295513153,
      "eval_precision": 0.8179419525065963,
      "eval_recall": 0.8831908831908832,
      "eval_runtime": 0.9468,
      "eval_samples_per_second": 197.508,
      "eval_steps_per_second": 3.169,
      "step": 1378
    },
    {
      "epoch": 14.0,
      "grad_norm": 0.004436641000211239,
      "learning_rate": 1.5e-05,
      "loss": 0.003,
      "step": 1484
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.963081466896382,
      "eval_f1": 0.8375838926174497,
      "eval_loss": 0.2474687099456787,
      "eval_precision": 0.7918781725888325,
      "eval_recall": 0.8888888888888888,
      "eval_runtime": 0.9445,
      "eval_samples_per_second": 197.991,
      "eval_steps_per_second": 3.176,
      "step": 1484
    },
    {
      "epoch": 15.0,
      "grad_norm": 0.024274416267871857,
      "learning_rate": 1.25e-05,
      "loss": 0.0016,
      "step": 1590
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.9640659611124784,
      "eval_f1": 0.8445040214477211,
      "eval_loss": 0.25520774722099304,
      "eval_precision": 0.7974683544303798,
      "eval_recall": 0.8974358974358975,
      "eval_runtime": 0.9554,
      "eval_samples_per_second": 195.735,
      "eval_steps_per_second": 3.14,
      "step": 1590
    },
    {
      "epoch": 16.0,
      "grad_norm": 0.18013718724250793,
      "learning_rate": 1e-05,
      "loss": 0.0016,
      "step": 1696
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.9665271966527197,
      "eval_f1": 0.8606557377049181,
      "eval_loss": 0.24627996981143951,
      "eval_precision": 0.8267716535433071,
      "eval_recall": 0.8974358974358975,
      "eval_runtime": 0.9538,
      "eval_samples_per_second": 196.059,
      "eval_steps_per_second": 3.145,
      "step": 1696
    },
    {
      "epoch": 17.0,
      "grad_norm": 0.005411619320511818,
      "learning_rate": 7.5e-06,
      "loss": 0.0012,
      "step": 1802
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.9665271966527197,
      "eval_f1": 0.861072902338377,
      "eval_loss": 0.25002872943878174,
      "eval_precision": 0.8324468085106383,
      "eval_recall": 0.8917378917378918,
      "eval_runtime": 0.9536,
      "eval_samples_per_second": 196.089,
      "eval_steps_per_second": 3.146,
      "step": 1802
    },
    {
      "epoch": 18.0,
      "grad_norm": 0.02285461686551571,
      "learning_rate": 5e-06,
      "loss": 0.0009,
      "step": 1908
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.965296578882599,
      "eval_f1": 0.8586956521739131,
      "eval_loss": 0.26289603114128113,
      "eval_precision": 0.8207792207792208,
      "eval_recall": 0.9002849002849003,
      "eval_runtime": 0.9478,
      "eval_samples_per_second": 197.295,
      "eval_steps_per_second": 3.165,
      "step": 1908
    },
    {
      "epoch": 19.0,
      "grad_norm": 0.004692568443715572,
      "learning_rate": 2.5e-06,
      "loss": 0.0014,
      "step": 2014
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.9650504553285749,
      "eval_f1": 0.8559782608695653,
      "eval_loss": 0.26193881034851074,
      "eval_precision": 0.8181818181818182,
      "eval_recall": 0.8974358974358975,
      "eval_runtime": 0.9587,
      "eval_samples_per_second": 195.056,
      "eval_steps_per_second": 3.129,
      "step": 2014
    },
    {
      "epoch": 20.0,
      "grad_norm": 0.07399484515190125,
      "learning_rate": 0.0,
      "loss": 0.0006,
      "step": 2120
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.9650504553285749,
      "eval_f1": 0.8555858310626703,
      "eval_loss": 0.2606537342071533,
      "eval_precision": 0.8198433420365535,
      "eval_recall": 0.8945868945868946,
      "eval_runtime": 0.958,
      "eval_samples_per_second": 195.202,
      "eval_steps_per_second": 3.132,
      "step": 2120
    },
    {
      "epoch": 20.0,
      "step": 2120,
      "total_flos": 903894941334000.0,
      "train_loss": 0.03300303453052381,
      "train_runtime": 513.1215,
      "train_samples_per_second": 65.793,
      "train_steps_per_second": 4.132
    }
  ],
  "logging_steps": 500,
  "max_steps": 2120,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "total_flos": 903894941334000.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}