diogopaes10 committed on
Commit
630af50
‒
1 Parent(s): 26d0af3

Training in progress, step 2000

Browse files
{checkpoint-1000 → checkpoint-2000}/config.json RENAMED
File without changes
{checkpoint-1000 → checkpoint-2000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d32e8828d1b1ae30530b7473d552ff9aa020e0b2f239c8b8134dfe979d15c0b
3
  size 267028677
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09552910dd7d5ed2eb865c6b30253726e3dcd7aed3588cc97c4702b013630f01
3
  size 267028677
{checkpoint-1000 → checkpoint-2000}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c366631ade427ba5a8dc8df9ca9f8d8f41caf8fad5eb2ca4c35c6fc3113ee13f
3
  size 133523761
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:294d34cfcdd9a17427a0a09cdf19e071c82e8ce59c34f4fe159d3c5118d1082d
3
  size 133523761
{checkpoint-1000 → checkpoint-2000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46f76c8bed144eb0462eae7b352d4482d3ae98f3e4263329c2f739c21b5dd5ca
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f48f6150f5133b116ca71b75680965f3b45ab43582cfcc6566086e18896784f
3
  size 14575
{checkpoint-1000 → checkpoint-2000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13828e1677406f24ffee2b52c5ac79b462a76afddafb014a11a21dd7429637dd
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ad0c62478b179935d131a5f36054eef5c951ef7612bbd788e7a63be9d94d8ef
3
  size 627
{checkpoint-1000 → checkpoint-2000}/special_tokens_map.json RENAMED
File without changes
{checkpoint-1000 → checkpoint-2000}/tokenizer.json RENAMED
File without changes
{checkpoint-1000 → checkpoint-2000}/tokenizer_config.json RENAMED
File without changes
{checkpoint-1000 → checkpoint-2000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.0,
5
- "global_step": 1000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -220,11 +220,219 @@
220
  "eval_system_ram_total": 83.48074722290039,
221
  "eval_system_ram_used": 3.8976211547851562,
222
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  }
224
  ],
225
  "max_steps": 2500,
226
  "num_train_epochs": 10,
227
- "total_flos": 504019356005760.0,
228
  "trial_name": null,
229
  "trial_params": null
230
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.0,
5
+ "global_step": 2000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
220
  "eval_system_ram_total": 83.48074722290039,
221
  "eval_system_ram_used": 3.8976211547851562,
222
  "step": 1000
223
+ },
224
+ {
225
+ "epoch": 4.5,
226
+ "learning_rate": 1.1008e-05,
227
+ "loss": 0.8512,
228
+ "step": 1125
229
+ },
230
+ {
231
+ "epoch": 4.5,
232
+ "eval_accuracy": 0.6915,
233
+ "eval_disk_space_total": 78.1898422241211,
234
+ "eval_disk_space_used": 25.830577850341797,
235
+ "eval_f1": 0.6913447845710189,
236
+ "eval_gpu_ram_allocated": 0.3996849060058594,
237
+ "eval_gpu_ram_cached": 12.9296875,
238
+ "eval_gpu_ram_total": 39.56402587890625,
239
+ "eval_gpu_utilization": 28,
240
+ "eval_loss": 1.0628596544265747,
241
+ "eval_precision": 0.6944500140220231,
242
+ "eval_recall": 0.6915,
243
+ "eval_runtime": 1.5986,
244
+ "eval_samples_per_second": 1251.121,
245
+ "eval_steps_per_second": 39.41,
246
+ "eval_system_ram_total": 83.48074722290039,
247
+ "eval_system_ram_used": 4.0599517822265625,
248
+ "step": 1125
249
+ },
250
+ {
251
+ "epoch": 5.0,
252
+ "learning_rate": 1.0008e-05,
253
+ "loss": 0.8121,
254
+ "step": 1250
255
+ },
256
+ {
257
+ "epoch": 5.0,
258
+ "eval_accuracy": 0.691,
259
+ "eval_disk_space_total": 78.1898422241211,
260
+ "eval_disk_space_used": 25.830596923828125,
261
+ "eval_f1": 0.6838005278906992,
262
+ "eval_gpu_ram_allocated": 0.39963626861572266,
263
+ "eval_gpu_ram_cached": 12.9296875,
264
+ "eval_gpu_ram_total": 39.56402587890625,
265
+ "eval_gpu_utilization": 31,
266
+ "eval_loss": 1.0575964450836182,
267
+ "eval_precision": 0.6905089592227732,
268
+ "eval_recall": 0.691,
269
+ "eval_runtime": 1.5391,
270
+ "eval_samples_per_second": 1299.447,
271
+ "eval_steps_per_second": 40.933,
272
+ "eval_system_ram_total": 83.48074722290039,
273
+ "eval_system_ram_used": 4.043186187744141,
274
+ "step": 1250
275
+ },
276
+ {
277
+ "epoch": 5.5,
278
+ "learning_rate": 9.008e-06,
279
+ "loss": 0.7733,
280
+ "step": 1375
281
+ },
282
+ {
283
+ "epoch": 5.5,
284
+ "eval_accuracy": 0.6805,
285
+ "eval_disk_space_total": 78.1898422241211,
286
+ "eval_disk_space_used": 25.83068084716797,
287
+ "eval_f1": 0.6773653982197514,
288
+ "eval_gpu_ram_allocated": 0.3996090888977051,
289
+ "eval_gpu_ram_cached": 12.9296875,
290
+ "eval_gpu_ram_total": 39.56402587890625,
291
+ "eval_gpu_utilization": 28,
292
+ "eval_loss": 1.0597690343856812,
293
+ "eval_precision": 0.683800229871734,
294
+ "eval_recall": 0.6805,
295
+ "eval_runtime": 1.5511,
296
+ "eval_samples_per_second": 1289.373,
297
+ "eval_steps_per_second": 40.615,
298
+ "eval_system_ram_total": 83.48074722290039,
299
+ "eval_system_ram_used": 3.837909698486328,
300
+ "step": 1375
301
+ },
302
+ {
303
+ "epoch": 6.0,
304
+ "learning_rate": 8.008e-06,
305
+ "loss": 0.7431,
306
+ "step": 1500
307
+ },
308
+ {
309
+ "epoch": 6.0,
310
+ "eval_accuracy": 0.702,
311
+ "eval_disk_space_total": 78.1898422241211,
312
+ "eval_disk_space_used": 25.830699920654297,
313
+ "eval_f1": 0.6973584149072398,
314
+ "eval_gpu_ram_allocated": 0.39962053298950195,
315
+ "eval_gpu_ram_cached": 12.9296875,
316
+ "eval_gpu_ram_total": 39.56402587890625,
317
+ "eval_gpu_utilization": 31,
318
+ "eval_loss": 1.0375711917877197,
319
+ "eval_precision": 0.6975777506719404,
320
+ "eval_recall": 0.702,
321
+ "eval_runtime": 1.5383,
322
+ "eval_samples_per_second": 1300.155,
323
+ "eval_steps_per_second": 40.955,
324
+ "eval_system_ram_total": 83.48074722290039,
325
+ "eval_system_ram_used": 3.8545989990234375,
326
+ "step": 1500
327
+ },
328
+ {
329
+ "epoch": 6.5,
330
+ "learning_rate": 7.0080000000000005e-06,
331
+ "loss": 0.7065,
332
+ "step": 1625
333
+ },
334
+ {
335
+ "epoch": 6.5,
336
+ "eval_accuracy": 0.6995,
337
+ "eval_disk_space_total": 78.1898422241211,
338
+ "eval_disk_space_used": 26.203968048095703,
339
+ "eval_f1": 0.6990180197515704,
340
+ "eval_gpu_ram_allocated": 0.39959049224853516,
341
+ "eval_gpu_ram_cached": 12.9296875,
342
+ "eval_gpu_ram_total": 39.56402587890625,
343
+ "eval_gpu_utilization": 28,
344
+ "eval_loss": 1.0457407236099243,
345
+ "eval_precision": 0.701399008937391,
346
+ "eval_recall": 0.6995,
347
+ "eval_runtime": 1.5796,
348
+ "eval_samples_per_second": 1266.127,
349
+ "eval_steps_per_second": 39.883,
350
+ "eval_system_ram_total": 83.48074722290039,
351
+ "eval_system_ram_used": 4.033943176269531,
352
+ "step": 1625
353
+ },
354
+ {
355
+ "epoch": 7.0,
356
+ "learning_rate": 6.008000000000001e-06,
357
+ "loss": 0.671,
358
+ "step": 1750
359
+ },
360
+ {
361
+ "epoch": 7.0,
362
+ "eval_accuracy": 0.698,
363
+ "eval_disk_space_total": 78.1898422241211,
364
+ "eval_disk_space_used": 26.203968048095703,
365
+ "eval_f1": 0.6956310421863513,
366
+ "eval_gpu_ram_allocated": 0.39962339401245117,
367
+ "eval_gpu_ram_cached": 12.9296875,
368
+ "eval_gpu_ram_total": 39.56402587890625,
369
+ "eval_gpu_utilization": 28,
370
+ "eval_loss": 1.039590835571289,
371
+ "eval_precision": 0.696566876815876,
372
+ "eval_recall": 0.698,
373
+ "eval_runtime": 1.5301,
374
+ "eval_samples_per_second": 1307.136,
375
+ "eval_steps_per_second": 41.175,
376
+ "eval_system_ram_total": 83.48074722290039,
377
+ "eval_system_ram_used": 4.038356781005859,
378
+ "step": 1750
379
+ },
380
+ {
381
+ "epoch": 7.5,
382
+ "learning_rate": 5.008000000000001e-06,
383
+ "loss": 0.6438,
384
+ "step": 1875
385
+ },
386
+ {
387
+ "epoch": 7.5,
388
+ "eval_accuracy": 0.6925,
389
+ "eval_disk_space_total": 78.1898422241211,
390
+ "eval_disk_space_used": 26.203964233398438,
391
+ "eval_f1": 0.6886960392924729,
392
+ "eval_gpu_ram_allocated": 0.3995976448059082,
393
+ "eval_gpu_ram_cached": 12.9296875,
394
+ "eval_gpu_ram_total": 39.56402587890625,
395
+ "eval_gpu_utilization": 28,
396
+ "eval_loss": 1.0473765134811401,
397
+ "eval_precision": 0.6906747949914751,
398
+ "eval_recall": 0.6925,
399
+ "eval_runtime": 1.5569,
400
+ "eval_samples_per_second": 1284.611,
401
+ "eval_steps_per_second": 40.465,
402
+ "eval_system_ram_total": 83.48074722290039,
403
+ "eval_system_ram_used": 3.827362060546875,
404
+ "step": 1875
405
+ },
406
+ {
407
+ "epoch": 8.0,
408
+ "learning_rate": 4.008e-06,
409
+ "loss": 0.6326,
410
+ "step": 2000
411
+ },
412
+ {
413
+ "epoch": 8.0,
414
+ "eval_accuracy": 0.698,
415
+ "eval_disk_space_total": 78.1898422241211,
416
+ "eval_disk_space_used": 26.204063415527344,
417
+ "eval_f1": 0.6972467602684949,
418
+ "eval_gpu_ram_allocated": 0.39958906173706055,
419
+ "eval_gpu_ram_cached": 12.9296875,
420
+ "eval_gpu_ram_total": 39.56402587890625,
421
+ "eval_gpu_utilization": 34,
422
+ "eval_loss": 1.03837251663208,
423
+ "eval_precision": 0.6982790583069545,
424
+ "eval_recall": 0.698,
425
+ "eval_runtime": 1.5106,
426
+ "eval_samples_per_second": 1323.952,
427
+ "eval_steps_per_second": 41.704,
428
+ "eval_system_ram_total": 83.48074722290039,
429
+ "eval_system_ram_used": 3.8401947021484375,
430
+ "step": 2000
431
  }
432
  ],
433
  "max_steps": 2500,
434
  "num_train_epochs": 10,
435
+ "total_flos": 1010109883418496.0,
436
  "trial_name": null,
437
  "trial_params": null
438
  }
{checkpoint-1000 → checkpoint-2000}/training_args.bin RENAMED
File without changes
{checkpoint-1000 → checkpoint-2000}/vocab.txt RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f999c13d7f0a2621e532de7860c6e7dc21666552e25f1fbe53bc03deab0da720
3
  size 133523761
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:294d34cfcdd9a17427a0a09cdf19e071c82e8ce59c34f4fe159d3c5118d1082d
3
  size 133523761
runs/Jul23_00-54-01_c587bebf8fcc/events.out.tfevents.1690073651.c587bebf8fcc.2239.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ceb3666a0265e2a6fc0d5ffc3b2dd8c1c85a7f4686342050f7bcf179e6a213fd
3
- size 18081
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a87e472813cf54d514cc70f3dcc6d715385ff7ad568595a73f287577b33a094
3
+ size 22489