diogopaes10 committed on
Commit
b94cb66
•
1 Parent(s): 630af50

Training in progress, step 2500

Browse files
{checkpoint-1500 → checkpoint-2500}/config.json RENAMED
File without changes
{checkpoint-1500 → checkpoint-2500}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab1734c54c793eae65a52769f5766f6b16b6f36390c14dc1739b4234a106ff18
3
  size 267028677
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e27059fba75966ca72a03e9cc7b0567389190ed68f16253b9591168a0cb597cc
3
  size 267028677
{checkpoint-1500 → checkpoint-2500}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f999c13d7f0a2621e532de7860c6e7dc21666552e25f1fbe53bc03deab0da720
3
  size 133523761
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a16a1346b21837ee13d52ba582c706fb96643d039a8ec745ade06a4e78deb60b
3
  size 133523761
{checkpoint-1500 → checkpoint-2500}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29a5152fbf9e36140a3cd5aa7c29061d9d557b4711f36df08bb03836ef9ea515
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aef4ff96e068791b9d58ae396a6507bbeb779c898b52e30a356343311eae07b5
3
  size 14575
{checkpoint-1500 → checkpoint-2500}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e32f545d6a13edab9664803abf0214c94c6788b08b3d1c916e39481a4cd4f0c
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e243256e5ce9269604f12fd0aa4d7ccba676cc02c66217d98097b0986fa106b8
3
  size 627
{checkpoint-1500 → checkpoint-2500}/special_tokens_map.json RENAMED
File without changes
{checkpoint-1500 → checkpoint-2500}/tokenizer.json RENAMED
File without changes
{checkpoint-1500 → checkpoint-2500}/tokenizer_config.json RENAMED
File without changes
{checkpoint-1500 → checkpoint-2500}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.0,
5
- "global_step": 1500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -324,11 +324,219 @@
324
  "eval_system_ram_total": 83.48074722290039,
325
  "eval_system_ram_used": 3.8545989990234375,
326
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  }
328
  ],
329
  "max_steps": 2500,
330
  "num_train_epochs": 10,
331
- "total_flos": 756994619883264.0,
332
  "trial_name": null,
333
  "trial_params": null
334
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "global_step": 2500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
324
  "eval_system_ram_total": 83.48074722290039,
325
  "eval_system_ram_used": 3.8545989990234375,
326
  "step": 1500
327
+ },
328
+ {
329
+ "epoch": 6.5,
330
+ "learning_rate": 7.0080000000000005e-06,
331
+ "loss": 0.7065,
332
+ "step": 1625
333
+ },
334
+ {
335
+ "epoch": 6.5,
336
+ "eval_accuracy": 0.6995,
337
+ "eval_disk_space_total": 78.1898422241211,
338
+ "eval_disk_space_used": 26.203968048095703,
339
+ "eval_f1": 0.6990180197515704,
340
+ "eval_gpu_ram_allocated": 0.39959049224853516,
341
+ "eval_gpu_ram_cached": 12.9296875,
342
+ "eval_gpu_ram_total": 39.56402587890625,
343
+ "eval_gpu_utilization": 28,
344
+ "eval_loss": 1.0457407236099243,
345
+ "eval_precision": 0.701399008937391,
346
+ "eval_recall": 0.6995,
347
+ "eval_runtime": 1.5796,
348
+ "eval_samples_per_second": 1266.127,
349
+ "eval_steps_per_second": 39.883,
350
+ "eval_system_ram_total": 83.48074722290039,
351
+ "eval_system_ram_used": 4.033943176269531,
352
+ "step": 1625
353
+ },
354
+ {
355
+ "epoch": 7.0,
356
+ "learning_rate": 6.008000000000001e-06,
357
+ "loss": 0.671,
358
+ "step": 1750
359
+ },
360
+ {
361
+ "epoch": 7.0,
362
+ "eval_accuracy": 0.698,
363
+ "eval_disk_space_total": 78.1898422241211,
364
+ "eval_disk_space_used": 26.203968048095703,
365
+ "eval_f1": 0.6956310421863513,
366
+ "eval_gpu_ram_allocated": 0.39962339401245117,
367
+ "eval_gpu_ram_cached": 12.9296875,
368
+ "eval_gpu_ram_total": 39.56402587890625,
369
+ "eval_gpu_utilization": 28,
370
+ "eval_loss": 1.039590835571289,
371
+ "eval_precision": 0.696566876815876,
372
+ "eval_recall": 0.698,
373
+ "eval_runtime": 1.5301,
374
+ "eval_samples_per_second": 1307.136,
375
+ "eval_steps_per_second": 41.175,
376
+ "eval_system_ram_total": 83.48074722290039,
377
+ "eval_system_ram_used": 4.038356781005859,
378
+ "step": 1750
379
+ },
380
+ {
381
+ "epoch": 7.5,
382
+ "learning_rate": 5.008000000000001e-06,
383
+ "loss": 0.6438,
384
+ "step": 1875
385
+ },
386
+ {
387
+ "epoch": 7.5,
388
+ "eval_accuracy": 0.6925,
389
+ "eval_disk_space_total": 78.1898422241211,
390
+ "eval_disk_space_used": 26.203964233398438,
391
+ "eval_f1": 0.6886960392924729,
392
+ "eval_gpu_ram_allocated": 0.3995976448059082,
393
+ "eval_gpu_ram_cached": 12.9296875,
394
+ "eval_gpu_ram_total": 39.56402587890625,
395
+ "eval_gpu_utilization": 28,
396
+ "eval_loss": 1.0473765134811401,
397
+ "eval_precision": 0.6906747949914751,
398
+ "eval_recall": 0.6925,
399
+ "eval_runtime": 1.5569,
400
+ "eval_samples_per_second": 1284.611,
401
+ "eval_steps_per_second": 40.465,
402
+ "eval_system_ram_total": 83.48074722290039,
403
+ "eval_system_ram_used": 3.827362060546875,
404
+ "step": 1875
405
+ },
406
+ {
407
+ "epoch": 8.0,
408
+ "learning_rate": 4.008e-06,
409
+ "loss": 0.6326,
410
+ "step": 2000
411
+ },
412
+ {
413
+ "epoch": 8.0,
414
+ "eval_accuracy": 0.698,
415
+ "eval_disk_space_total": 78.1898422241211,
416
+ "eval_disk_space_used": 26.204063415527344,
417
+ "eval_f1": 0.6972467602684949,
418
+ "eval_gpu_ram_allocated": 0.39958906173706055,
419
+ "eval_gpu_ram_cached": 12.9296875,
420
+ "eval_gpu_ram_total": 39.56402587890625,
421
+ "eval_gpu_utilization": 34,
422
+ "eval_loss": 1.03837251663208,
423
+ "eval_precision": 0.6982790583069545,
424
+ "eval_recall": 0.698,
425
+ "eval_runtime": 1.5106,
426
+ "eval_samples_per_second": 1323.952,
427
+ "eval_steps_per_second": 41.704,
428
+ "eval_system_ram_total": 83.48074722290039,
429
+ "eval_system_ram_used": 3.8401947021484375,
430
+ "step": 2000
431
+ },
432
+ {
433
+ "epoch": 8.5,
434
+ "learning_rate": 3.0080000000000003e-06,
435
+ "loss": 0.6121,
436
+ "step": 2125
437
+ },
438
+ {
439
+ "epoch": 8.5,
440
+ "eval_accuracy": 0.698,
441
+ "eval_disk_space_total": 78.1898422241211,
442
+ "eval_disk_space_used": 26.20423126220703,
443
+ "eval_f1": 0.6962705560782918,
444
+ "eval_gpu_ram_allocated": 0.39959049224853516,
445
+ "eval_gpu_ram_cached": 12.9296875,
446
+ "eval_gpu_ram_total": 39.56402587890625,
447
+ "eval_gpu_utilization": 29,
448
+ "eval_loss": 1.0439949035644531,
449
+ "eval_precision": 0.6975979381837488,
450
+ "eval_recall": 0.698,
451
+ "eval_runtime": 1.5616,
452
+ "eval_samples_per_second": 1280.706,
453
+ "eval_steps_per_second": 40.342,
454
+ "eval_system_ram_total": 83.48074722290039,
455
+ "eval_system_ram_used": 4.016208648681641,
456
+ "step": 2125
457
+ },
458
+ {
459
+ "epoch": 9.0,
460
+ "learning_rate": 2.008e-06,
461
+ "loss": 0.5911,
462
+ "step": 2250
463
+ },
464
+ {
465
+ "epoch": 9.0,
466
+ "eval_accuracy": 0.701,
467
+ "eval_disk_space_total": 78.1898422241211,
468
+ "eval_disk_space_used": 26.204296112060547,
469
+ "eval_f1": 0.6995247290600446,
470
+ "eval_gpu_ram_allocated": 0.3996105194091797,
471
+ "eval_gpu_ram_cached": 12.9296875,
472
+ "eval_gpu_ram_total": 39.56402587890625,
473
+ "eval_gpu_utilization": 28,
474
+ "eval_loss": 1.0518174171447754,
475
+ "eval_precision": 0.7006323341620143,
476
+ "eval_recall": 0.701,
477
+ "eval_runtime": 1.5659,
478
+ "eval_samples_per_second": 1277.26,
479
+ "eval_steps_per_second": 40.234,
480
+ "eval_system_ram_total": 83.48074722290039,
481
+ "eval_system_ram_used": 4.033794403076172,
482
+ "step": 2250
483
+ },
484
+ {
485
+ "epoch": 9.5,
486
+ "learning_rate": 1.0080000000000001e-06,
487
+ "loss": 0.592,
488
+ "step": 2375
489
+ },
490
+ {
491
+ "epoch": 9.5,
492
+ "eval_accuracy": 0.7035,
493
+ "eval_disk_space_total": 78.1898422241211,
494
+ "eval_disk_space_used": 26.20429229736328,
495
+ "eval_f1": 0.7022917649865421,
496
+ "eval_gpu_ram_allocated": 0.3996419906616211,
497
+ "eval_gpu_ram_cached": 12.9296875,
498
+ "eval_gpu_ram_total": 39.56402587890625,
499
+ "eval_gpu_utilization": 27,
500
+ "eval_loss": 1.0490039587020874,
501
+ "eval_precision": 0.702476293906477,
502
+ "eval_recall": 0.7035,
503
+ "eval_runtime": 1.5585,
504
+ "eval_samples_per_second": 1283.281,
505
+ "eval_steps_per_second": 40.423,
506
+ "eval_system_ram_total": 83.48074722290039,
507
+ "eval_system_ram_used": 3.8126220703125,
508
+ "step": 2375
509
+ },
510
+ {
511
+ "epoch": 10.0,
512
+ "learning_rate": 8e-09,
513
+ "loss": 0.5586,
514
+ "step": 2500
515
+ },
516
+ {
517
+ "epoch": 10.0,
518
+ "eval_accuracy": 0.701,
519
+ "eval_disk_space_total": 78.1898422241211,
520
+ "eval_disk_space_used": 26.204303741455078,
521
+ "eval_f1": 0.6984193396838297,
522
+ "eval_gpu_ram_allocated": 0.3996291160583496,
523
+ "eval_gpu_ram_cached": 12.9296875,
524
+ "eval_gpu_ram_total": 39.56402587890625,
525
+ "eval_gpu_utilization": 27,
526
+ "eval_loss": 1.0510661602020264,
527
+ "eval_precision": 0.700028711222529,
528
+ "eval_recall": 0.701,
529
+ "eval_runtime": 1.5162,
530
+ "eval_samples_per_second": 1319.056,
531
+ "eval_steps_per_second": 41.55,
532
+ "eval_system_ram_total": 83.48074722290039,
533
+ "eval_system_ram_used": 3.844818115234375,
534
+ "step": 2500
535
  }
536
  ],
537
  "max_steps": 2500,
538
  "num_train_epochs": 10,
539
+ "total_flos": 1262611619041920.0,
540
  "trial_name": null,
541
  "trial_params": null
542
  }
{checkpoint-1500 → checkpoint-2500}/training_args.bin RENAMED
File without changes
{checkpoint-1500 → checkpoint-2500}/vocab.txt RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:294d34cfcdd9a17427a0a09cdf19e071c82e8ce59c34f4fe159d3c5118d1082d
3
  size 133523761
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a16a1346b21837ee13d52ba582c706fb96643d039a8ec745ade06a4e78deb60b
3
  size 133523761
runs/Jul23_00-54-01_c587bebf8fcc/events.out.tfevents.1690073651.c587bebf8fcc.2239.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a87e472813cf54d514cc70f3dcc6d715385ff7ad568595a73f287577b33a094
3
- size 22489
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbecb5c809c6be6c8afb303e013916d04358d96b487dee4dec559b0dcf5d3a75
3
+ size 26897