nbroad commited on
Commit
a746c95
·
verified ·
1 Parent(s): e971fc8

Training in progress, step 1150

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:123ff90298633dd2c989a1c39eaf498432d0a7bc116ee54dea377c35fecb52f3
3
  size 174655536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6faaa728d5db6d098d4cc022ad9dda5a658ed0b36939d0a51b023632080ab8c0
3
  size 174655536
wandb/run-20250202_133422-fwnjnrlg/files/output.log CHANGED
@@ -571,3 +571,29 @@ The model is not an instance of PreTrainedModel. No liger kernels will be applie
571
  {'loss': 0.0988, 'grad_norm': 2.0237863063812256, 'learning_rate': 3.7291956928251604e-07, 'epoch': 0.95}
572
  {'loss': 0.1445, 'grad_norm': 2.6279687881469727, 'learning_rate': 3.44839133381093e-07, 'epoch': 0.96}
573
  {'eval_loss': 0.1627296358346939, 'eval_runtime': 23.781, 'eval_samples_per_second': 21.025, 'eval_steps_per_second': 2.649, 'epoch': 0.96}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
571
  {'loss': 0.0988, 'grad_norm': 2.0237863063812256, 'learning_rate': 3.7291956928251604e-07, 'epoch': 0.95}
572
  {'loss': 0.1445, 'grad_norm': 2.6279687881469727, 'learning_rate': 3.44839133381093e-07, 'epoch': 0.96}
573
  {'eval_loss': 0.1627296358346939, 'eval_runtime': 23.781, 'eval_samples_per_second': 21.025, 'eval_steps_per_second': 2.649, 'epoch': 0.96}
574
+ {'loss': 0.1196, 'grad_norm': 1.8184841871261597, 'learning_rate': 3.178515913435509e-07, 'epoch': 0.96}
575
+ {'loss': 0.0913, 'grad_norm': 1.855987787246704, 'learning_rate': 2.9195793775288916e-07, 'epoch': 0.96}
576
+ {'loss': 0.0898, 'grad_norm': 1.5791326761245728, 'learning_rate': 2.6715912687860777e-07, 'epoch': 0.96}
577
+ {'loss': 0.0669, 'grad_norm': 1.7256059646606445, 'learning_rate': 2.434560726414925e-07, 'epoch': 0.96}
578
+ {'loss': 0.1174, 'grad_norm': 2.3028717041015625, 'learning_rate': 2.2084964857996827e-07, 'epoch': 0.97}
579
+ {'loss': 0.1268, 'grad_norm': 1.9804052114486694, 'learning_rate': 1.9934068781790493e-07, 'epoch': 0.97}
580
+ {'loss': 0.1205, 'grad_norm': 1.540189266204834, 'learning_rate': 1.7892998303389195e-07, 'epoch': 0.97}
581
+ {'loss': 0.1028, 'grad_norm': 1.841741681098938, 'learning_rate': 1.5961828643204167e-07, 'epoch': 0.97}
582
+ {'loss': 0.0755, 'grad_norm': 1.216596245765686, 'learning_rate': 1.414063097142715e-07, 'epoch': 0.97}
583
+ {'loss': 0.1013, 'grad_norm': 1.7214853763580322, 'learning_rate': 1.242947240540615e-07, 'epoch': 0.97}
584
+ {'loss': 0.1266, 'grad_norm': 1.9562798738479614, 'learning_rate': 1.0828416007173103e-07, 'epoch': 0.98}
585
+ {'loss': 0.1275, 'grad_norm': 2.374631643295288, 'learning_rate': 9.337520781119047e-08, 'epoch': 0.98}
586
+ {'loss': 0.112, 'grad_norm': 1.5571508407592773, 'learning_rate': 7.956841671820537e-08, 'epoch': 0.98}
587
+ {'loss': 0.1158, 'grad_norm': 1.8633334636688232, 'learning_rate': 6.686429562013263e-08, 'epoch': 0.98}
588
+ {'loss': 0.1183, 'grad_norm': 1.9713605642318726, 'learning_rate': 5.526331270718221e-08, 'epoch': 0.98}
589
+ {'loss': 0.1454, 'grad_norm': 2.2960586547851562, 'learning_rate': 4.476589551516086e-08, 'epoch': 0.98}
590
+ {'loss': 0.1115, 'grad_norm': 1.4765669107437134, 'learning_rate': 3.53724309097081e-08, 'epoch': 0.99}
591
+ {'loss': 0.1296, 'grad_norm': 2.252562999725342, 'learning_rate': 2.7083265072047613e-08, 'epoch': 0.99}
592
+ {'loss': 0.1063, 'grad_norm': 1.925352692604065, 'learning_rate': 1.9898703486224136e-08, 'epoch': 0.99}
593
+ {'loss': 0.1154, 'grad_norm': 1.8675994873046875, 'learning_rate': 1.3819010927845766e-08, 'epoch': 0.99}
594
+ {'loss': 0.138, 'grad_norm': 1.7973426580429077, 'learning_rate': 8.84441145432513e-09, 'epoch': 0.99}
595
+ {'loss': 0.0976, 'grad_norm': 2.3306591510772705, 'learning_rate': 4.975088396632632e-09, 'epoch': 0.99}
596
+ {'loss': 0.1028, 'grad_norm': 1.360988736152649, 'learning_rate': 2.211184352518547e-09, 'epoch': 1.0}
597
+ {'loss': 0.1026, 'grad_norm': 2.0403928756713867, 'learning_rate': 5.528011812838685e-10, 'epoch': 1.0}
598
+ {'loss': 0.1253, 'grad_norm': 2.1106996536254883, 'learning_rate': 0.0, 'epoch': 1.0}
599
+ {'train_runtime': 12731.7902, 'train_samples_per_second': 5.78, 'train_steps_per_second': 0.09, 'train_loss': 0.13795788543379825, 'epoch': 1.0}
wandb/run-20250202_133422-fwnjnrlg/run-fwnjnrlg.wandb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bca37413ffec6a004f96892ea372ba3659a86fa48417c9fbb713788698ed575b
3
- size 3211264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d416461370487513932320fded332864ed78913e4c94f5b41684d00a12787ec
3
+ size 3342336