Training in progress, step 1150
Browse files
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 174655536
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6faaa728d5db6d098d4cc022ad9dda5a658ed0b36939d0a51b023632080ab8c0
|
3 |
size 174655536
|
wandb/run-20250202_133422-fwnjnrlg/files/output.log
CHANGED
@@ -571,3 +571,29 @@ The model is not an instance of PreTrainedModel. No liger kernels will be applie
|
|
571 |
{'loss': 0.0988, 'grad_norm': 2.0237863063812256, 'learning_rate': 3.7291956928251604e-07, 'epoch': 0.95}
|
572 |
{'loss': 0.1445, 'grad_norm': 2.6279687881469727, 'learning_rate': 3.44839133381093e-07, 'epoch': 0.96}
|
573 |
{'eval_loss': 0.1627296358346939, 'eval_runtime': 23.781, 'eval_samples_per_second': 21.025, 'eval_steps_per_second': 2.649, 'epoch': 0.96}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
571 |
{'loss': 0.0988, 'grad_norm': 2.0237863063812256, 'learning_rate': 3.7291956928251604e-07, 'epoch': 0.95}
|
572 |
{'loss': 0.1445, 'grad_norm': 2.6279687881469727, 'learning_rate': 3.44839133381093e-07, 'epoch': 0.96}
|
573 |
{'eval_loss': 0.1627296358346939, 'eval_runtime': 23.781, 'eval_samples_per_second': 21.025, 'eval_steps_per_second': 2.649, 'epoch': 0.96}
|
574 |
+
{'loss': 0.1196, 'grad_norm': 1.8184841871261597, 'learning_rate': 3.178515913435509e-07, 'epoch': 0.96}
|
575 |
+
{'loss': 0.0913, 'grad_norm': 1.855987787246704, 'learning_rate': 2.9195793775288916e-07, 'epoch': 0.96}
|
576 |
+
{'loss': 0.0898, 'grad_norm': 1.5791326761245728, 'learning_rate': 2.6715912687860777e-07, 'epoch': 0.96}
|
577 |
+
{'loss': 0.0669, 'grad_norm': 1.7256059646606445, 'learning_rate': 2.434560726414925e-07, 'epoch': 0.96}
|
578 |
+
{'loss': 0.1174, 'grad_norm': 2.3028717041015625, 'learning_rate': 2.2084964857996827e-07, 'epoch': 0.97}
|
579 |
+
{'loss': 0.1268, 'grad_norm': 1.9804052114486694, 'learning_rate': 1.9934068781790493e-07, 'epoch': 0.97}
|
580 |
+
{'loss': 0.1205, 'grad_norm': 1.540189266204834, 'learning_rate': 1.7892998303389195e-07, 'epoch': 0.97}
|
581 |
+
{'loss': 0.1028, 'grad_norm': 1.841741681098938, 'learning_rate': 1.5961828643204167e-07, 'epoch': 0.97}
|
582 |
+
{'loss': 0.0755, 'grad_norm': 1.216596245765686, 'learning_rate': 1.414063097142715e-07, 'epoch': 0.97}
|
583 |
+
{'loss': 0.1013, 'grad_norm': 1.7214853763580322, 'learning_rate': 1.242947240540615e-07, 'epoch': 0.97}
|
584 |
+
{'loss': 0.1266, 'grad_norm': 1.9562798738479614, 'learning_rate': 1.0828416007173103e-07, 'epoch': 0.98}
|
585 |
+
{'loss': 0.1275, 'grad_norm': 2.374631643295288, 'learning_rate': 9.337520781119047e-08, 'epoch': 0.98}
|
586 |
+
{'loss': 0.112, 'grad_norm': 1.5571508407592773, 'learning_rate': 7.956841671820537e-08, 'epoch': 0.98}
|
587 |
+
{'loss': 0.1158, 'grad_norm': 1.8633334636688232, 'learning_rate': 6.686429562013263e-08, 'epoch': 0.98}
|
588 |
+
{'loss': 0.1183, 'grad_norm': 1.9713605642318726, 'learning_rate': 5.526331270718221e-08, 'epoch': 0.98}
|
589 |
+
{'loss': 0.1454, 'grad_norm': 2.2960586547851562, 'learning_rate': 4.476589551516086e-08, 'epoch': 0.98}
|
590 |
+
{'loss': 0.1115, 'grad_norm': 1.4765669107437134, 'learning_rate': 3.53724309097081e-08, 'epoch': 0.99}
|
591 |
+
{'loss': 0.1296, 'grad_norm': 2.252562999725342, 'learning_rate': 2.7083265072047613e-08, 'epoch': 0.99}
|
592 |
+
{'loss': 0.1063, 'grad_norm': 1.925352692604065, 'learning_rate': 1.9898703486224136e-08, 'epoch': 0.99}
|
593 |
+
{'loss': 0.1154, 'grad_norm': 1.8675994873046875, 'learning_rate': 1.3819010927845766e-08, 'epoch': 0.99}
|
594 |
+
{'loss': 0.138, 'grad_norm': 1.7973426580429077, 'learning_rate': 8.84441145432513e-09, 'epoch': 0.99}
|
595 |
+
{'loss': 0.0976, 'grad_norm': 2.3306591510772705, 'learning_rate': 4.975088396632632e-09, 'epoch': 0.99}
|
596 |
+
{'loss': 0.1028, 'grad_norm': 1.360988736152649, 'learning_rate': 2.211184352518547e-09, 'epoch': 1.0}
|
597 |
+
{'loss': 0.1026, 'grad_norm': 2.0403928756713867, 'learning_rate': 5.528011812838685e-10, 'epoch': 1.0}
|
598 |
+
{'loss': 0.1253, 'grad_norm': 2.1106996536254883, 'learning_rate': 0.0, 'epoch': 1.0}
|
599 |
+
{'train_runtime': 12731.7902, 'train_samples_per_second': 5.78, 'train_steps_per_second': 0.09, 'train_loss': 0.13795788543379825, 'epoch': 1.0}
|
wandb/run-20250202_133422-fwnjnrlg/run-fwnjnrlg.wandb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d416461370487513932320fded332864ed78913e4c94f5b41684d00a12787ec
|
3 |
+
size 3342336
|