Femboyuwu2000 commited on
Commit
d827c75
1 Parent(s): 9461a8d

Training in progress, step 940, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:824b0482f65c54d780bf8cab1640646068f4412a35e93bf84312042275c949aa
3
  size 13982248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca9ff2623ebc59ce1f05948fb7f333347562f31720ea83bd9c654d07132bc456
3
  size 13982248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd7608b89dafbdc3e9ff64304083584d3644129f923c4a985f1eb00d503b9fd9
3
  size 7062522
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5979893651a6ad117a2ca79acc918df76f51b5615316af74b15fc3790d5db672
3
  size 7062522
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a88d647ecb4240893f5936ba5eb3da134925ec105812fc12c4afbef4944427f9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:175dfe02b8f41adb5d4468faa982ed5a51b1c1cd012483cfc61f70c9241471b4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:799790dec230733f2507fb25ac7c0f3b89a16d8c9935acbdd39b641fa24a7e12
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f1c7c4b3a6117ae282f791966d823034e6914b8063785569ba52a491b2ee826
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0736,
5
  "eval_steps": 500,
6
- "global_step": 920,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -329,6 +329,13 @@
329
  "learning_rate": 2.9979864597363846e-05,
330
  "loss": 3.6716,
331
  "step": 920
 
 
 
 
 
 
 
332
  }
333
  ],
334
  "logging_steps": 20,
@@ -336,7 +343,7 @@
336
  "num_input_tokens_seen": 0,
337
  "num_train_epochs": 2,
338
  "save_steps": 20,
339
- "total_flos": 2178771877822464.0,
340
  "train_batch_size": 8,
341
  "trial_name": null,
342
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0752,
5
  "eval_steps": 500,
6
+ "global_step": 940,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
329
  "learning_rate": 2.9979864597363846e-05,
330
  "loss": 3.6716,
331
  "step": 920
332
+ },
333
+ {
334
+ "epoch": 0.08,
335
+ "grad_norm": 22.791175842285156,
336
+ "learning_rate": 2.9977269673842554e-05,
337
+ "loss": 3.6172,
338
+ "step": 940
339
  }
340
  ],
341
  "logging_steps": 20,
 
343
  "num_input_tokens_seen": 0,
344
  "num_train_epochs": 2,
345
  "save_steps": 20,
346
+ "total_flos": 2230933278818304.0,
347
  "train_batch_size": 8,
348
  "trial_name": null,
349
  "trial_params": null