Femboyuwu2000 commited on
Commit
bbd4224
1 Parent(s): 5f3b92d

Training in progress, step 3940, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce114ed028608c33fb14ff98f4580972cd31568f223be1d22613afcf3ea6009a
3
  size 13982248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de0aeaf21dfa25596d3e83291fdac7d22780924777e6b26386fda19e60f85953
3
  size 13982248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3dd88d5a6388bdda64ace4a78c9f1d530f455e15ff96f35abfaa2c7aeed5a0c6
3
  size 7062522
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adef023b0b0184db66b469a4b3d5b9622ecbb07244f5c52c41d3671dd322e40a
3
  size 7062522
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e22d0a49f52c6eef0f435a0352f4ad22f617645d5bf3a854028fdf06254c9f5c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31209e19cd1e43f15fe297600ccd65154a6150b1671600037426d3845125ba25
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f569d1656ee5e3c0d9963834d2eb1a257ab730529da22fb07a96bbbcaee7fe67
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c28f06383bf072aeed87a869c461b8eda46a4fd1dbe3f64038b5a01c478bcad
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.3136,
5
  "eval_steps": 500,
6
- "global_step": 3920,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1379,6 +1379,13 @@
1379
  "learning_rate": 2.7883845626740046e-05,
1380
  "loss": 3.5286,
1381
  "step": 3920
 
 
 
 
 
 
 
1382
  }
1383
  ],
1384
  "logging_steps": 20,
@@ -1386,7 +1393,7 @@
1386
  "num_input_tokens_seen": 0,
1387
  "num_train_epochs": 2,
1388
  "save_steps": 20,
1389
- "total_flos": 9249627050803200.0,
1390
  "train_batch_size": 8,
1391
  "trial_name": null,
1392
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.3152,
5
  "eval_steps": 500,
6
+ "global_step": 3940,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1379
  "learning_rate": 2.7883845626740046e-05,
1380
  "loss": 3.5286,
1381
  "step": 3920
1382
+ },
1383
+ {
1384
+ "epoch": 0.32,
1385
+ "grad_norm": 36.6258430480957,
1386
+ "learning_rate": 2.7858899370977123e-05,
1387
+ "loss": 3.589,
1388
+ "step": 3940
1389
  }
1390
  ],
1391
  "logging_steps": 20,
 
1393
  "num_input_tokens_seen": 0,
1394
  "num_train_epochs": 2,
1395
  "save_steps": 20,
1396
+ "total_flos": 9303592776990720.0,
1397
  "train_batch_size": 8,
1398
  "trial_name": null,
1399
  "trial_params": null