dixedus commited on
Commit
aaf7ec5
·
verified ·
1 Parent(s): 20d8e09

Training in progress, step 63, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1eecd25559c9189c8612de06e403e95edc7b91bb1570d780cb66259b876a5902
3
  size 34793120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb97d9d163a73ea7f9751d1416577a26bb4b0dd126962fec44a06677503ff529
3
  size 34793120
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d512dd2ac705c75c747ff69579fc5045a258495d0bc226e232574fce800ccdb2
3
  size 18132116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0227abca3d752bb4f21e6bead416e0d844ff970d3c99e49714c2d6eb85f9f259
3
  size 18132116
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d9412651d3546e8955cfba636a91b429e14260cf96ea084a0944945e5f2ed03
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a505b6f4b73db6520699af0051d1b07d67320ce5266a6dfbde7ef46ef8dab4d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec430a8fba90f7f39f74e916eb32712c363a0fd20bb4904251fce0eb82f2b9cf
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:922634a168fad3088c2a461ec82359f2941891b1472f492b835996e27c3cba9d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7632508833922261,
5
  "eval_steps": 9,
6
- "global_step": 54,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -189,6 +189,35 @@
189
  "eval_samples_per_second": 40.017,
190
  "eval_steps_per_second": 5.002,
191
  "step": 54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  }
193
  ],
194
  "logging_steps": 3,
@@ -208,7 +237,7 @@
208
  "attributes": {}
209
  }
210
  },
211
- "total_flos": 6865241065390080.0,
212
  "train_batch_size": 8,
213
  "trial_name": null,
214
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8904593639575972,
5
  "eval_steps": 9,
6
+ "global_step": 63,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
189
  "eval_samples_per_second": 40.017,
190
  "eval_steps_per_second": 5.002,
191
  "step": 54
192
+ },
193
+ {
194
+ "epoch": 0.8056537102473498,
195
+ "grad_norm": 0.05651354417204857,
196
+ "learning_rate": 2.3256088156396868e-05,
197
+ "loss": 2.4944,
198
+ "step": 57
199
+ },
200
+ {
201
+ "epoch": 0.8480565371024735,
202
+ "grad_norm": 0.06623954325914383,
203
+ "learning_rate": 2.0658795558326743e-05,
204
+ "loss": 2.4146,
205
+ "step": 60
206
+ },
207
+ {
208
+ "epoch": 0.8904593639575972,
209
+ "grad_norm": 0.059099238365888596,
210
+ "learning_rate": 1.8109066104575023e-05,
211
+ "loss": 2.5197,
212
+ "step": 63
213
+ },
214
+ {
215
+ "epoch": 0.8904593639575972,
216
+ "eval_loss": 2.3530490398406982,
217
+ "eval_runtime": 3.0239,
218
+ "eval_samples_per_second": 39.684,
219
+ "eval_steps_per_second": 4.96,
220
+ "step": 63
221
  }
222
  ],
223
  "logging_steps": 3,
 
237
  "attributes": {}
238
  }
239
  },
240
+ "total_flos": 8009447909621760.0,
241
  "train_batch_size": 8,
242
  "trial_name": null,
243
  "trial_params": null