leixa commited on
Commit
9395bbf
·
verified ·
1 Parent(s): 6e5195b

Training in progress, step 72, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:540695a3a9f43d9bbd1ef789d552460b8c74b46bd4fe758079e6157729c7e2ec
3
  size 201892112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:611732e8b60b82f65ca3fb621af02d76e06289cda1042b9cb51448095f7f99f2
3
  size 201892112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa7428b52cb33a82912bb70cd558b2213508ff4ab07a0f854523e513b35a9f53
3
  size 102864548
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b78fd360fd0837a0e24e78ff53bcb54596b905619d3f1795815dbe338f7fdc0
3
  size 102864548
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a808482866d58c57fe0290015fc20e4f8b8939db7d59d8f55d0fcd2b8b4cf266
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bea3603c9a5ac93aadff8538e8c1f7b7097253fb14e88f0b33e0bdddfa5087f6
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56e49a8262b2a61d3153e5d379c20cfb08094371d950647117ad67ae4b87231f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87bd97bc98e39f7007b5f2750d097c7395ce4bde9cd90085b1ec2b6c643faaf8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5,
5
  "eval_steps": 24,
6
- "global_step": 48,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -143,6 +143,70 @@
143
  "eval_samples_per_second": 48.84,
144
  "eval_steps_per_second": 6.331,
145
  "step": 48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  }
147
  ],
148
  "logging_steps": 3,
@@ -162,7 +226,7 @@
162
  "attributes": {}
163
  }
164
  },
165
- "total_flos": 1.06657392623616e+16,
166
  "train_batch_size": 8,
167
  "trial_name": null,
168
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.75,
5
  "eval_steps": 24,
6
+ "global_step": 72,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
143
  "eval_samples_per_second": 48.84,
144
  "eval_steps_per_second": 6.331,
145
  "step": 48
146
+ },
147
+ {
148
+ "epoch": 0.53125,
149
+ "grad_norm": 0.284551739692688,
150
+ "learning_rate": 4.736424735627193e-05,
151
+ "loss": 1.5701,
152
+ "step": 51
153
+ },
154
+ {
155
+ "epoch": 0.5625,
156
+ "grad_norm": 0.3086010813713074,
157
+ "learning_rate": 4.697267418712415e-05,
158
+ "loss": 1.5359,
159
+ "step": 54
160
+ },
161
+ {
162
+ "epoch": 0.59375,
163
+ "grad_norm": 0.31514284014701843,
164
+ "learning_rate": 4.655584912254727e-05,
165
+ "loss": 1.5334,
166
+ "step": 57
167
+ },
168
+ {
169
+ "epoch": 0.625,
170
+ "grad_norm": 0.31487005949020386,
171
+ "learning_rate": 4.611425119494551e-05,
172
+ "loss": 1.5224,
173
+ "step": 60
174
+ },
175
+ {
176
+ "epoch": 0.65625,
177
+ "grad_norm": 0.36550524830818176,
178
+ "learning_rate": 4.564838790671e-05,
179
+ "loss": 1.5222,
180
+ "step": 63
181
+ },
182
+ {
183
+ "epoch": 0.6875,
184
+ "grad_norm": 0.29623621702194214,
185
+ "learning_rate": 4.515879464697629e-05,
186
+ "loss": 1.5206,
187
+ "step": 66
188
+ },
189
+ {
190
+ "epoch": 0.71875,
191
+ "grad_norm": 0.3229241967201233,
192
+ "learning_rate": 4.464603407633326e-05,
193
+ "loss": 1.5232,
194
+ "step": 69
195
+ },
196
+ {
197
+ "epoch": 0.75,
198
+ "grad_norm": 0.2879750728607178,
199
+ "learning_rate": 4.41106954801906e-05,
200
+ "loss": 1.5028,
201
+ "step": 72
202
+ },
203
+ {
204
+ "epoch": 0.75,
205
+ "eval_loss": 1.493626594543457,
206
+ "eval_runtime": 3.3364,
207
+ "eval_samples_per_second": 48.555,
208
+ "eval_steps_per_second": 6.294,
209
+ "step": 72
210
  }
211
  ],
212
  "logging_steps": 3,
 
226
  "attributes": {}
227
  }
228
  },
229
+ "total_flos": 1.6211923678789632e+16,
230
  "train_batch_size": 8,
231
  "trial_name": null,
232
  "trial_params": null