Shresthadev403 committed on
Commit
c808cef
·
verified ·
1 Parent(s): 0738157

End of training

Browse files
README.md CHANGED
@@ -13,12 +13,12 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
- - eval_loss: 0.9076
17
- - eval_runtime: 1379.694
18
- - eval_samples_per_second: 108.72
19
- - eval_steps_per_second: 1.699
20
- - epoch: 15.41
21
- - step: 650000
22
 
23
  ## Model description
24
 
 
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
+ - eval_loss: 0.9071
17
+ - eval_runtime: 1366.9659
18
+ - eval_samples_per_second: 109.732
19
+ - eval_steps_per_second: 1.715
20
+ - epoch: 16.59
21
+ - step: 700000
22
 
23
  ## Model description
24
 
logs/events.out.tfevents.1708869176.4df77956e39f.26.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:723923dc88275cf797b3f9935b358330cd419cbfe97ccce20ca661ac1bd9fd7e
3
+ size 5142
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd5d9238101183379b4f6f7d5f27f1eabe8b3f1bd7d65edce05b27a0a433a821
3
  size 497918592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0d67c2b65ae6e5206dfafc08cce5024ed542ad173cc472520d3bda5967a968c
3
  size 497918592
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.407224803261592,
5
  "eval_steps": 50000,
6
- "global_step": 650000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -189,6 +189,20 @@
189
  "eval_samples_per_second": 108.72,
190
  "eval_steps_per_second": 1.699,
191
  "step": 650000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  }
193
  ],
194
  "logging_steps": 50000,
@@ -196,7 +210,7 @@
196
  "num_input_tokens_seen": 0,
197
  "num_train_epochs": 100,
198
  "save_steps": 50000,
199
- "total_flos": 1.35870288887808e+18,
200
  "train_batch_size": 32,
201
  "trial_name": null,
202
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 16.592395941974022,
5
  "eval_steps": 50000,
6
+ "global_step": 700000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
189
  "eval_samples_per_second": 108.72,
190
  "eval_steps_per_second": 1.699,
191
  "step": 650000
192
+ },
193
+ {
194
+ "epoch": 16.59,
195
+ "learning_rate": 4.940762776144876e-05,
196
+ "loss": 0.8615,
197
+ "step": 700000
198
+ },
199
+ {
200
+ "epoch": 16.59,
201
+ "eval_loss": 0.907132625579834,
202
+ "eval_runtime": 1366.9659,
203
+ "eval_samples_per_second": 109.732,
204
+ "eval_steps_per_second": 1.715,
205
+ "step": 700000
206
  }
207
  ],
208
  "logging_steps": 50000,
 
210
  "num_input_tokens_seen": 0,
211
  "num_train_epochs": 100,
212
  "save_steps": 50000,
213
+ "total_flos": 1.463218656509952e+18,
214
  "train_batch_size": 32,
215
  "trial_name": null,
216
  "trial_params": null