Shresthadev403 commited on
Commit
ec41418
·
verified ·
1 Parent(s): 0c3fbe6

End of training

Browse files
README.md CHANGED
@@ -13,12 +13,12 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
- - eval_loss: 1.6093
17
- - eval_runtime: 0.0202
18
- - eval_samples_per_second: 49.616
19
- - eval_steps_per_second: 49.616
20
- - epoch: 63.0
21
- - step: 63
22
 
23
  ## Model description
24
 
 
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
+ - eval_loss: 1.6092
17
+ - eval_runtime: 0.0205
18
+ - eval_samples_per_second: 48.823
19
+ - eval_steps_per_second: 48.823
20
+ - epoch: 64.0
21
+ - step: 64
22
 
23
  ## Model description
24
 
logs/events.out.tfevents.1707026588.d77ab3a64c5c.34.1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ff73fe3740fb5b99d01ef6ef1cbadab314dffc088695fad2e971a007c4f62f0
3
- size 24446
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94232f3ccf63cd83b0fd10a681fe1da1ed75cc824e16fc93d6019254ee1a2bd5
3
+ size 24866
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80ea65344f5f47340489f9baffe2718ad1c9a31a5e30646c73ddc3b7b576a4c4
3
  size 497918592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35477aab7a0d675972e3637322e028ad7b025b3cc2ea5013d9358f7f5b631137
3
  size 497918592
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.5992611646652222,
3
  "best_model_checkpoint": "controlled-food-recipe-generation/checkpoint-53",
4
- "epoch": 63.0,
5
  "eval_steps": 1,
6
- "global_step": 63,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -889,6 +889,20 @@
889
  "eval_samples_per_second": 49.616,
890
  "eval_steps_per_second": 49.616,
891
  "step": 63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
892
  }
893
  ],
894
  "logging_steps": 1,
@@ -896,7 +910,7 @@
896
  "num_input_tokens_seen": 0,
897
  "num_train_epochs": 100,
898
  "save_steps": 1,
899
- "total_flos": 37038145536000.0,
900
  "train_batch_size": 32,
901
  "trial_name": null,
902
  "trial_params": null
 
1
  {
2
  "best_metric": 1.5992611646652222,
3
  "best_model_checkpoint": "controlled-food-recipe-generation/checkpoint-53",
4
+ "epoch": 64.0,
5
  "eval_steps": 1,
6
+ "global_step": 64,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
889
  "eval_samples_per_second": 49.616,
890
  "eval_steps_per_second": 49.616,
891
  "step": 63
892
+ },
893
+ {
894
+ "epoch": 64.0,
895
+ "learning_rate": 2.8000000000000003e-05,
896
+ "loss": 1.1083,
897
+ "step": 64
898
+ },
899
+ {
900
+ "epoch": 64.0,
901
+ "eval_loss": 1.6092382669448853,
902
+ "eval_runtime": 0.0205,
903
+ "eval_samples_per_second": 48.823,
904
+ "eval_steps_per_second": 48.823,
905
+ "step": 64
906
  }
907
  ],
908
  "logging_steps": 1,
 
910
  "num_input_tokens_seen": 0,
911
  "num_train_epochs": 100,
912
  "save_steps": 1,
913
+ "total_flos": 37626052608000.0,
914
  "train_batch_size": 32,
915
  "trial_name": null,
916
  "trial_params": null