Femboyuwu2000 commited on
Commit
72d6f61
1 Parent(s): ce9b4c1

Training in progress, step 9040, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:630a910b2d5b8c9504add11b070a2b95445874621f6de2ea3077628d0553ffe3
3
  size 13982248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba139240646f433f9f0106d855aea9e350f07f031693b316743dd388f006a45a
3
  size 13982248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da3ec0e6441f43d24431895b711504845f578f35580f2b0f333b91a363d641c1
3
  size 7062522
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd6c424fd81f222bd0b9ad0388c7d2dd3efa61cf565f0946a945da841c18c43f
3
  size 7062522
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e3f344cbf26501e998b8de868d6d509ad3fe3171ec390a602388814ddd9df38
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ac19e6550ed28e30f1e0773f61b5a93cb672dde550a8914d4067b84caffee97
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0dde4a2f54c5119ed26c871b2368383ec34337eb67d04dfb138caacf14a881ef
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bb2ad4299490e0d014e4083d00933053d6f0c483383ad6b71adbd0a8a72b6c2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.712,
5
  "eval_steps": 500,
6
- "global_step": 8900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3122,6 +3122,55 @@
3122
  "learning_rate": 1.8371635972030942e-05,
3123
  "loss": 3.4938,
3124
  "step": 8900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3125
  }
3126
  ],
3127
  "logging_steps": 20,
@@ -3129,7 +3178,7 @@
3129
  "num_input_tokens_seen": 0,
3130
  "num_train_epochs": 2,
3131
  "save_steps": 20,
3132
- "total_flos": 2.103984238922957e+16,
3133
  "train_batch_size": 8,
3134
  "trial_name": null,
3135
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7232,
5
  "eval_steps": 500,
6
+ "global_step": 9040,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3122
  "learning_rate": 1.8371635972030942e-05,
3123
  "loss": 3.4938,
3124
  "step": 8900
3125
+ },
3126
+ {
3127
+ "epoch": 0.71,
3128
+ "grad_norm": 25.6701602935791,
3129
+ "learning_rate": 1.8324280212401316e-05,
3130
+ "loss": 3.3931,
3131
+ "step": 8920
3132
+ },
3133
+ {
3134
+ "epoch": 0.72,
3135
+ "grad_norm": 27.28550148010254,
3136
+ "learning_rate": 1.8276889582645278e-05,
3137
+ "loss": 3.426,
3138
+ "step": 8940
3139
+ },
3140
+ {
3141
+ "epoch": 0.72,
3142
+ "grad_norm": 26.73450469970703,
3143
+ "learning_rate": 1.8229464579868124e-05,
3144
+ "loss": 3.4074,
3145
+ "step": 8960
3146
+ },
3147
+ {
3148
+ "epoch": 0.72,
3149
+ "grad_norm": 30.16777992248535,
3150
+ "learning_rate": 1.818200570153568e-05,
3151
+ "loss": 3.4204,
3152
+ "step": 8980
3153
+ },
3154
+ {
3155
+ "epoch": 0.72,
3156
+ "grad_norm": 30.022031784057617,
3157
+ "learning_rate": 1.813451344546913e-05,
3158
+ "loss": 3.4905,
3159
+ "step": 9000
3160
+ },
3161
+ {
3162
+ "epoch": 0.72,
3163
+ "grad_norm": 34.67860412597656,
3164
+ "learning_rate": 1.8086988309839755e-05,
3165
+ "loss": 3.4165,
3166
+ "step": 9020
3167
+ },
3168
+ {
3169
+ "epoch": 0.72,
3170
+ "grad_norm": 26.23653793334961,
3171
+ "learning_rate": 1.8039430793163753e-05,
3172
+ "loss": 3.5014,
3173
+ "step": 9040
3174
  }
3175
  ],
3176
  "logging_steps": 20,
 
3178
  "num_input_tokens_seen": 0,
3179
  "num_train_epochs": 2,
3180
  "save_steps": 20,
3181
+ "total_flos": 2.1373183266914304e+16,
3182
  "train_batch_size": 8,
3183
  "trial_name": null,
3184
  "trial_params": null