plip committed on
Commit
bd5abde
1 Parent(s): c1b324f

Training in progress, step 310000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff044e80c574241efa7089a5ec2f633f003713c3c6f5dd2b7df44302f14645b7
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c38081c882988bad98fc3f3c119510cdcd90a9e84f912d6cad0cba15182d5589
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c5e4cc7c8890a029475b21a89ba73ee51bdbf0169ee989a07985e899b417451
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01b2181d9db98eb1abf2e59d3ffca75b8922c4c585b17c4c50bcc63a5ba1af87
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58bee1da196beb31121585eda29e2f73fb8583fc7e72eb4a3d5453bf8facf2f9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05ae8a4dd944da2b5707cf785065491f61bb5535f2394367e15da56fde42baa0
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58bee1da196beb31121585eda29e2f73fb8583fc7e72eb4a3d5453bf8facf2f9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05ae8a4dd944da2b5707cf785065491f61bb5535f2394367e15da56fde42baa0
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58bee1da196beb31121585eda29e2f73fb8583fc7e72eb4a3d5453bf8facf2f9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05ae8a4dd944da2b5707cf785065491f61bb5535f2394367e15da56fde42baa0
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58bee1da196beb31121585eda29e2f73fb8583fc7e72eb4a3d5453bf8facf2f9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05ae8a4dd944da2b5707cf785065491f61bb5535f2394367e15da56fde42baa0
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58bee1da196beb31121585eda29e2f73fb8583fc7e72eb4a3d5453bf8facf2f9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05ae8a4dd944da2b5707cf785065491f61bb5535f2394367e15da56fde42baa0
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58bee1da196beb31121585eda29e2f73fb8583fc7e72eb4a3d5453bf8facf2f9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05ae8a4dd944da2b5707cf785065491f61bb5535f2394367e15da56fde42baa0
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58bee1da196beb31121585eda29e2f73fb8583fc7e72eb4a3d5453bf8facf2f9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05ae8a4dd944da2b5707cf785065491f61bb5535f2394367e15da56fde42baa0
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58bee1da196beb31121585eda29e2f73fb8583fc7e72eb4a3d5453bf8facf2f9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05ae8a4dd944da2b5707cf785065491f61bb5535f2394367e15da56fde42baa0
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7914b9f1e3709b1198ec189eb9bb9105cd6b88dedbcbbdd4128934a703cf33e3
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b8c10dab6d3ee824fc8fe4628d3bf3ceea806ce0d2fbe513f32af4d508ab89e
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.645649625363168,
5
- "global_step": 300000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -6006,11 +6006,211 @@
6006
  "eval_samples_per_second": 755.698,
6007
  "eval_steps_per_second": 12.091,
6008
  "step": 300000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6009
  }
6010
  ],
6011
  "max_steps": 500000,
6012
  "num_train_epochs": 13,
6013
- "total_flos": 9.584519810843685e+21,
6014
  "trial_name": null,
6015
  "trial_params": null
6016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.900504612875274,
5
+ "global_step": 310000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
6006
  "eval_samples_per_second": 755.698,
6007
  "eval_steps_per_second": 12.091,
6008
  "step": 300000
6009
+ },
6010
+ {
6011
+ "epoch": 7.66,
6012
+ "learning_rate": 0.00011893996636109606,
6013
+ "loss": 0.2819,
6014
+ "step": 300500
6015
+ },
6016
+ {
6017
+ "epoch": 7.67,
6018
+ "learning_rate": 0.00011847572270435852,
6019
+ "loss": 0.2818,
6020
+ "step": 301000
6021
+ },
6022
+ {
6023
+ "epoch": 7.67,
6024
+ "eval_loss": 0.8139034509658813,
6025
+ "eval_runtime": 1.299,
6026
+ "eval_samples_per_second": 769.829,
6027
+ "eval_steps_per_second": 12.317,
6028
+ "step": 301000
6029
+ },
6030
+ {
6031
+ "epoch": 7.68,
6032
+ "learning_rate": 0.00011801187847126579,
6033
+ "loss": 0.2821,
6034
+ "step": 301500
6035
+ },
6036
+ {
6037
+ "epoch": 7.7,
6038
+ "learning_rate": 0.00011754843873434411,
6039
+ "loss": 0.2818,
6040
+ "step": 302000
6041
+ },
6042
+ {
6043
+ "epoch": 7.7,
6044
+ "eval_loss": 0.8098216652870178,
6045
+ "eval_runtime": 1.2656,
6046
+ "eval_samples_per_second": 790.131,
6047
+ "eval_steps_per_second": 12.642,
6048
+ "step": 302000
6049
+ },
6050
+ {
6051
+ "epoch": 7.71,
6052
+ "learning_rate": 0.00011708540856169612,
6053
+ "loss": 0.2816,
6054
+ "step": 302500
6055
+ },
6056
+ {
6057
+ "epoch": 7.72,
6058
+ "learning_rate": 0.00011662279301694567,
6059
+ "loss": 0.2826,
6060
+ "step": 303000
6061
+ },
6062
+ {
6063
+ "epoch": 7.72,
6064
+ "eval_loss": 0.8181115984916687,
6065
+ "eval_runtime": 1.2294,
6066
+ "eval_samples_per_second": 813.399,
6067
+ "eval_steps_per_second": 13.014,
6068
+ "step": 303000
6069
+ },
6070
+ {
6071
+ "epoch": 7.73,
6072
+ "learning_rate": 0.0001161605971591822,
6073
+ "loss": 0.2817,
6074
+ "step": 303500
6075
+ },
6076
+ {
6077
+ "epoch": 7.75,
6078
+ "learning_rate": 0.00011569882604290559,
6079
+ "loss": 0.2825,
6080
+ "step": 304000
6081
+ },
6082
+ {
6083
+ "epoch": 7.75,
6084
+ "eval_loss": 0.8146129250526428,
6085
+ "eval_runtime": 1.2431,
6086
+ "eval_samples_per_second": 804.472,
6087
+ "eval_steps_per_second": 12.872,
6088
+ "step": 304000
6089
+ },
6090
+ {
6091
+ "epoch": 7.76,
6092
+ "learning_rate": 0.00011523748471797075,
6093
+ "loss": 0.2816,
6094
+ "step": 304500
6095
+ },
6096
+ {
6097
+ "epoch": 7.77,
6098
+ "learning_rate": 0.00011477657822953255,
6099
+ "loss": 0.2813,
6100
+ "step": 305000
6101
+ },
6102
+ {
6103
+ "epoch": 7.77,
6104
+ "eval_loss": 0.8215622305870056,
6105
+ "eval_runtime": 1.2616,
6106
+ "eval_samples_per_second": 792.667,
6107
+ "eval_steps_per_second": 12.683,
6108
+ "step": 305000
6109
+ },
6110
+ {
6111
+ "epoch": 7.79,
6112
+ "learning_rate": 0.00011431611161799043,
6113
+ "loss": 0.2818,
6114
+ "step": 305500
6115
+ },
6116
+ {
6117
+ "epoch": 7.8,
6118
+ "learning_rate": 0.0001138560899189335,
6119
+ "loss": 0.2814,
6120
+ "step": 306000
6121
+ },
6122
+ {
6123
+ "epoch": 7.8,
6124
+ "eval_loss": 0.8133662343025208,
6125
+ "eval_runtime": 1.3232,
6126
+ "eval_samples_per_second": 755.725,
6127
+ "eval_steps_per_second": 12.092,
6128
+ "step": 306000
6129
+ },
6130
+ {
6131
+ "epoch": 7.81,
6132
+ "learning_rate": 0.00011339651816308543,
6133
+ "loss": 0.2818,
6134
+ "step": 306500
6135
+ },
6136
+ {
6137
+ "epoch": 7.82,
6138
+ "learning_rate": 0.00011293740137624925,
6139
+ "loss": 0.2808,
6140
+ "step": 307000
6141
+ },
6142
+ {
6143
+ "epoch": 7.82,
6144
+ "eval_loss": 0.811087429523468,
6145
+ "eval_runtime": 1.3842,
6146
+ "eval_samples_per_second": 722.427,
6147
+ "eval_steps_per_second": 11.559,
6148
+ "step": 307000
6149
+ },
6150
+ {
6151
+ "epoch": 7.84,
6152
+ "learning_rate": 0.00011247874457925261,
6153
+ "loss": 0.281,
6154
+ "step": 307500
6155
+ },
6156
+ {
6157
+ "epoch": 7.85,
6158
+ "learning_rate": 0.0001120205527878927,
6159
+ "loss": 0.2808,
6160
+ "step": 308000
6161
+ },
6162
+ {
6163
+ "epoch": 7.85,
6164
+ "eval_loss": 0.8111096620559692,
6165
+ "eval_runtime": 1.3207,
6166
+ "eval_samples_per_second": 757.185,
6167
+ "eval_steps_per_second": 12.115,
6168
+ "step": 308000
6169
+ },
6170
+ {
6171
+ "epoch": 7.86,
6172
+ "learning_rate": 0.00011156283101288165,
6173
+ "loss": 0.2813,
6174
+ "step": 308500
6175
+ },
6176
+ {
6177
+ "epoch": 7.88,
6178
+ "learning_rate": 0.00011110558425979132,
6179
+ "loss": 0.2811,
6180
+ "step": 309000
6181
+ },
6182
+ {
6183
+ "epoch": 7.88,
6184
+ "eval_loss": 0.8077411651611328,
6185
+ "eval_runtime": 1.3568,
6186
+ "eval_samples_per_second": 737.044,
6187
+ "eval_steps_per_second": 11.793,
6188
+ "step": 309000
6189
+ },
6190
+ {
6191
+ "epoch": 7.89,
6192
+ "learning_rate": 0.00011064881752899906,
6193
+ "loss": 0.281,
6194
+ "step": 309500
6195
+ },
6196
+ {
6197
+ "epoch": 7.9,
6198
+ "learning_rate": 0.00011019253581563262,
6199
+ "loss": 0.2812,
6200
+ "step": 310000
6201
+ },
6202
+ {
6203
+ "epoch": 7.9,
6204
+ "eval_loss": 0.8111229538917542,
6205
+ "eval_runtime": 1.2708,
6206
+ "eval_samples_per_second": 786.914,
6207
+ "eval_steps_per_second": 12.591,
6208
+ "step": 310000
6209
  }
6210
  ],
6211
  "max_steps": 500000,
6212
  "num_train_epochs": 13,
6213
+ "total_flos": 9.904006949618185e+21,
6214
  "trial_name": null,
6215
  "trial_params": null
6216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c5e4cc7c8890a029475b21a89ba73ee51bdbf0169ee989a07985e899b417451
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01b2181d9db98eb1abf2e59d3ffca75b8922c4c585b17c4c50bcc63a5ba1af87
3
  size 102501541