plip commited on
Commit
7ad594c
1 Parent(s): 36efee0

Training in progress, step 460000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48da217ea188770413b01ab5847a04b2b361555bd731533e7a2791bedfd928dd
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00828d7c5b2ee9d934868fffc62db5886616a7f03a8761ef8254b89f6589bb74
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2291b66065799571b4264696030000d7ec1a0e86601a7f2aefc345f1b8fac97d
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96cedfbffe43fc18644cc6de989fcca94f4a26075ab174ed0f9cb823c12e426d
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b5ef0740ac3df960078bba8a97e2da86f6b2673b0404aac1c245b336272b522
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b5ef0740ac3df960078bba8a97e2da86f6b2673b0404aac1c245b336272b522
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b5ef0740ac3df960078bba8a97e2da86f6b2673b0404aac1c245b336272b522
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b5ef0740ac3df960078bba8a97e2da86f6b2673b0404aac1c245b336272b522
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b5ef0740ac3df960078bba8a97e2da86f6b2673b0404aac1c245b336272b522
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b5ef0740ac3df960078bba8a97e2da86f6b2673b0404aac1c245b336272b522
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b5ef0740ac3df960078bba8a97e2da86f6b2673b0404aac1c245b336272b522
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b5ef0740ac3df960078bba8a97e2da86f6b2673b0404aac1c245b336272b522
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc29c434fb0390a8f4f90d65ac745a0b4f381dbd06e857762d450d4a464c7045
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60fd80961b777bf4901f5c7189278f8f31f61a50c51a19e170f6a1919a5ce33
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.468474438044753,
5
- "global_step": 450000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -9006,11 +9006,211 @@
9006
  "eval_samples_per_second": 787.258,
9007
  "eval_steps_per_second": 12.596,
9008
  "step": 450000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9009
  }
9010
  ],
9011
  "max_steps": 500000,
9012
  "num_train_epochs": 13,
9013
- "total_flos": 1.4376772976809005e+22,
9014
  "trial_name": null,
9015
  "trial_params": null
9016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.723329425556859,
5
+ "global_step": 460000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
9006
  "eval_samples_per_second": 787.258,
9007
  "eval_steps_per_second": 12.596,
9008
  "step": 450000
9009
+ },
9010
+ {
9011
+ "epoch": 11.48,
9012
+ "learning_rate": 1.770155470293445e-05,
9013
+ "loss": 0.2684,
9014
+ "step": 450500
9015
+ },
9016
+ {
9017
+ "epoch": 11.49,
9018
+ "learning_rate": 1.7548110926261522e-05,
9019
+ "loss": 0.2682,
9020
+ "step": 451000
9021
+ },
9022
+ {
9023
+ "epoch": 11.49,
9024
+ "eval_loss": 0.8042126297950745,
9025
+ "eval_runtime": 1.3486,
9026
+ "eval_samples_per_second": 741.514,
9027
+ "eval_steps_per_second": 11.864,
9028
+ "step": 451000
9029
+ },
9030
+ {
9031
+ "epoch": 11.51,
9032
+ "learning_rate": 1.7396170301425777e-05,
9033
+ "loss": 0.2683,
9034
+ "step": 451500
9035
+ },
9036
+ {
9037
+ "epoch": 11.52,
9038
+ "learning_rate": 1.7245734490025544e-05,
9039
+ "loss": 0.2683,
9040
+ "step": 452000
9041
+ },
9042
+ {
9043
+ "epoch": 11.52,
9044
+ "eval_loss": 0.8097262382507324,
9045
+ "eval_runtime": 1.2797,
9046
+ "eval_samples_per_second": 781.424,
9047
+ "eval_steps_per_second": 12.503,
9048
+ "step": 452000
9049
+ },
9050
+ {
9051
+ "epoch": 11.53,
9052
+ "learning_rate": 1.7096805137202738e-05,
9053
+ "loss": 0.2682,
9054
+ "step": 452500
9055
+ },
9056
+ {
9057
+ "epoch": 11.54,
9058
+ "learning_rate": 1.6949383871624917e-05,
9059
+ "loss": 0.2678,
9060
+ "step": 453000
9061
+ },
9062
+ {
9063
+ "epoch": 11.54,
9064
+ "eval_loss": 0.8061630725860596,
9065
+ "eval_runtime": 1.2741,
9066
+ "eval_samples_per_second": 784.845,
9067
+ "eval_steps_per_second": 12.558,
9068
+ "step": 453000
9069
+ },
9070
+ {
9071
+ "epoch": 11.56,
9072
+ "learning_rate": 1.6803472305467368e-05,
9073
+ "loss": 0.2683,
9074
+ "step": 453500
9075
+ },
9076
+ {
9077
+ "epoch": 11.57,
9078
+ "learning_rate": 1.665907203439568e-05,
9079
+ "loss": 0.2678,
9080
+ "step": 454000
9081
+ },
9082
+ {
9083
+ "epoch": 11.57,
9084
+ "eval_loss": 0.808397114276886,
9085
+ "eval_runtime": 1.2841,
9086
+ "eval_samples_per_second": 778.785,
9087
+ "eval_steps_per_second": 12.461,
9088
+ "step": 454000
9089
+ },
9090
+ {
9091
+ "epoch": 11.58,
9092
+ "learning_rate": 1.6516184637548058e-05,
9093
+ "loss": 0.2681,
9094
+ "step": 454500
9095
+ },
9096
+ {
9097
+ "epoch": 11.6,
9098
+ "learning_rate": 1.6374811677518142e-05,
9099
+ "loss": 0.2681,
9100
+ "step": 455000
9101
+ },
9102
+ {
9103
+ "epoch": 11.6,
9104
+ "eval_loss": 0.8134533762931824,
9105
+ "eval_runtime": 1.2711,
9106
+ "eval_samples_per_second": 786.7,
9107
+ "eval_steps_per_second": 12.587,
9108
+ "step": 455000
9109
+ },
9110
+ {
9111
+ "epoch": 11.61,
9112
+ "learning_rate": 1.6234954700338025e-05,
9113
+ "loss": 0.2679,
9114
+ "step": 455500
9115
+ },
9116
+ {
9117
+ "epoch": 11.62,
9118
+ "learning_rate": 1.6096615235461148e-05,
9119
+ "loss": 0.2678,
9120
+ "step": 456000
9121
+ },
9122
+ {
9123
+ "epoch": 11.62,
9124
+ "eval_loss": 0.8097538948059082,
9125
+ "eval_runtime": 1.2712,
9126
+ "eval_samples_per_second": 786.682,
9127
+ "eval_steps_per_second": 12.587,
9128
+ "step": 456000
9129
+ },
9130
+ {
9131
+ "epoch": 11.63,
9132
+ "learning_rate": 1.59597947957458e-05,
9133
+ "loss": 0.2678,
9134
+ "step": 456500
9135
+ },
9136
+ {
9137
+ "epoch": 11.65,
9138
+ "learning_rate": 1.5824494877438344e-05,
9139
+ "loss": 0.2681,
9140
+ "step": 457000
9141
+ },
9142
+ {
9143
+ "epoch": 11.65,
9144
+ "eval_loss": 0.8078885674476624,
9145
+ "eval_runtime": 1.2534,
9146
+ "eval_samples_per_second": 797.802,
9147
+ "eval_steps_per_second": 12.765,
9148
+ "step": 457000
9149
+ },
9150
+ {
9151
+ "epoch": 11.66,
9152
+ "learning_rate": 1.569071696015702e-05,
9153
+ "loss": 0.2679,
9154
+ "step": 457500
9155
+ },
9156
+ {
9157
+ "epoch": 11.67,
9158
+ "learning_rate": 1.555846250687569e-05,
9159
+ "loss": 0.2679,
9160
+ "step": 458000
9161
+ },
9162
+ {
9163
+ "epoch": 11.67,
9164
+ "eval_loss": 0.8051915764808655,
9165
+ "eval_runtime": 1.2442,
9166
+ "eval_samples_per_second": 803.754,
9167
+ "eval_steps_per_second": 12.86,
9168
+ "step": 458000
9169
+ },
9170
+ {
9171
+ "epoch": 11.69,
9172
+ "learning_rate": 1.542773296390789e-05,
9173
+ "loss": 0.2679,
9174
+ "step": 458500
9175
+ },
9176
+ {
9177
+ "epoch": 11.7,
9178
+ "learning_rate": 1.5298529760890945e-05,
9179
+ "loss": 0.268,
9180
+ "step": 459000
9181
+ },
9182
+ {
9183
+ "epoch": 11.7,
9184
+ "eval_loss": 0.8038084506988525,
9185
+ "eval_runtime": 1.3255,
9186
+ "eval_samples_per_second": 754.438,
9187
+ "eval_steps_per_second": 12.071,
9188
+ "step": 459000
9189
+ },
9190
+ {
9191
+ "epoch": 11.71,
9192
+ "learning_rate": 1.5170854310770376e-05,
9193
+ "loss": 0.2678,
9194
+ "step": 459500
9195
+ },
9196
+ {
9197
+ "epoch": 11.72,
9198
+ "learning_rate": 1.5044708009784457e-05,
9199
+ "loss": 0.268,
9200
+ "step": 460000
9201
+ },
9202
+ {
9203
+ "epoch": 11.72,
9204
+ "eval_loss": 0.8099783062934875,
9205
+ "eval_runtime": 1.3008,
9206
+ "eval_samples_per_second": 768.777,
9207
+ "eval_steps_per_second": 12.3,
9208
+ "step": 460000
9209
  }
9210
  ],
9211
  "max_steps": 500000,
9212
  "num_train_epochs": 13,
9213
+ "total_flos": 1.4696260115583505e+22,
9214
  "trial_name": null,
9215
  "trial_params": null
9216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2291b66065799571b4264696030000d7ec1a0e86601a7f2aefc345f1b8fac97d
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96cedfbffe43fc18644cc6de989fcca94f4a26075ab174ed0f9cb823c12e426d
3
  size 102501541