plip commited on
Commit
f7dec81
1 Parent(s): da48299

Training in progress, step 360000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:836759fedb3c04c1e36e698a6cd3899f162ede3f32c56386ef24d49bc2163cbb
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e16c64f372a1e3d1a1bc418a8e836e1cdfb665e54de868c2b11e7665a8124bd
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:998435b0f9ce53e66113d7bec14d46ccd8a7918639830988194463c1e5db1349
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc2fe2e5c738c8106278780fab9f7f87cb208c3a8d2ade89df93aa0343c89bdb
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c8523a23b2126f1e0b2250304237778c9e938ca58ea61d2aaa56f9cdd412f84
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:005b2e75d9554e018ed97e7633dc38306c4b6264c09e33e752be796ffbb52bec
3
+ size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1c4bd77bedf03001af3b9ef889b6dfe87e92115f91a7e639d6fdb4f2831fa70
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f90d85a554f4240712d12f4bc1ca42d7122c1ea252e7027b0c918e99e4e1de85
3
+ size 14439
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec34896b527ce9812ef17fb169750711c41b89b6349b3cbcdfa7be94e1a6baf1
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:795aa2456aa093dc231a0ad24cc817827fd817979cb85490c41b919c7e2ac93d
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d73239bf94e1fdf6dfa1faf7bc1d6eac9849348001cddeb5d7b0b44851b2a55
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3a87c23715b7e025f85b63ac84fed0695e2db4c59ea40d605d467271ee7eaff
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d82a5834c903434e02d3959ef5a762fdae6236f6271145404caf4e2755eebe70
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2870379f6b63b47a9122657f592b0d575a5671da1a1fb7b494f10475da4ee08
3
+ size 14439
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f0e5e58f841742c4dc32af4083c8e6dd9bd1edca9f29e95f20db66083298fd7
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccc684958440988665bb83e21073c7e935f44d96d06218e7d486fc15f417721c
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5cc705bc82ed7c42464d2fba05f65cc617ebee401f1689c9b01b9b6c7d23636
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7790e1f5d1d539cc24a5c7f36951077137e9c4173691b395bbb8021eb1098267
3
+ size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:280859229d77bcf03b4604363e8dcb5eb462972def25bd8dc14847679a550bcd
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:296d7ff2fcb06c6f8604bd8a18f2173bb33ea1cf17b1b7e10c614ffd53051e05
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3aecc6465a886ff601ea303358a61f89a30e07c965e206675258095a3d963058
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6e54422706a010aa16b679660182e5a0c0f546c43656852cb88a82c1d45dccf
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.723367750237447,
5
- "global_step": 350000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7006,11 +7006,211 @@
7006
  "eval_samples_per_second": 1982.304,
7007
  "eval_steps_per_second": 31.717,
7008
  "step": 350000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7009
  }
7010
  ],
7011
  "max_steps": 500000,
7012
  "num_train_epochs": 16,
7013
- "total_flos": 1.1181969977587741e+22,
7014
  "trial_name": null,
7015
  "trial_params": null
7016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.029749685958516,
5
+ "global_step": 360000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7006
  "eval_samples_per_second": 1982.304,
7007
  "eval_steps_per_second": 31.717,
7008
  "step": 350000
7009
+ },
7010
+ {
7011
+ "epoch": 10.74,
7012
+ "learning_rate": 7.529152489465592e-05,
7013
+ "loss": 0.3237,
7014
+ "step": 350500
7015
+ },
7016
+ {
7017
+ "epoch": 10.75,
7018
+ "learning_rate": 7.489140439617708e-05,
7019
+ "loss": 0.3239,
7020
+ "step": 351000
7021
+ },
7022
+ {
7023
+ "epoch": 10.75,
7024
+ "eval_loss": 0.7802942991256714,
7025
+ "eval_runtime": 0.4971,
7026
+ "eval_samples_per_second": 2011.848,
7027
+ "eval_steps_per_second": 32.19,
7028
+ "step": 351000
7029
+ },
7030
+ {
7031
+ "epoch": 10.77,
7032
+ "learning_rate": 7.449215995246522e-05,
7033
+ "loss": 0.3236,
7034
+ "step": 351500
7035
+ },
7036
+ {
7037
+ "epoch": 10.78,
7038
+ "learning_rate": 7.409379592959367e-05,
7039
+ "loss": 0.3237,
7040
+ "step": 352000
7041
+ },
7042
+ {
7043
+ "epoch": 10.78,
7044
+ "eval_loss": 0.7798171043395996,
7045
+ "eval_runtime": 0.501,
7046
+ "eval_samples_per_second": 1995.947,
7047
+ "eval_steps_per_second": 31.935,
7048
+ "step": 352000
7049
+ },
7050
+ {
7051
+ "epoch": 10.8,
7052
+ "learning_rate": 7.369631668400746e-05,
7053
+ "loss": 0.3234,
7054
+ "step": 352500
7055
+ },
7056
+ {
7057
+ "epoch": 10.82,
7058
+ "learning_rate": 7.3299726562476e-05,
7059
+ "loss": 0.3231,
7060
+ "step": 353000
7061
+ },
7062
+ {
7063
+ "epoch": 10.82,
7064
+ "eval_loss": 0.7781672477722168,
7065
+ "eval_runtime": 0.5053,
7066
+ "eval_samples_per_second": 1979.072,
7067
+ "eval_steps_per_second": 31.665,
7068
+ "step": 353000
7069
+ },
7070
+ {
7071
+ "epoch": 10.83,
7072
+ "learning_rate": 7.290402990204531e-05,
7073
+ "loss": 0.3233,
7074
+ "step": 353500
7075
+ },
7076
+ {
7077
+ "epoch": 10.85,
7078
+ "learning_rate": 7.250923102999073e-05,
7079
+ "loss": 0.3234,
7080
+ "step": 354000
7081
+ },
7082
+ {
7083
+ "epoch": 10.85,
7084
+ "eval_loss": 0.7746726870536804,
7085
+ "eval_runtime": 0.5021,
7086
+ "eval_samples_per_second": 1991.707,
7087
+ "eval_steps_per_second": 31.867,
7088
+ "step": 354000
7089
+ },
7090
+ {
7091
+ "epoch": 10.86,
7092
+ "learning_rate": 7.211533426376934e-05,
7093
+ "loss": 0.3234,
7094
+ "step": 354500
7095
+ },
7096
+ {
7097
+ "epoch": 10.88,
7098
+ "learning_rate": 7.172234391097317e-05,
7099
+ "loss": 0.3232,
7100
+ "step": 355000
7101
+ },
7102
+ {
7103
+ "epoch": 10.88,
7104
+ "eval_loss": 0.7761996984481812,
7105
+ "eval_runtime": 0.5166,
7106
+ "eval_samples_per_second": 1935.585,
7107
+ "eval_steps_per_second": 30.969,
7108
+ "step": 355000
7109
+ },
7110
+ {
7111
+ "epoch": 10.89,
7112
+ "learning_rate": 7.133026426928173e-05,
7113
+ "loss": 0.3231,
7114
+ "step": 355500
7115
+ },
7116
+ {
7117
+ "epoch": 10.91,
7118
+ "learning_rate": 7.093909962641514e-05,
7119
+ "loss": 0.3254,
7120
+ "step": 356000
7121
+ },
7122
+ {
7123
+ "epoch": 10.91,
7124
+ "eval_loss": 0.7848865985870361,
7125
+ "eval_runtime": 0.5114,
7126
+ "eval_samples_per_second": 1955.496,
7127
+ "eval_steps_per_second": 31.288,
7128
+ "step": 356000
7129
+ },
7130
+ {
7131
+ "epoch": 10.92,
7132
+ "learning_rate": 7.054885426008737e-05,
7133
+ "loss": 0.3229,
7134
+ "step": 356500
7135
+ },
7136
+ {
7137
+ "epoch": 10.94,
7138
+ "learning_rate": 7.015953243795907e-05,
7139
+ "loss": 0.3229,
7140
+ "step": 357000
7141
+ },
7142
+ {
7143
+ "epoch": 10.94,
7144
+ "eval_loss": 0.7789940237998962,
7145
+ "eval_runtime": 0.5055,
7146
+ "eval_samples_per_second": 1978.178,
7147
+ "eval_steps_per_second": 31.651,
7148
+ "step": 357000
7149
+ },
7150
+ {
7151
+ "epoch": 10.95,
7152
+ "learning_rate": 6.97711384175914e-05,
7153
+ "loss": 0.3244,
7154
+ "step": 357500
7155
+ },
7156
+ {
7157
+ "epoch": 10.97,
7158
+ "learning_rate": 6.938367644639911e-05,
7159
+ "loss": 0.3227,
7160
+ "step": 358000
7161
+ },
7162
+ {
7163
+ "epoch": 10.97,
7164
+ "eval_loss": 0.7808487415313721,
7165
+ "eval_runtime": 0.5081,
7166
+ "eval_samples_per_second": 1968.006,
7167
+ "eval_steps_per_second": 31.488,
7168
+ "step": 358000
7169
+ },
7170
+ {
7171
+ "epoch": 10.98,
7172
+ "learning_rate": 6.899715076160425e-05,
7173
+ "loss": 0.3226,
7174
+ "step": 358500
7175
+ },
7176
+ {
7177
+ "epoch": 11.0,
7178
+ "learning_rate": 6.861156559018986e-05,
7179
+ "loss": 0.323,
7180
+ "step": 359000
7181
+ },
7182
+ {
7183
+ "epoch": 11.0,
7184
+ "eval_loss": 0.7747591137886047,
7185
+ "eval_runtime": 0.493,
7186
+ "eval_samples_per_second": 2028.496,
7187
+ "eval_steps_per_second": 32.456,
7188
+ "step": 359000
7189
+ },
7190
+ {
7191
+ "epoch": 11.01,
7192
+ "learning_rate": 6.822692514885346e-05,
7193
+ "loss": 0.3225,
7194
+ "step": 359500
7195
+ },
7196
+ {
7197
+ "epoch": 11.03,
7198
+ "learning_rate": 6.784323364396135e-05,
7199
+ "loss": 0.3224,
7200
+ "step": 360000
7201
+ },
7202
+ {
7203
+ "epoch": 11.03,
7204
+ "eval_loss": 0.7760407328605652,
7205
+ "eval_runtime": 0.5204,
7206
+ "eval_samples_per_second": 1921.599,
7207
+ "eval_steps_per_second": 30.746,
7208
+ "step": 360000
7209
  }
7210
  ],
7211
  "max_steps": 500000,
7212
  "num_train_epochs": 16,
7213
+ "total_flos": 1.1501449128410266e+22,
7214
  "trial_name": null,
7215
  "trial_params": null
7216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:998435b0f9ce53e66113d7bec14d46ccd8a7918639830988194463c1e5db1349
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc2fe2e5c738c8106278780fab9f7f87cb208c3a8d2ade89df93aa0343c89bdb
3
  size 102501541