plip commited on
Commit
2573838
1 Parent(s): 82521fb

Training in progress, step 460000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7cf25044f894af33b58ee33e839ca67a9010ce216694a7f4c91e8f90caf02e3
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3851379e290e595ff406c21b8b10ddb1e73359dd0f6752ee66fd50b92159710
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f48e297dd8d43fe26955c08bdc18374b43b5c5c7bf58df74b63ccfce891d130
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51cc55a03d0db3d8e5de6630971dddba20e9587291496d77623230dc60cf541c
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d0eaf38f8d76dc97fc60763011f1de34c7a2cb3c95faaa0610cc0f4af72cd60
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca511f19607ffe13077a404fa6aa93a3f99da5d803f11fc8cc4ffd982f7eaa96
3
+ size 14439
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1aee1e0607f38b87a3b735ae98b8e01339f7cc72ffa6ccf3c213d28824ed54a9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4c5e63367d8a48da7291ac4cad9dbf22e45f23c04ad6dc36fb819eee5567b30
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cceb842179d613224b3b5f8d750e75368fc012474b9befae3962586a3fa07c34
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5ef310867dca4f3b22b044f838f0a52fca0a2ae9be44b0e97a33f54ca14b37c
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e777f0df33e0c44b8c16c09cacb56ca419e02a262aa3b9ece5534f0249ba6105
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07690c863ee7f95b67ca0330c6c558692a4c5296256a3c0c1e6edf6ee11f4a99
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44ab8fd99918dee712abc63025e4d8d70437de212ab6324f5ae0cbe74ed24f94
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd623ae3bafd7bee294fb51f9ab76259bcc1a1e3cb80be656ec13fa7aea6663f
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef227953f09853ae7340813887aac1a30150643cccf7844f37d1f0ff5cb9042d
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18f7a345bdac720f75c3e4ebf8a9d6d8895e230a0405992893aeb1c567a9dd75
3
+ size 14439
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7caa6c821a93c7fa5649d2fba3a2ebec3c3cd1a1620660f06157dc5569333b5a
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:099663ce4dbefceb2b014dd475ab003207ed8b9ddbfafed2491187b9c10f927a
3
+ size 14439
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee0e57d5d9717a7b27c7acd3b2ffc6fc1518aa4d2ebf016d3b2d036634f60df0
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4435aedaa43e7ef631652a5bc79634e81959ca100d2c5dc8b85db021834925b4
3
+ size 14439
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc29c434fb0390a8f4f90d65ac745a0b4f381dbd06e857762d450d4a464c7045
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60fd80961b777bf4901f5c7189278f8f31f61a50c51a19e170f6a1919a5ce33
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 13.787187107448144,
5
- "global_step": 450000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -9006,11 +9006,211 @@
9006
  "eval_samples_per_second": 1989.909,
9007
  "eval_steps_per_second": 31.839,
9008
  "step": 450000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9009
  }
9010
  ],
9011
  "max_steps": 500000,
9012
  "num_train_epochs": 16,
9013
- "total_flos": 1.4376817401476814e+22,
9014
  "trial_name": null,
9015
  "trial_params": null
9016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 14.093569043169214,
5
+ "global_step": 460000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
9006
  "eval_samples_per_second": 1989.909,
9007
  "eval_steps_per_second": 31.839,
9008
  "step": 450000
9009
+ },
9010
+ {
9011
+ "epoch": 13.8,
9012
+ "learning_rate": 1.770155470293445e-05,
9013
+ "loss": 0.3141,
9014
+ "step": 450500
9015
+ },
9016
+ {
9017
+ "epoch": 13.82,
9018
+ "learning_rate": 1.7548110926261522e-05,
9019
+ "loss": 0.3144,
9020
+ "step": 451000
9021
+ },
9022
+ {
9023
+ "epoch": 13.82,
9024
+ "eval_loss": 0.7752067446708679,
9025
+ "eval_runtime": 0.5146,
9026
+ "eval_samples_per_second": 1943.251,
9027
+ "eval_steps_per_second": 31.092,
9028
+ "step": 451000
9029
+ },
9030
+ {
9031
+ "epoch": 13.83,
9032
+ "learning_rate": 1.7396170301425777e-05,
9033
+ "loss": 0.3141,
9034
+ "step": 451500
9035
+ },
9036
+ {
9037
+ "epoch": 13.85,
9038
+ "learning_rate": 1.7245734490025544e-05,
9039
+ "loss": 0.3142,
9040
+ "step": 452000
9041
+ },
9042
+ {
9043
+ "epoch": 13.85,
9044
+ "eval_loss": 0.7712000012397766,
9045
+ "eval_runtime": 0.5015,
9046
+ "eval_samples_per_second": 1994.171,
9047
+ "eval_steps_per_second": 31.907,
9048
+ "step": 452000
9049
+ },
9050
+ {
9051
+ "epoch": 13.86,
9052
+ "learning_rate": 1.7096805137202738e-05,
9053
+ "loss": 0.3139,
9054
+ "step": 452500
9055
+ },
9056
+ {
9057
+ "epoch": 13.88,
9058
+ "learning_rate": 1.6949383871624917e-05,
9059
+ "loss": 0.3139,
9060
+ "step": 453000
9061
+ },
9062
+ {
9063
+ "epoch": 13.88,
9064
+ "eval_loss": 0.7725095152854919,
9065
+ "eval_runtime": 0.5022,
9066
+ "eval_samples_per_second": 1991.07,
9067
+ "eval_steps_per_second": 31.857,
9068
+ "step": 453000
9069
+ },
9070
+ {
9071
+ "epoch": 13.89,
9072
+ "learning_rate": 1.6803472305467368e-05,
9073
+ "loss": 0.3137,
9074
+ "step": 453500
9075
+ },
9076
+ {
9077
+ "epoch": 13.91,
9078
+ "learning_rate": 1.665907203439568e-05,
9079
+ "loss": 0.3139,
9080
+ "step": 454000
9081
+ },
9082
+ {
9083
+ "epoch": 13.91,
9084
+ "eval_loss": 0.7726877927780151,
9085
+ "eval_runtime": 0.5045,
9086
+ "eval_samples_per_second": 1982.357,
9087
+ "eval_steps_per_second": 31.718,
9088
+ "step": 454000
9089
+ },
9090
+ {
9091
+ "epoch": 13.93,
9092
+ "learning_rate": 1.6516184637548058e-05,
9093
+ "loss": 0.3138,
9094
+ "step": 454500
9095
+ },
9096
+ {
9097
+ "epoch": 13.94,
9098
+ "learning_rate": 1.6374811677518142e-05,
9099
+ "loss": 0.314,
9100
+ "step": 455000
9101
+ },
9102
+ {
9103
+ "epoch": 13.94,
9104
+ "eval_loss": 0.7759785056114197,
9105
+ "eval_runtime": 0.5189,
9106
+ "eval_samples_per_second": 1927.23,
9107
+ "eval_steps_per_second": 30.836,
9108
+ "step": 455000
9109
+ },
9110
+ {
9111
+ "epoch": 13.96,
9112
+ "learning_rate": 1.6234954700338025e-05,
9113
+ "loss": 0.3135,
9114
+ "step": 455500
9115
+ },
9116
+ {
9117
+ "epoch": 13.97,
9118
+ "learning_rate": 1.6096615235461148e-05,
9119
+ "loss": 0.3135,
9120
+ "step": 456000
9121
+ },
9122
+ {
9123
+ "epoch": 13.97,
9124
+ "eval_loss": 0.773828387260437,
9125
+ "eval_runtime": 0.5012,
9126
+ "eval_samples_per_second": 1995.149,
9127
+ "eval_steps_per_second": 31.922,
9128
+ "step": 456000
9129
+ },
9130
+ {
9131
+ "epoch": 13.99,
9132
+ "learning_rate": 1.59597947957458e-05,
9133
+ "loss": 0.3138,
9134
+ "step": 456500
9135
+ },
9136
+ {
9137
+ "epoch": 14.0,
9138
+ "learning_rate": 1.5824494877438344e-05,
9139
+ "loss": 0.3137,
9140
+ "step": 457000
9141
+ },
9142
+ {
9143
+ "epoch": 14.0,
9144
+ "eval_loss": 0.7719975709915161,
9145
+ "eval_runtime": 0.5107,
9146
+ "eval_samples_per_second": 1958.237,
9147
+ "eval_steps_per_second": 31.332,
9148
+ "step": 457000
9149
+ },
9150
+ {
9151
+ "epoch": 14.02,
9152
+ "learning_rate": 1.569071696015702e-05,
9153
+ "loss": 0.3135,
9154
+ "step": 457500
9155
+ },
9156
+ {
9157
+ "epoch": 14.03,
9158
+ "learning_rate": 1.555846250687569e-05,
9159
+ "loss": 0.3138,
9160
+ "step": 458000
9161
+ },
9162
+ {
9163
+ "epoch": 14.03,
9164
+ "eval_loss": 0.7755674123764038,
9165
+ "eval_runtime": 0.5013,
9166
+ "eval_samples_per_second": 1994.642,
9167
+ "eval_steps_per_second": 31.914,
9168
+ "step": 458000
9169
+ },
9170
+ {
9171
+ "epoch": 14.05,
9172
+ "learning_rate": 1.542773296390789e-05,
9173
+ "loss": 0.3134,
9174
+ "step": 458500
9175
+ },
9176
+ {
9177
+ "epoch": 14.06,
9178
+ "learning_rate": 1.5298529760890945e-05,
9179
+ "loss": 0.3135,
9180
+ "step": 459000
9181
+ },
9182
+ {
9183
+ "epoch": 14.06,
9184
+ "eval_loss": 0.7749778032302856,
9185
+ "eval_runtime": 0.5253,
9186
+ "eval_samples_per_second": 1903.509,
9187
+ "eval_steps_per_second": 30.456,
9188
+ "step": 459000
9189
+ },
9190
+ {
9191
+ "epoch": 14.08,
9192
+ "learning_rate": 1.5170854310770376e-05,
9193
+ "loss": 0.3136,
9194
+ "step": 459500
9195
+ },
9196
+ {
9197
+ "epoch": 14.09,
9198
+ "learning_rate": 1.5044708009784457e-05,
9199
+ "loss": 0.3134,
9200
+ "step": 460000
9201
+ },
9202
+ {
9203
+ "epoch": 14.09,
9204
+ "eval_loss": 0.7761635184288025,
9205
+ "eval_runtime": 0.5003,
9206
+ "eval_samples_per_second": 1998.696,
9207
+ "eval_steps_per_second": 31.979,
9208
+ "step": 460000
9209
  }
9210
  ],
9211
  "max_steps": 500000,
9212
  "num_train_epochs": 16,
9213
+ "total_flos": 1.4696296552299338e+22,
9214
  "trial_name": null,
9215
  "trial_params": null
9216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f48e297dd8d43fe26955c08bdc18374b43b5c5c7bf58df74b63ccfce891d130
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51cc55a03d0db3d8e5de6630971dddba20e9587291496d77623230dc60cf541c
3
  size 102501541