plip commited on
Commit
0fd1843
1 Parent(s): 9b4d6d0

Training in progress, step 360000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fe20f9a7048e778f6f933d7208f0d74ea50740d5e42b03ab00e70891186e444
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c017dc7aa51a43ed67b54be4391afb3209331a95386c7a0f1faefbfda3688b82
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b416d6314d39e9369d9492ca6e3f40f06cfe57d4e4160d5311194f08695a8e6
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86d6df4782178506acacd0c83df02e5b041758e0dad6be6ff1fb20dae19c22b2
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb525c3a5a5b20b39f1725d61547b357ecdab21b75b423ee22380919a8bde5bb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb525c3a5a5b20b39f1725d61547b357ecdab21b75b423ee22380919a8bde5bb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb525c3a5a5b20b39f1725d61547b357ecdab21b75b423ee22380919a8bde5bb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb525c3a5a5b20b39f1725d61547b357ecdab21b75b423ee22380919a8bde5bb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb525c3a5a5b20b39f1725d61547b357ecdab21b75b423ee22380919a8bde5bb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb525c3a5a5b20b39f1725d61547b357ecdab21b75b423ee22380919a8bde5bb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb525c3a5a5b20b39f1725d61547b357ecdab21b75b423ee22380919a8bde5bb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb525c3a5a5b20b39f1725d61547b357ecdab21b75b423ee22380919a8bde5bb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3aecc6465a886ff601ea303358a61f89a30e07c965e206675258095a3d963058
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6e54422706a010aa16b679660182e5a0c0f546c43656852cb88a82c1d45dccf
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.919924562923697,
5
- "global_step": 350000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7006,11 +7006,211 @@
7006
  "eval_samples_per_second": 791.502,
7007
  "eval_steps_per_second": 12.664,
7008
  "step": 350000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7009
  }
7010
  ],
7011
  "max_steps": 500000,
7012
  "num_train_epochs": 13,
7013
- "total_flos": 1.118194202580314e+22,
7014
  "trial_name": null,
7015
  "trial_params": null
7016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.174779550435803,
5
+ "global_step": 360000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7006
  "eval_samples_per_second": 791.502,
7007
  "eval_steps_per_second": 12.664,
7008
  "step": 350000
7009
+ },
7010
+ {
7011
+ "epoch": 8.93,
7012
+ "learning_rate": 7.529152489465592e-05,
7013
+ "loss": 0.277,
7014
+ "step": 350500
7015
+ },
7016
+ {
7017
+ "epoch": 8.95,
7018
+ "learning_rate": 7.489140439617708e-05,
7019
+ "loss": 0.2766,
7020
+ "step": 351000
7021
+ },
7022
+ {
7023
+ "epoch": 8.95,
7024
+ "eval_loss": 0.8036056160926819,
7025
+ "eval_runtime": 1.2298,
7026
+ "eval_samples_per_second": 813.125,
7027
+ "eval_steps_per_second": 13.01,
7028
+ "step": 351000
7029
+ },
7030
+ {
7031
+ "epoch": 8.96,
7032
+ "learning_rate": 7.449215995246522e-05,
7033
+ "loss": 0.2765,
7034
+ "step": 351500
7035
+ },
7036
+ {
7037
+ "epoch": 8.97,
7038
+ "learning_rate": 7.409379592959367e-05,
7039
+ "loss": 0.2765,
7040
+ "step": 352000
7041
+ },
7042
+ {
7043
+ "epoch": 8.97,
7044
+ "eval_loss": 0.8099916577339172,
7045
+ "eval_runtime": 1.258,
7046
+ "eval_samples_per_second": 794.9,
7047
+ "eval_steps_per_second": 12.718,
7048
+ "step": 352000
7049
+ },
7050
+ {
7051
+ "epoch": 8.98,
7052
+ "learning_rate": 7.369631668400746e-05,
7053
+ "loss": 0.2763,
7054
+ "step": 352500
7055
+ },
7056
+ {
7057
+ "epoch": 9.0,
7058
+ "learning_rate": 7.3299726562476e-05,
7059
+ "loss": 0.2762,
7060
+ "step": 353000
7061
+ },
7062
+ {
7063
+ "epoch": 9.0,
7064
+ "eval_loss": 0.8091428279876709,
7065
+ "eval_runtime": 1.2439,
7066
+ "eval_samples_per_second": 803.907,
7067
+ "eval_steps_per_second": 12.863,
7068
+ "step": 353000
7069
+ },
7070
+ {
7071
+ "epoch": 9.01,
7072
+ "learning_rate": 7.290402990204531e-05,
7073
+ "loss": 0.2763,
7074
+ "step": 353500
7075
+ },
7076
+ {
7077
+ "epoch": 9.02,
7078
+ "learning_rate": 7.250923102999073e-05,
7079
+ "loss": 0.2765,
7080
+ "step": 354000
7081
+ },
7082
+ {
7083
+ "epoch": 9.02,
7084
+ "eval_loss": 0.8080966472625732,
7085
+ "eval_runtime": 1.3706,
7086
+ "eval_samples_per_second": 729.606,
7087
+ "eval_steps_per_second": 11.674,
7088
+ "step": 354000
7089
+ },
7090
+ {
7091
+ "epoch": 9.03,
7092
+ "learning_rate": 7.211533426376934e-05,
7093
+ "loss": 0.2762,
7094
+ "step": 354500
7095
+ },
7096
+ {
7097
+ "epoch": 9.05,
7098
+ "learning_rate": 7.172234391097317e-05,
7099
+ "loss": 0.2763,
7100
+ "step": 355000
7101
+ },
7102
+ {
7103
+ "epoch": 9.05,
7104
+ "eval_loss": 0.8072100877761841,
7105
+ "eval_runtime": 1.3323,
7106
+ "eval_samples_per_second": 750.572,
7107
+ "eval_steps_per_second": 12.009,
7108
+ "step": 355000
7109
+ },
7110
+ {
7111
+ "epoch": 9.06,
7112
+ "learning_rate": 7.133026426928173e-05,
7113
+ "loss": 0.2764,
7114
+ "step": 355500
7115
+ },
7116
+ {
7117
+ "epoch": 9.07,
7118
+ "learning_rate": 7.093909962641514e-05,
7119
+ "loss": 0.2763,
7120
+ "step": 356000
7121
+ },
7122
+ {
7123
+ "epoch": 9.07,
7124
+ "eval_loss": 0.8050107359886169,
7125
+ "eval_runtime": 1.3391,
7126
+ "eval_samples_per_second": 746.746,
7127
+ "eval_steps_per_second": 11.948,
7128
+ "step": 356000
7129
+ },
7130
+ {
7131
+ "epoch": 9.09,
7132
+ "learning_rate": 7.054885426008737e-05,
7133
+ "loss": 0.276,
7134
+ "step": 356500
7135
+ },
7136
+ {
7137
+ "epoch": 9.1,
7138
+ "learning_rate": 7.015953243795907e-05,
7139
+ "loss": 0.2763,
7140
+ "step": 357000
7141
+ },
7142
+ {
7143
+ "epoch": 9.1,
7144
+ "eval_loss": 0.8131558299064636,
7145
+ "eval_runtime": 1.3479,
7146
+ "eval_samples_per_second": 741.911,
7147
+ "eval_steps_per_second": 11.871,
7148
+ "step": 357000
7149
+ },
7150
+ {
7151
+ "epoch": 9.11,
7152
+ "learning_rate": 6.97711384175914e-05,
7153
+ "loss": 0.2762,
7154
+ "step": 357500
7155
+ },
7156
+ {
7157
+ "epoch": 9.12,
7158
+ "learning_rate": 6.938367644639911e-05,
7159
+ "loss": 0.2758,
7160
+ "step": 358000
7161
+ },
7162
+ {
7163
+ "epoch": 9.12,
7164
+ "eval_loss": 0.8091667294502258,
7165
+ "eval_runtime": 1.3351,
7166
+ "eval_samples_per_second": 749.026,
7167
+ "eval_steps_per_second": 11.984,
7168
+ "step": 358000
7169
+ },
7170
+ {
7171
+ "epoch": 9.14,
7172
+ "learning_rate": 6.899715076160425e-05,
7173
+ "loss": 0.2757,
7174
+ "step": 358500
7175
+ },
7176
+ {
7177
+ "epoch": 9.15,
7178
+ "learning_rate": 6.861156559018986e-05,
7179
+ "loss": 0.2758,
7180
+ "step": 359000
7181
+ },
7182
+ {
7183
+ "epoch": 9.15,
7184
+ "eval_loss": 0.8032931685447693,
7185
+ "eval_runtime": 1.2963,
7186
+ "eval_samples_per_second": 771.398,
7187
+ "eval_steps_per_second": 12.342,
7188
+ "step": 359000
7189
+ },
7190
+ {
7191
+ "epoch": 9.16,
7192
+ "learning_rate": 6.822692514885346e-05,
7193
+ "loss": 0.2757,
7194
+ "step": 359500
7195
+ },
7196
+ {
7197
+ "epoch": 9.17,
7198
+ "learning_rate": 6.784323364396135e-05,
7199
+ "loss": 0.2757,
7200
+ "step": 360000
7201
+ },
7202
+ {
7203
+ "epoch": 9.17,
7204
+ "eval_loss": 0.8121919631958008,
7205
+ "eval_runtime": 1.3627,
7206
+ "eval_samples_per_second": 733.817,
7207
+ "eval_steps_per_second": 11.741,
7208
+ "step": 360000
7209
  }
7210
  ],
7211
  "max_steps": 500000,
7212
  "num_train_epochs": 13,
7213
+ "total_flos": 1.1501415685664595e+22,
7214
  "trial_name": null,
7215
  "trial_params": null
7216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b416d6314d39e9369d9492ca6e3f40f06cfe57d4e4160d5311194f08695a8e6
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86d6df4782178506acacd0c83df02e5b041758e0dad6be6ff1fb20dae19c22b2
3
  size 102501541