Training in progress, step 460000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00828d7c5b2ee9d934868fffc62db5886616a7f03a8761ef8254b89f6589bb74
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96cedfbffe43fc18644cc6de989fcca94f4a26075ab174ed0f9cb823c12e426d
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5faebccac6dae6326e45bd9ddb3db4ec532e2978e697cb2496075e507408e404
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d60fd80961b777bf4901f5c7189278f8f31f61a50c51a19e170f6a1919a5ce33
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 11.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -9006,11 +9006,211 @@
|
|
9006 |
"eval_samples_per_second": 787.258,
|
9007 |
"eval_steps_per_second": 12.596,
|
9008 |
"step": 450000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9009 |
}
|
9010 |
],
|
9011 |
"max_steps": 500000,
|
9012 |
"num_train_epochs": 13,
|
9013 |
-
"total_flos": 1.
|
9014 |
"trial_name": null,
|
9015 |
"trial_params": null
|
9016 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 11.723329425556859,
|
5 |
+
"global_step": 460000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
9006 |
"eval_samples_per_second": 787.258,
|
9007 |
"eval_steps_per_second": 12.596,
|
9008 |
"step": 450000
|
9009 |
+
},
|
9010 |
+
{
|
9011 |
+
"epoch": 11.48,
|
9012 |
+
"learning_rate": 1.770155470293445e-05,
|
9013 |
+
"loss": 0.2684,
|
9014 |
+
"step": 450500
|
9015 |
+
},
|
9016 |
+
{
|
9017 |
+
"epoch": 11.49,
|
9018 |
+
"learning_rate": 1.7548110926261522e-05,
|
9019 |
+
"loss": 0.2682,
|
9020 |
+
"step": 451000
|
9021 |
+
},
|
9022 |
+
{
|
9023 |
+
"epoch": 11.49,
|
9024 |
+
"eval_loss": 0.8042126297950745,
|
9025 |
+
"eval_runtime": 1.3486,
|
9026 |
+
"eval_samples_per_second": 741.514,
|
9027 |
+
"eval_steps_per_second": 11.864,
|
9028 |
+
"step": 451000
|
9029 |
+
},
|
9030 |
+
{
|
9031 |
+
"epoch": 11.51,
|
9032 |
+
"learning_rate": 1.7396170301425777e-05,
|
9033 |
+
"loss": 0.2683,
|
9034 |
+
"step": 451500
|
9035 |
+
},
|
9036 |
+
{
|
9037 |
+
"epoch": 11.52,
|
9038 |
+
"learning_rate": 1.7245734490025544e-05,
|
9039 |
+
"loss": 0.2683,
|
9040 |
+
"step": 452000
|
9041 |
+
},
|
9042 |
+
{
|
9043 |
+
"epoch": 11.52,
|
9044 |
+
"eval_loss": 0.8097262382507324,
|
9045 |
+
"eval_runtime": 1.2797,
|
9046 |
+
"eval_samples_per_second": 781.424,
|
9047 |
+
"eval_steps_per_second": 12.503,
|
9048 |
+
"step": 452000
|
9049 |
+
},
|
9050 |
+
{
|
9051 |
+
"epoch": 11.53,
|
9052 |
+
"learning_rate": 1.7096805137202738e-05,
|
9053 |
+
"loss": 0.2682,
|
9054 |
+
"step": 452500
|
9055 |
+
},
|
9056 |
+
{
|
9057 |
+
"epoch": 11.54,
|
9058 |
+
"learning_rate": 1.6949383871624917e-05,
|
9059 |
+
"loss": 0.2678,
|
9060 |
+
"step": 453000
|
9061 |
+
},
|
9062 |
+
{
|
9063 |
+
"epoch": 11.54,
|
9064 |
+
"eval_loss": 0.8061630725860596,
|
9065 |
+
"eval_runtime": 1.2741,
|
9066 |
+
"eval_samples_per_second": 784.845,
|
9067 |
+
"eval_steps_per_second": 12.558,
|
9068 |
+
"step": 453000
|
9069 |
+
},
|
9070 |
+
{
|
9071 |
+
"epoch": 11.56,
|
9072 |
+
"learning_rate": 1.6803472305467368e-05,
|
9073 |
+
"loss": 0.2683,
|
9074 |
+
"step": 453500
|
9075 |
+
},
|
9076 |
+
{
|
9077 |
+
"epoch": 11.57,
|
9078 |
+
"learning_rate": 1.665907203439568e-05,
|
9079 |
+
"loss": 0.2678,
|
9080 |
+
"step": 454000
|
9081 |
+
},
|
9082 |
+
{
|
9083 |
+
"epoch": 11.57,
|
9084 |
+
"eval_loss": 0.808397114276886,
|
9085 |
+
"eval_runtime": 1.2841,
|
9086 |
+
"eval_samples_per_second": 778.785,
|
9087 |
+
"eval_steps_per_second": 12.461,
|
9088 |
+
"step": 454000
|
9089 |
+
},
|
9090 |
+
{
|
9091 |
+
"epoch": 11.58,
|
9092 |
+
"learning_rate": 1.6516184637548058e-05,
|
9093 |
+
"loss": 0.2681,
|
9094 |
+
"step": 454500
|
9095 |
+
},
|
9096 |
+
{
|
9097 |
+
"epoch": 11.6,
|
9098 |
+
"learning_rate": 1.6374811677518142e-05,
|
9099 |
+
"loss": 0.2681,
|
9100 |
+
"step": 455000
|
9101 |
+
},
|
9102 |
+
{
|
9103 |
+
"epoch": 11.6,
|
9104 |
+
"eval_loss": 0.8134533762931824,
|
9105 |
+
"eval_runtime": 1.2711,
|
9106 |
+
"eval_samples_per_second": 786.7,
|
9107 |
+
"eval_steps_per_second": 12.587,
|
9108 |
+
"step": 455000
|
9109 |
+
},
|
9110 |
+
{
|
9111 |
+
"epoch": 11.61,
|
9112 |
+
"learning_rate": 1.6234954700338025e-05,
|
9113 |
+
"loss": 0.2679,
|
9114 |
+
"step": 455500
|
9115 |
+
},
|
9116 |
+
{
|
9117 |
+
"epoch": 11.62,
|
9118 |
+
"learning_rate": 1.6096615235461148e-05,
|
9119 |
+
"loss": 0.2678,
|
9120 |
+
"step": 456000
|
9121 |
+
},
|
9122 |
+
{
|
9123 |
+
"epoch": 11.62,
|
9124 |
+
"eval_loss": 0.8097538948059082,
|
9125 |
+
"eval_runtime": 1.2712,
|
9126 |
+
"eval_samples_per_second": 786.682,
|
9127 |
+
"eval_steps_per_second": 12.587,
|
9128 |
+
"step": 456000
|
9129 |
+
},
|
9130 |
+
{
|
9131 |
+
"epoch": 11.63,
|
9132 |
+
"learning_rate": 1.59597947957458e-05,
|
9133 |
+
"loss": 0.2678,
|
9134 |
+
"step": 456500
|
9135 |
+
},
|
9136 |
+
{
|
9137 |
+
"epoch": 11.65,
|
9138 |
+
"learning_rate": 1.5824494877438344e-05,
|
9139 |
+
"loss": 0.2681,
|
9140 |
+
"step": 457000
|
9141 |
+
},
|
9142 |
+
{
|
9143 |
+
"epoch": 11.65,
|
9144 |
+
"eval_loss": 0.8078885674476624,
|
9145 |
+
"eval_runtime": 1.2534,
|
9146 |
+
"eval_samples_per_second": 797.802,
|
9147 |
+
"eval_steps_per_second": 12.765,
|
9148 |
+
"step": 457000
|
9149 |
+
},
|
9150 |
+
{
|
9151 |
+
"epoch": 11.66,
|
9152 |
+
"learning_rate": 1.569071696015702e-05,
|
9153 |
+
"loss": 0.2679,
|
9154 |
+
"step": 457500
|
9155 |
+
},
|
9156 |
+
{
|
9157 |
+
"epoch": 11.67,
|
9158 |
+
"learning_rate": 1.555846250687569e-05,
|
9159 |
+
"loss": 0.2679,
|
9160 |
+
"step": 458000
|
9161 |
+
},
|
9162 |
+
{
|
9163 |
+
"epoch": 11.67,
|
9164 |
+
"eval_loss": 0.8051915764808655,
|
9165 |
+
"eval_runtime": 1.2442,
|
9166 |
+
"eval_samples_per_second": 803.754,
|
9167 |
+
"eval_steps_per_second": 12.86,
|
9168 |
+
"step": 458000
|
9169 |
+
},
|
9170 |
+
{
|
9171 |
+
"epoch": 11.69,
|
9172 |
+
"learning_rate": 1.542773296390789e-05,
|
9173 |
+
"loss": 0.2679,
|
9174 |
+
"step": 458500
|
9175 |
+
},
|
9176 |
+
{
|
9177 |
+
"epoch": 11.7,
|
9178 |
+
"learning_rate": 1.5298529760890945e-05,
|
9179 |
+
"loss": 0.268,
|
9180 |
+
"step": 459000
|
9181 |
+
},
|
9182 |
+
{
|
9183 |
+
"epoch": 11.7,
|
9184 |
+
"eval_loss": 0.8038084506988525,
|
9185 |
+
"eval_runtime": 1.3255,
|
9186 |
+
"eval_samples_per_second": 754.438,
|
9187 |
+
"eval_steps_per_second": 12.071,
|
9188 |
+
"step": 459000
|
9189 |
+
},
|
9190 |
+
{
|
9191 |
+
"epoch": 11.71,
|
9192 |
+
"learning_rate": 1.5170854310770376e-05,
|
9193 |
+
"loss": 0.2678,
|
9194 |
+
"step": 459500
|
9195 |
+
},
|
9196 |
+
{
|
9197 |
+
"epoch": 11.72,
|
9198 |
+
"learning_rate": 1.5044708009784457e-05,
|
9199 |
+
"loss": 0.268,
|
9200 |
+
"step": 460000
|
9201 |
+
},
|
9202 |
+
{
|
9203 |
+
"epoch": 11.72,
|
9204 |
+
"eval_loss": 0.8099783062934875,
|
9205 |
+
"eval_runtime": 1.3008,
|
9206 |
+
"eval_samples_per_second": 768.777,
|
9207 |
+
"eval_steps_per_second": 12.3,
|
9208 |
+
"step": 460000
|
9209 |
}
|
9210 |
],
|
9211 |
"max_steps": 500000,
|
9212 |
"num_train_epochs": 13,
|
9213 |
+
"total_flos": 1.4696260115583505e+22,
|
9214 |
"trial_name": null,
|
9215 |
"trial_params": null
|
9216 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96cedfbffe43fc18644cc6de989fcca94f4a26075ab174ed0f9cb823c12e426d
|
3 |
size 102501541
|