genejalston committed
Commit: faf5e64
Parent: 6176e6f

Training in progress, step 500, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97cc9e08ee5159d5202718bc471c4dd25fcf65be39e9b592c11e051c6ae0bfe6
+oid sha256:73f9709676fd308493096edfefbab55e00200fd0ec9eee8d1c8d9c9b9393dd60
 size 1625426996
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5b9d19d06ed67d28893aca72c07d4b34a8af6dd4fa79383c0f53b9d612ea5ce2
+oid sha256:13b3726efa7754053b08c4fa6b30b5a12364126ed953d7f0b1fcf11d22c76b26
 size 3250759951
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c7895c8a6b7630afd0066c4bcfcb4467e3f329e730f605a8bf5d91c3d820ec41
+oid sha256:3e7af7a01e94a9a7176c2bbdf2cb9b8139105cfe65528853ec5a27025100d6bc
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6afe57b8e9e4434d4502cf3231a760a5e0234ecf9276a3bb9f6e04b8cf09b373
+oid sha256:e9eeeb5bdd24cb6de6ccdee8afd347d0c5c5a88be6c8e7e24752989eef5c1513
 size 1064
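
The four diffs above touch Git LFS pointer files, so only the `oid sha256:` line changes when a binary is rewritten; the `version` line and, in this commit, even the byte sizes stay identical. As a hedged sketch (the repository id below is a placeholder, since only the username and the commit hash faf5e64 are visible on this page), the underlying binaries can be fetched with `huggingface_hub` rather than reading the pointer text:

```python
# Sketch: download the real checkpoint binary that an LFS pointer above refers to.
# "genejalston/example-repo" is a placeholder repo id (the actual repo name is not
# shown in this commit page); revision "faf5e64" is the commit hash from the header.
from huggingface_hub import hf_hub_download

local_path = hf_hub_download(
    repo_id="genejalston/example-repo",            # placeholder, replace with the real repo
    filename="last-checkpoint/model.safetensors",  # one of the LFS-tracked files above
    revision="faf5e64",
)
print(local_path)  # local path to the ~1.6 GB blob (size 1625426996 bytes), not the pointer text
```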
last-checkpoint/trainer_state.json CHANGED
@@ -1,191 +1,25 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.6,
+  "epoch": 0.2,
   "eval_steps": 500,
-  "global_step": 13000,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
-    {
-      "epoch": 0.1,
-      "learning_rate": 1.9346230820547033e-05,
-      "loss": 0.449,
-      "step": 500
-    },
     {
       "epoch": 0.2,
-      "learning_rate": 1.8679119412941963e-05,
-      "loss": 0.3794,
-      "step": 1000
-    },
-    {
-      "epoch": 0.3,
-      "learning_rate": 1.8012008005336892e-05,
-      "loss": 0.3172,
-      "step": 1500
-    },
-    {
-      "epoch": 0.4,
-      "learning_rate": 1.734489659773182e-05,
-      "loss": 0.2633,
-      "step": 2000
-    },
-    {
-      "epoch": 0.5,
-      "learning_rate": 1.667778519012675e-05,
-      "loss": 0.3007,
-      "step": 2500
-    },
-    {
-      "epoch": 0.6,
-      "learning_rate": 1.6010673782521683e-05,
-      "loss": 0.2715,
-      "step": 3000
-    },
-    {
-      "epoch": 0.7,
-      "learning_rate": 1.5343562374916613e-05,
-      "loss": 0.2032,
-      "step": 3500
-    },
-    {
-      "epoch": 0.8,
-      "learning_rate": 1.4676450967311542e-05,
-      "loss": 0.1486,
-      "step": 4000
-    },
-    {
-      "epoch": 0.9,
-      "learning_rate": 1.4009339559706471e-05,
-      "loss": 0.2056,
-      "step": 4500
-    },
-    {
-      "epoch": 1.0,
-      "learning_rate": 1.3342228152101402e-05,
-      "loss": 0.1911,
-      "step": 5000
-    },
-    {
-      "epoch": 1.0,
-      "eval_loss": 2.493464231491089,
-      "eval_runtime": 1867.8035,
-      "eval_samples_per_second": 1.338,
-      "eval_steps_per_second": 0.168,
-      "step": 5000
-    },
-    {
-      "epoch": 1.1,
-      "learning_rate": 1.2675116744496331e-05,
-      "loss": 0.1399,
-      "step": 5500
-    },
-    {
-      "epoch": 1.2,
-      "learning_rate": 1.2008005336891262e-05,
-      "loss": 0.173,
-      "step": 6000
-    },
-    {
-      "epoch": 1.3,
-      "learning_rate": 1.1340893929286192e-05,
-      "loss": 0.107,
-      "step": 6500
-    },
-    {
-      "epoch": 1.4,
-      "learning_rate": 1.0673782521681123e-05,
-      "loss": 0.146,
-      "step": 7000
-    },
-    {
-      "epoch": 1.5,
-      "learning_rate": 1.0006671114076052e-05,
-      "loss": 0.1157,
-      "step": 7500
-    },
-    {
-      "epoch": 1.6,
-      "learning_rate": 9.339559706470981e-06,
-      "loss": 0.0823,
-      "step": 8000
-    },
-    {
-      "epoch": 1.7,
-      "learning_rate": 8.67244829886591e-06,
-      "loss": 0.1076,
-      "step": 8500
-    },
-    {
-      "epoch": 1.8,
-      "learning_rate": 8.005336891260842e-06,
-      "loss": 0.0775,
-      "step": 9000
-    },
-    {
-      "epoch": 1.9,
-      "learning_rate": 7.338225483655771e-06,
-      "loss": 0.095,
-      "step": 9500
-    },
-    {
-      "epoch": 2.0,
-      "learning_rate": 6.671114076050701e-06,
-      "loss": 0.0749,
-      "step": 10000
-    },
-    {
-      "epoch": 2.0,
-      "eval_loss": 2.147020101547241,
-      "eval_runtime": 1887.6613,
-      "eval_samples_per_second": 1.324,
-      "eval_steps_per_second": 0.166,
-      "step": 10000
-    },
-    {
-      "epoch": 2.1,
-      "learning_rate": 6.004002668445631e-06,
-      "loss": 0.1234,
-      "step": 10500
-    },
-    {
-      "epoch": 2.2,
-      "learning_rate": 5.336891260840561e-06,
-      "loss": 0.085,
-      "step": 11000
-    },
-    {
-      "epoch": 2.3,
-      "learning_rate": 4.669779853235491e-06,
-      "loss": 0.062,
-      "step": 11500
-    },
-    {
-      "epoch": 2.4,
-      "learning_rate": 4.002668445630421e-06,
-      "loss": 0.0709,
-      "step": 12000
-    },
-    {
-      "epoch": 2.5,
-      "learning_rate": 3.3355570380253505e-06,
-      "loss": 0.0731,
-      "step": 12500
-    },
-    {
-      "epoch": 2.6,
-      "learning_rate": 2.6684456304202807e-06,
-      "loss": 0.0686,
-      "step": 13000
+      "learning_rate": 1.869158878504673e-05,
+      "loss": 0.4912,
+      "step": 500
     }
   ],
   "logging_steps": 500,
-  "max_steps": 15000,
+  "max_steps": 7500,
   "num_train_epochs": 3,
   "save_steps": 500,
-  "total_flos": 2.20088095899648e+16,
+  "total_flos": 846525235200000.0,
   "trial_name": null,
   "trial_params": null
 }
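
The new trainer_state.json describes a checkpoint written at global_step 500 of a 7,500-step, 3-epoch run, with logging and checkpointing every 500 steps. As a hedged illustration (assuming a standard Hugging Face Transformers `Trainer` setup, which the checkpoint layout suggests but the commit does not show), training could be resumed from a directory shaped like `last-checkpoint/` as sketched below; the base model and dataset are placeholders.

```python
# A minimal resume sketch, NOT the author's actual training script: the base model
# and the toy dataset below are placeholders (neither appears in this commit).
# Only the step/epoch settings mirror the new trainer_state.json above
# ("num_train_epochs": 3, "logging_steps": 500, "save_steps": 500).
import torch
from torch.utils.data import Dataset
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments


class ToyDataset(Dataset):
    """Placeholder dataset so the sketch is self-contained."""

    def __len__(self):
        return 16

    def __getitem__(self, idx):
        return {
            "input_ids": torch.tensor([101, 2023, 2003, 1037, 3231, 102]),
            "attention_mask": torch.ones(6, dtype=torch.long),
            "labels": torch.tensor(idx % 2),
        }


model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")  # placeholder base model

args = TrainingArguments(
    output_dir="checkpoints",
    num_train_epochs=3,   # "num_train_epochs": 3
    logging_steps=500,    # "logging_steps": 500
    save_steps=500,       # "save_steps": 500
)

trainer = Trainer(model=model, args=args, train_dataset=ToyDataset())

# Trainer restores model.safetensors, optimizer.pt, scheduler.pt, rng_state.pth,
# and trainer_state.json from the checkpoint directory and continues from
# global_step 500 rather than starting over.
trainer.train(resume_from_checkpoint="last-checkpoint")
```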