dq158 committed on
Commit
d0bca4c
1 Parent(s): 60f324b

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -5,6 +5,5 @@ library_name: peft
5
 
6
  ### Framework versions
7
 
8
- - PEFT 0.5.0
9
 
10
  - PEFT 0.5.0
 
5
 
6
  ### Framework versions
7
 
 
8
 
9
  - PEFT 0.5.0
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d8eece0cb713c5bda8dfaf483fde59a9a8d7586872a7489092df0ad63d26b85
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:538342ac688fb0a0d86aaedd7330737ae00e80eb8e093f459c58ce63d811fc33
3
  size 1256
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8f8922c352ab63d1593bcb944e77d461472df192dcdc1202984e9e08f16a111
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ba8427ac6eea57ec5734699585fe0dd282eb1503ee998fdc4232e54bb7d6354
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f352ad8ae767de1cdfeb29fde8f2883bf1769f14e8edca24b5887be36d5a540
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bc60bce785072809819f1c1fc75a413381f48d7fd0b438c4400860ca72a7129
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 2.2744693756103516,
3
  "best_model_checkpoint": "dq158/pingusPongus/checkpoint-6323",
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 6323,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -98,13 +98,110 @@
98
  "eval_steps_per_second": 0.895,
99
  "eval_translation_length": 1439232,
100
  "step": 6323
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  }
102
  ],
103
  "logging_steps": 500,
104
  "max_steps": 18969,
105
  "num_train_epochs": 3,
106
  "save_steps": 500,
107
- "total_flos": 2.1666322696686797e+17,
108
  "trial_name": null,
109
  "trial_params": null
110
  }
 
1
  {
2
  "best_metric": 2.2744693756103516,
3
  "best_model_checkpoint": "dq158/pingusPongus/checkpoint-6323",
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
+ "global_step": 12646,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
98
  "eval_steps_per_second": 0.895,
99
  "eval_translation_length": 1439232,
100
  "step": 6323
101
+ },
102
+ {
103
+ "epoch": 1.03,
104
+ "learning_rate": 6.678034646037393e-05,
105
+ "loss": 2.3216,
106
+ "step": 6500
107
+ },
108
+ {
109
+ "epoch": 1.11,
110
+ "learning_rate": 6.343059707231406e-05,
111
+ "loss": 2.3675,
112
+ "step": 7000
113
+ },
114
+ {
115
+ "epoch": 1.19,
116
+ "learning_rate": 5.995319481960425e-05,
117
+ "loss": 2.3563,
118
+ "step": 7500
119
+ },
120
+ {
121
+ "epoch": 1.27,
122
+ "learning_rate": 5.6372224671242366e-05,
123
+ "loss": 2.4016,
124
+ "step": 8000
125
+ },
126
+ {
127
+ "epoch": 1.34,
128
+ "learning_rate": 5.2712488921797064e-05,
129
+ "loss": 2.3769,
130
+ "step": 8500
131
+ },
132
+ {
133
+ "epoch": 1.42,
134
+ "learning_rate": 4.899933540728503e-05,
135
+ "loss": 2.3531,
136
+ "step": 9000
137
+ },
138
+ {
139
+ "epoch": 1.5,
140
+ "learning_rate": 4.5258481942552583e-05,
141
+ "loss": 2.3503,
142
+ "step": 9500
143
+ },
144
+ {
145
+ "epoch": 1.58,
146
+ "learning_rate": 4.151583819613269e-05,
147
+ "loss": 2.3508,
148
+ "step": 10000
149
+ },
150
+ {
151
+ "epoch": 1.66,
152
+ "learning_rate": 3.779732623629695e-05,
153
+ "loss": 2.3509,
154
+ "step": 10500
155
+ },
156
+ {
157
+ "epoch": 1.74,
158
+ "learning_rate": 3.412870099122515e-05,
159
+ "loss": 2.3825,
160
+ "step": 11000
161
+ },
162
+ {
163
+ "epoch": 1.82,
164
+ "learning_rate": 3.0535371866810265e-05,
165
+ "loss": 2.3884,
166
+ "step": 11500
167
+ },
168
+ {
169
+ "epoch": 1.9,
170
+ "learning_rate": 2.704222675759866e-05,
171
+ "loss": 2.3531,
172
+ "step": 12000
173
+ },
174
+ {
175
+ "epoch": 1.98,
176
+ "learning_rate": 2.367345966978985e-05,
177
+ "loss": 2.3813,
178
+ "step": 12500
179
+ },
180
+ {
181
+ "epoch": 2.0,
182
+ "eval_bleu": 1.0,
183
+ "eval_brevity_penalty": 1.0,
184
+ "eval_length_ratio": 1.0,
185
+ "eval_loss": 2.2744693756103516,
186
+ "eval_precisions": [
187
+ 1.0,
188
+ 1.0,
189
+ 1.0,
190
+ 1.0
191
+ ],
192
+ "eval_reference_length": 1439232,
193
+ "eval_runtime": 1567.8247,
194
+ "eval_samples_per_second": 1.793,
195
+ "eval_steps_per_second": 0.897,
196
+ "eval_translation_length": 1439232,
197
+ "step": 12646
198
  }
199
  ],
200
  "logging_steps": 500,
201
  "max_steps": 18969,
202
  "num_train_epochs": 3,
203
  "save_steps": 500,
204
+ "total_flos": 4.3332645393373594e+17,
205
  "trial_name": null,
206
  "trial_params": null
207
  }