dq158 commited on
Commit
1d33bb6
1 Parent(s): 93c771b

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:538342ac688fb0a0d86aaedd7330737ae00e80eb8e093f459c58ce63d811fc33
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f801c41a48a686d1e8c361a6732caaf9bafd7adba82937dd2b58b735832368a
3
  size 1256
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ba8427ac6eea57ec5734699585fe0dd282eb1503ee998fdc4232e54bb7d6354
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae28f9b2f5015b95ad2f650d54d1979a68d40e31cc70d2fd5c54ddf3ae4a6519
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bc60bce785072809819f1c1fc75a413381f48d7fd0b438c4400860ca72a7129
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c70b4498d2f1f0f22fc7b37a8cc650fd9782887b3bd68443093cb35a029f0af
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,188 +1,25 @@
1
  {
2
- "best_metric": 2.2744693756103516,
3
- "best_model_checkpoint": "dq158/pingusPongus/checkpoint-6323",
4
- "epoch": 2.0,
5
  "eval_steps": 500,
6
- "global_step": 12646,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
- {
12
- "epoch": 0.08,
13
- "learning_rate": 8.990024286845271e-05,
14
- "loss": 2.3525,
15
- "step": 500
16
- },
17
- {
18
- "epoch": 0.16,
19
- "learning_rate": 8.949573742576993e-05,
20
- "loss": 2.3648,
21
- "step": 1000
22
- },
23
- {
24
- "epoch": 0.24,
25
- "learning_rate": 8.878304835100916e-05,
26
- "loss": 2.3927,
27
- "step": 1500
28
- },
29
- {
30
- "epoch": 0.32,
31
- "learning_rate": 8.776711182728166e-05,
32
- "loss": 2.3619,
33
- "step": 2000
34
- },
35
- {
36
- "epoch": 0.4,
37
- "learning_rate": 8.645496437147547e-05,
38
- "loss": 2.3352,
39
- "step": 2500
40
- },
41
- {
42
- "epoch": 0.47,
43
- "learning_rate": 8.485569409836621e-05,
44
- "loss": 2.3968,
45
- "step": 3000
46
- },
47
- {
48
- "epoch": 0.55,
49
- "learning_rate": 8.298037777507714e-05,
50
- "loss": 2.3855,
51
- "step": 3500
52
- },
53
  {
54
  "epoch": 0.63,
55
- "learning_rate": 8.084200410185778e-05,
56
- "loss": 2.3609,
57
- "step": 4000
58
- },
59
- {
60
- "epoch": 0.71,
61
- "learning_rate": 7.845538375054953e-05,
62
- "loss": 2.3766,
63
- "step": 4500
64
- },
65
- {
66
- "epoch": 0.79,
67
- "learning_rate": 7.583704678382374e-05,
68
- "loss": 2.3645,
69
- "step": 5000
70
- },
71
- {
72
- "epoch": 0.87,
73
- "learning_rate": 7.300512816568131e-05,
74
- "loss": 2.3709,
75
- "step": 5500
76
- },
77
- {
78
- "epoch": 0.95,
79
- "learning_rate": 6.997924215618355e-05,
80
- "loss": 2.3794,
81
- "step": 6000
82
  },
83
  {
84
  "epoch": 1.0,
85
  "eval_bleu": 1.0,
86
  "eval_brevity_penalty": 1.0,
87
  "eval_length_ratio": 1.0,
88
- "eval_loss": 2.2744693756103516,
89
- "eval_precisions": [
90
- 1.0,
91
- 1.0,
92
- 1.0,
93
- 1.0
94
- ],
95
- "eval_reference_length": 1439232,
96
- "eval_runtime": 1571.1114,
97
- "eval_samples_per_second": 1.789,
98
- "eval_steps_per_second": 0.895,
99
- "eval_translation_length": 1439232,
100
- "step": 6323
101
- },
102
- {
103
- "epoch": 1.03,
104
- "learning_rate": 6.678034646037393e-05,
105
- "loss": 2.3216,
106
- "step": 6500
107
- },
108
- {
109
- "epoch": 1.11,
110
- "learning_rate": 6.343059707231406e-05,
111
- "loss": 2.3675,
112
- "step": 7000
113
- },
114
- {
115
- "epoch": 1.19,
116
- "learning_rate": 5.995319481960425e-05,
117
- "loss": 2.3563,
118
- "step": 7500
119
- },
120
- {
121
- "epoch": 1.27,
122
- "learning_rate": 5.6372224671242366e-05,
123
- "loss": 2.4016,
124
- "step": 8000
125
- },
126
- {
127
- "epoch": 1.34,
128
- "learning_rate": 5.2712488921797064e-05,
129
- "loss": 2.3769,
130
- "step": 8500
131
- },
132
- {
133
- "epoch": 1.42,
134
- "learning_rate": 4.899933540728503e-05,
135
- "loss": 2.3531,
136
- "step": 9000
137
- },
138
- {
139
- "epoch": 1.5,
140
- "learning_rate": 4.5258481942552583e-05,
141
- "loss": 2.3503,
142
- "step": 9500
143
- },
144
- {
145
- "epoch": 1.58,
146
- "learning_rate": 4.151583819613269e-05,
147
- "loss": 2.3508,
148
- "step": 10000
149
- },
150
- {
151
- "epoch": 1.66,
152
- "learning_rate": 3.779732623629695e-05,
153
- "loss": 2.3509,
154
- "step": 10500
155
- },
156
- {
157
- "epoch": 1.74,
158
- "learning_rate": 3.412870099122515e-05,
159
- "loss": 2.3825,
160
- "step": 11000
161
- },
162
- {
163
- "epoch": 1.82,
164
- "learning_rate": 3.0535371866810265e-05,
165
- "loss": 2.3884,
166
- "step": 11500
167
- },
168
- {
169
- "epoch": 1.9,
170
- "learning_rate": 2.704222675759866e-05,
171
- "loss": 2.3531,
172
- "step": 12000
173
- },
174
- {
175
- "epoch": 1.98,
176
- "learning_rate": 2.367345966978985e-05,
177
- "loss": 2.3813,
178
- "step": 12500
179
- },
180
- {
181
- "epoch": 2.0,
182
- "eval_bleu": 1.0,
183
- "eval_brevity_penalty": 1.0,
184
- "eval_length_ratio": 1.0,
185
- "eval_loss": 2.2744693756103516,
186
  "eval_precisions": [
187
  1.0,
188
  1.0,
@@ -190,18 +27,18 @@
190
  1.0
191
  ],
192
  "eval_reference_length": 1439232,
193
- "eval_runtime": 1567.8247,
194
- "eval_samples_per_second": 1.793,
195
- "eval_steps_per_second": 0.897,
196
  "eval_translation_length": 1439232,
197
- "step": 12646
198
  }
199
  ],
200
  "logging_steps": 500,
201
- "max_steps": 18969,
202
  "num_train_epochs": 3,
203
  "save_steps": 500,
204
- "total_flos": 4.3332645393373594e+17,
205
  "trial_name": null,
206
  "trial_params": null
207
  }
 
1
  {
2
+ "best_metric": 2.096945285797119,
3
+ "best_model_checkpoint": "dq158/pingusPongus/checkpoint-790",
4
+ "epoch": 0.9993674889310563,
5
  "eval_steps": 500,
6
+ "global_step": 790,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  {
12
  "epoch": 0.63,
13
+ "learning_rate": 4.6266135493489015e-05,
14
+ "loss": 2.2387,
15
+ "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  },
17
  {
18
  "epoch": 1.0,
19
  "eval_bleu": 1.0,
20
  "eval_brevity_penalty": 1.0,
21
  "eval_length_ratio": 1.0,
22
+ "eval_loss": 2.096945285797119,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  "eval_precisions": [
24
  1.0,
25
  1.0,
 
27
  1.0
28
  ],
29
  "eval_reference_length": 1439232,
30
+ "eval_runtime": 879.4262,
31
+ "eval_samples_per_second": 3.196,
32
+ "eval_steps_per_second": 0.2,
33
  "eval_translation_length": 1439232,
34
+ "step": 790
35
  }
36
  ],
37
  "logging_steps": 500,
38
+ "max_steps": 2370,
39
  "num_train_epochs": 3,
40
  "save_steps": 500,
41
+ "total_flos": 2.1666322696686797e+17,
42
  "trial_name": null,
43
  "trial_params": null
44
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ef329a26ff1c545c7abfacd51a0506adc651e12a2a47e71cffbb6c7a728715a
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7694805b29bfb8491caf50687f3617fba8e8b17c948a4fabc0da0476e235ec2
3
  size 4664