EricPeter committed on
Commit
9c8e7d1
1 Parent(s): 07c01da

Upload folder using huggingface_hub

Browse files
.ipynb_checkpoints/config-checkpoint.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Sunbird/sunbird-en-mul",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "swish",
5
+ "add_bias_logits": false,
6
+ "add_final_layer_norm": false,
7
+ "architectures": [
8
+ "MarianMTModel"
9
+ ],
10
+ "attention_dropout": 0.0,
11
+ "bad_words_ids": [
12
+ [
13
+ 64109
14
+ ]
15
+ ],
16
+ "bos_token_id": 0,
17
+ "classif_dropout": 0.0,
18
+ "classifier_dropout": 0.0,
19
+ "d_model": 512,
20
+ "decoder_attention_heads": 8,
21
+ "decoder_ffn_dim": 2048,
22
+ "decoder_layerdrop": 0.0,
23
+ "decoder_layers": 6,
24
+ "decoder_start_token_id": 64109,
25
+ "decoder_vocab_size": 64110,
26
+ "dropout": 0.1,
27
+ "encoder_attention_heads": 8,
28
+ "encoder_ffn_dim": 2048,
29
+ "encoder_layerdrop": 0.0,
30
+ "encoder_layers": 6,
31
+ "eos_token_id": 0,
32
+ "extra_pos_embeddings": 64110,
33
+ "forced_eos_token_id": 0,
34
+ "id2label": {
35
+ "0": "LABEL_0",
36
+ "1": "LABEL_1",
37
+ "2": "LABEL_2"
38
+ },
39
+ "init_std": 0.02,
40
+ "is_encoder_decoder": true,
41
+ "label2id": {
42
+ "LABEL_0": 0,
43
+ "LABEL_1": 1,
44
+ "LABEL_2": 2
45
+ },
46
+ "max_length": 512,
47
+ "max_position_embeddings": 512,
48
+ "model_type": "marian",
49
+ "normalize_before": false,
50
+ "normalize_embedding": false,
51
+ "num_beams": 4,
52
+ "num_hidden_layers": 6,
53
+ "pad_token_id": 64109,
54
+ "scale_embedding": true,
55
+ "share_encoder_decoder_embeddings": true,
56
+ "static_position_embeddings": true,
57
+ "torch_dtype": "float32",
58
+ "transformers_version": "4.42.3",
59
+ "use_cache": true,
60
+ "vocab_size": 64110
61
+ }
.ipynb_checkpoints/generation_config-checkpoint.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bad_words_ids": [
3
+ [
4
+ 64109
5
+ ]
6
+ ],
7
+ "bos_token_id": 0,
8
+ "decoder_start_token_id": 64109,
9
+ "eos_token_id": 0,
10
+ "forced_eos_token_id": 0,
11
+ "max_length": 512,
12
+ "num_beams": 4,
13
+ "pad_token_id": 64109,
14
+ "transformers_version": "4.42.3"
15
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f3b76e2fd91bb500b4ba2e119c0e247aa20878b3958b52e4005795911e4472f
3
  size 308136760
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a671b44e845c38b4c922359787150f67fe827da60cb8013c57f1e95b8979bda
3
  size 308136760
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53b1fad9f6859580f2b1286fc9e5ab4e9b98448e13e353a481fe5d16750ec81e
3
  size 615914106
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:314c29c585a428a949c96d0ad30940975e42977868f743cb743eca8a7ef11d5e
3
  size 615914106
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:978de5ea732400c1030e6151dbd2f7da1685b2766cd3dbd76ad9e6ca76c73905
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:944e670087748feb93d27d6f02cd19e3a0d45a2067db8c91e6ba9d7d1d9530ee
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:addf6ed2edfb91e5799707ef8ad563521651e0e38e75b9389ea7f0c4c79ff9b3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06585fe70176c3147fca57438a795025afd40446eb83631c3d5f29dcfab8b5fd
3
  size 1064
trainer_state.json CHANGED
@@ -1,292 +1,118 @@
1
  {
2
- "best_metric": 4.029151439666748,
3
- "best_model_checkpoint": "continued-finetuned-en-to-lg/checkpoint-39",
4
- "epoch": 26.0,
5
  "eval_steps": 50,
6
- "global_step": 1027,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.9873417721518988,
13
- "eval_bleu": 0.8383,
14
- "eval_gen_len": 20.509,
15
- "eval_loss": 4.029151439666748,
16
- "eval_runtime": 478.8606,
17
- "eval_samples_per_second": 25.387,
18
- "eval_steps_per_second": 0.794,
19
- "step": 39
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_bleu": 0.8282,
24
- "eval_gen_len": 20.3936,
25
- "eval_loss": 4.243524074554443,
26
- "eval_runtime": 474.9597,
27
- "eval_samples_per_second": 25.596,
28
- "eval_steps_per_second": 0.8,
29
- "step": 79
30
  },
31
  {
32
- "epoch": 2.9873417721518987,
33
- "eval_bleu": 0.8032,
34
- "eval_gen_len": 20.33,
35
- "eval_loss": 4.359070301055908,
36
- "eval_runtime": 475.0262,
37
- "eval_samples_per_second": 25.592,
38
- "eval_steps_per_second": 0.8,
39
- "step": 118
40
  },
41
  {
42
  "epoch": 4.0,
43
- "eval_bleu": 0.8038,
44
- "eval_gen_len": 20.2033,
45
- "eval_loss": 4.451895236968994,
46
- "eval_runtime": 473.6738,
47
- "eval_samples_per_second": 25.665,
48
- "eval_steps_per_second": 0.802,
49
- "step": 158
50
  },
51
  {
52
- "epoch": 4.987341772151899,
53
- "eval_bleu": 0.8017,
54
- "eval_gen_len": 20.0283,
55
- "eval_loss": 4.547088146209717,
56
- "eval_runtime": 476.3005,
57
- "eval_samples_per_second": 25.524,
58
- "eval_steps_per_second": 0.798,
59
- "step": 197
60
  },
61
  {
62
  "epoch": 6.0,
63
- "eval_bleu": 0.7828,
64
- "eval_gen_len": 20.1771,
65
- "eval_loss": 4.555694103240967,
66
- "eval_runtime": 473.4342,
67
- "eval_samples_per_second": 25.678,
68
- "eval_steps_per_second": 0.803,
69
- "step": 237
70
  },
71
  {
72
- "epoch": 6.987341772151899,
73
- "eval_bleu": 0.771,
74
- "eval_gen_len": 20.3172,
75
- "eval_loss": 4.672959327697754,
76
- "eval_runtime": 476.2521,
77
- "eval_samples_per_second": 25.526,
78
- "eval_steps_per_second": 0.798,
79
- "step": 276
80
  },
81
  {
82
  "epoch": 8.0,
83
- "eval_bleu": 0.7971,
84
- "eval_gen_len": 20.2095,
85
- "eval_loss": 4.719875812530518,
86
- "eval_runtime": 472.5874,
87
- "eval_samples_per_second": 25.724,
88
- "eval_steps_per_second": 0.804,
89
- "step": 316
90
  },
91
  {
92
- "epoch": 8.987341772151899,
93
- "eval_bleu": 0.7587,
94
- "eval_gen_len": 20.4253,
95
- "eval_loss": 4.758220672607422,
96
- "eval_runtime": 477.573,
97
- "eval_samples_per_second": 25.456,
98
- "eval_steps_per_second": 0.796,
99
- "step": 355
100
  },
101
  {
102
  "epoch": 10.0,
103
- "eval_bleu": 0.7975,
104
- "eval_gen_len": 20.3643,
105
- "eval_loss": 4.771291255950928,
106
- "eval_runtime": 477.1681,
107
- "eval_samples_per_second": 25.477,
108
- "eval_steps_per_second": 0.796,
109
- "step": 395
110
- },
111
- {
112
- "epoch": 10.987341772151899,
113
- "eval_bleu": 0.7619,
114
- "eval_gen_len": 20.4121,
115
- "eval_loss": 4.748700141906738,
116
- "eval_runtime": 479.5702,
117
- "eval_samples_per_second": 25.35,
118
- "eval_steps_per_second": 0.792,
119
- "step": 434
120
- },
121
- {
122
- "epoch": 12.0,
123
- "eval_bleu": 0.7865,
124
- "eval_gen_len": 20.3477,
125
- "eval_loss": 4.832670211791992,
126
- "eval_runtime": 473.4727,
127
- "eval_samples_per_second": 25.676,
128
- "eval_steps_per_second": 0.803,
129
- "step": 474
130
- },
131
- {
132
- "epoch": 12.658227848101266,
133
- "grad_norm": 1.2026784420013428,
134
- "learning_rate": 1.1452991452991454e-05,
135
- "loss": 0.1585,
136
- "step": 500
137
- },
138
- {
139
- "epoch": 12.987341772151899,
140
- "eval_bleu": 0.7909,
141
- "eval_gen_len": 20.307,
142
- "eval_loss": 4.844118595123291,
143
- "eval_runtime": 475.2982,
144
- "eval_samples_per_second": 25.578,
145
- "eval_steps_per_second": 0.799,
146
- "step": 513
147
- },
148
- {
149
- "epoch": 14.0,
150
- "eval_bleu": 0.8004,
151
- "eval_gen_len": 20.3038,
152
- "eval_loss": 4.876136302947998,
153
- "eval_runtime": 475.2478,
154
- "eval_samples_per_second": 25.58,
155
- "eval_steps_per_second": 0.8,
156
- "step": 553
157
- },
158
- {
159
- "epoch": 14.987341772151899,
160
- "eval_bleu": 0.7823,
161
- "eval_gen_len": 20.2747,
162
- "eval_loss": 4.92393684387207,
163
- "eval_runtime": 476.8033,
164
- "eval_samples_per_second": 25.497,
165
- "eval_steps_per_second": 0.797,
166
- "step": 592
167
- },
168
- {
169
- "epoch": 16.0,
170
- "eval_bleu": 0.79,
171
- "eval_gen_len": 20.226,
172
- "eval_loss": 4.880424976348877,
173
- "eval_runtime": 474.5946,
174
- "eval_samples_per_second": 25.616,
175
- "eval_steps_per_second": 0.801,
176
- "step": 632
177
- },
178
- {
179
- "epoch": 16.9873417721519,
180
- "eval_bleu": 0.7821,
181
- "eval_gen_len": 20.3653,
182
- "eval_loss": 4.9470534324646,
183
- "eval_runtime": 477.2511,
184
- "eval_samples_per_second": 25.473,
185
- "eval_steps_per_second": 0.796,
186
- "step": 671
187
- },
188
- {
189
- "epoch": 18.0,
190
- "eval_bleu": 0.7876,
191
- "eval_gen_len": 20.3285,
192
- "eval_loss": 4.9639482498168945,
193
- "eval_runtime": 477.3388,
194
- "eval_samples_per_second": 25.468,
195
- "eval_steps_per_second": 0.796,
196
- "step": 711
197
- },
198
- {
199
- "epoch": 18.9873417721519,
200
- "eval_bleu": 0.7895,
201
- "eval_gen_len": 20.3364,
202
- "eval_loss": 4.970686912536621,
203
- "eval_runtime": 476.4474,
204
- "eval_samples_per_second": 25.516,
205
- "eval_steps_per_second": 0.798,
206
- "step": 750
207
- },
208
- {
209
- "epoch": 20.0,
210
- "eval_bleu": 0.784,
211
- "eval_gen_len": 20.3641,
212
- "eval_loss": 4.984948635101318,
213
- "eval_runtime": 477.378,
214
- "eval_samples_per_second": 25.466,
215
- "eval_steps_per_second": 0.796,
216
- "step": 790
217
- },
218
- {
219
- "epoch": 20.9873417721519,
220
- "eval_bleu": 0.7838,
221
- "eval_gen_len": 20.3852,
222
- "eval_loss": 5.011585712432861,
223
- "eval_runtime": 477.6336,
224
- "eval_samples_per_second": 25.453,
225
- "eval_steps_per_second": 0.796,
226
- "step": 829
227
- },
228
- {
229
- "epoch": 22.0,
230
- "eval_bleu": 0.7874,
231
- "eval_gen_len": 20.3104,
232
- "eval_loss": 5.026498794555664,
233
- "eval_runtime": 477.8381,
234
- "eval_samples_per_second": 25.442,
235
- "eval_steps_per_second": 0.795,
236
- "step": 869
237
- },
238
- {
239
- "epoch": 22.9873417721519,
240
- "eval_bleu": 0.7886,
241
- "eval_gen_len": 20.3484,
242
- "eval_loss": 5.026541233062744,
243
- "eval_runtime": 478.7469,
244
- "eval_samples_per_second": 25.393,
245
- "eval_steps_per_second": 0.794,
246
- "step": 908
247
- },
248
- {
249
- "epoch": 24.0,
250
- "eval_bleu": 0.7833,
251
- "eval_gen_len": 20.3558,
252
- "eval_loss": 5.028345584869385,
253
- "eval_runtime": 476.6444,
254
- "eval_samples_per_second": 25.505,
255
- "eval_steps_per_second": 0.797,
256
- "step": 948
257
- },
258
- {
259
- "epoch": 24.9873417721519,
260
- "eval_bleu": 0.7835,
261
- "eval_gen_len": 20.313,
262
- "eval_loss": 5.029706001281738,
263
- "eval_runtime": 478.6376,
264
- "eval_samples_per_second": 25.399,
265
- "eval_steps_per_second": 0.794,
266
- "step": 987
267
- },
268
- {
269
- "epoch": 25.31645569620253,
270
- "grad_norm": 0.5030060410499573,
271
- "learning_rate": 2.9059829059829063e-06,
272
- "loss": 0.0263,
273
- "step": 1000
274
- },
275
- {
276
- "epoch": 26.0,
277
- "eval_bleu": 0.7812,
278
- "eval_gen_len": 20.3503,
279
- "eval_loss": 5.040848731994629,
280
- "eval_runtime": 478.615,
281
- "eval_samples_per_second": 25.4,
282
- "eval_steps_per_second": 0.794,
283
- "step": 1027
284
  }
285
  ],
286
  "logging_steps": 500,
287
- "max_steps": 1170,
288
  "num_input_tokens_seen": 0,
289
- "num_train_epochs": 30,
290
  "save_steps": 1000,
291
  "stateful_callbacks": {
292
  "TrainerControl": {
@@ -295,12 +121,12 @@
295
  "should_evaluate": false,
296
  "should_log": false,
297
  "should_save": true,
298
- "should_training_stop": false
299
  },
300
  "attributes": {}
301
  }
302
  },
303
- "total_flos": 346447645507584.0,
304
  "train_batch_size": 32,
305
  "trial_name": null,
306
  "trial_params": null
 
1
  {
2
+ "best_metric": 3.7421457767486572,
3
+ "best_model_checkpoint": "en-to-lg/checkpoint-16",
4
+ "epoch": 10.0,
5
  "eval_steps": 50,
6
+ "global_step": 160,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.0,
13
+ "eval_bleu": 0.8596,
14
+ "eval_gen_len": 20.4303,
15
+ "eval_loss": 3.7421457767486572,
16
+ "eval_runtime": 486.1664,
17
+ "eval_samples_per_second": 25.006,
18
+ "eval_steps_per_second": 0.782,
19
+ "step": 16
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_bleu": 0.855,
24
+ "eval_gen_len": 20.5137,
25
+ "eval_loss": 3.800355911254883,
26
+ "eval_runtime": 489.6809,
27
+ "eval_samples_per_second": 24.826,
28
+ "eval_steps_per_second": 0.776,
29
+ "step": 32
30
  },
31
  {
32
+ "epoch": 3.0,
33
+ "eval_bleu": 0.8413,
34
+ "eval_gen_len": 20.4,
35
+ "eval_loss": 3.806553602218628,
36
+ "eval_runtime": 491.5388,
37
+ "eval_samples_per_second": 24.733,
38
+ "eval_steps_per_second": 0.773,
39
+ "step": 48
40
  },
41
  {
42
  "epoch": 4.0,
43
+ "eval_bleu": 0.8455,
44
+ "eval_gen_len": 20.3859,
45
+ "eval_loss": 3.864736557006836,
46
+ "eval_runtime": 490.916,
47
+ "eval_samples_per_second": 24.764,
48
+ "eval_steps_per_second": 0.774,
49
+ "step": 64
50
  },
51
  {
52
+ "epoch": 5.0,
53
+ "eval_bleu": 0.8527,
54
+ "eval_gen_len": 20.3211,
55
+ "eval_loss": 3.916616916656494,
56
+ "eval_runtime": 490.8429,
57
+ "eval_samples_per_second": 24.768,
58
+ "eval_steps_per_second": 0.774,
59
+ "step": 80
60
  },
61
  {
62
  "epoch": 6.0,
63
+ "eval_bleu": 0.8539,
64
+ "eval_gen_len": 20.3088,
65
+ "eval_loss": 3.9686362743377686,
66
+ "eval_runtime": 492.3847,
67
+ "eval_samples_per_second": 24.69,
68
+ "eval_steps_per_second": 0.772,
69
+ "step": 96
70
  },
71
  {
72
+ "epoch": 7.0,
73
+ "eval_bleu": 0.8492,
74
+ "eval_gen_len": 20.3507,
75
+ "eval_loss": 3.9955193996429443,
76
+ "eval_runtime": 494.2016,
77
+ "eval_samples_per_second": 24.599,
78
+ "eval_steps_per_second": 0.769,
79
+ "step": 112
80
  },
81
  {
82
  "epoch": 8.0,
83
+ "eval_bleu": 0.8387,
84
+ "eval_gen_len": 20.3404,
85
+ "eval_loss": 4.008735179901123,
86
+ "eval_runtime": 491.9673,
87
+ "eval_samples_per_second": 24.711,
88
+ "eval_steps_per_second": 0.772,
89
+ "step": 128
90
  },
91
  {
92
+ "epoch": 9.0,
93
+ "eval_bleu": 0.8345,
94
+ "eval_gen_len": 20.3244,
95
+ "eval_loss": 4.021594047546387,
96
+ "eval_runtime": 491.5119,
97
+ "eval_samples_per_second": 24.734,
98
+ "eval_steps_per_second": 0.773,
99
+ "step": 144
100
  },
101
  {
102
  "epoch": 10.0,
103
+ "eval_bleu": 0.8431,
104
+ "eval_gen_len": 20.3462,
105
+ "eval_loss": 4.02959680557251,
106
+ "eval_runtime": 491.997,
107
+ "eval_samples_per_second": 24.71,
108
+ "eval_steps_per_second": 0.772,
109
+ "step": 160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  }
111
  ],
112
  "logging_steps": 500,
113
+ "max_steps": 160,
114
  "num_input_tokens_seen": 0,
115
+ "num_train_epochs": 10,
116
  "save_steps": 1000,
117
  "stateful_callbacks": {
118
  "TrainerControl": {
 
121
  "should_evaluate": false,
122
  "should_log": false,
123
  "should_save": true,
124
+ "should_training_stop": true
125
  },
126
  "attributes": {}
127
  }
128
  },
129
+ "total_flos": 53351789101056.0,
130
  "train_batch_size": 32,
131
  "trial_name": null,
132
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:308614a7c1143402fb2b7f47f4e5b67e0ad8de478d93377850387da73b823bf1
3
- size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd9395e59bed2f6384e0c693e2944381623a36a75b946cce5bfdab8eebcecec7
3
+ size 5240