lz039 commited on
Commit
55035f3
·
verified ·
1 Parent(s): 7171347

Upload 13 files

Browse files
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  model-de-bg-checkpoint/target.spm filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  model-de-bg-checkpoint/target.spm filter=lfs diff=lfs merge=lfs -text
37
+ target.spm filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Helsinki-NLP/opus-mt-de-bg",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "swish",
5
+ "add_bias_logits": false,
6
+ "add_final_layer_norm": false,
7
+ "architectures": [
8
+ "MarianMTModel"
9
+ ],
10
+ "attention_dropout": 0.0,
11
+ "bad_words_ids": [
12
+ [
13
+ 61109
14
+ ]
15
+ ],
16
+ "bos_token_id": 0,
17
+ "classif_dropout": 0.0,
18
+ "classifier_dropout": 0.0,
19
+ "d_model": 512,
20
+ "decoder_attention_heads": 8,
21
+ "decoder_ffn_dim": 2048,
22
+ "decoder_layerdrop": 0.0,
23
+ "decoder_layers": 6,
24
+ "decoder_start_token_id": 61109,
25
+ "decoder_vocab_size": 61110,
26
+ "dropout": 0.1,
27
+ "encoder_attention_heads": 8,
28
+ "encoder_ffn_dim": 2048,
29
+ "encoder_layerdrop": 0.0,
30
+ "encoder_layers": 6,
31
+ "eos_token_id": 0,
32
+ "extra_pos_embeddings": 61110,
33
+ "forced_eos_token_id": 0,
34
+ "id2label": {
35
+ "0": "LABEL_0",
36
+ "1": "LABEL_1",
37
+ "2": "LABEL_2"
38
+ },
39
+ "init_std": 0.02,
40
+ "is_encoder_decoder": true,
41
+ "label2id": {
42
+ "LABEL_0": 0,
43
+ "LABEL_1": 1,
44
+ "LABEL_2": 2
45
+ },
46
+ "max_length": 512,
47
+ "max_position_embeddings": 512,
48
+ "model_type": "marian",
49
+ "normalize_before": false,
50
+ "normalize_embedding": false,
51
+ "num_beams": 4,
52
+ "num_hidden_layers": 6,
53
+ "pad_token_id": 61109,
54
+ "scale_embedding": true,
55
+ "share_encoder_decoder_embeddings": true,
56
+ "static_position_embeddings": true,
57
+ "torch_dtype": "float32",
58
+ "transformers_version": "4.44.2",
59
+ "use_cache": true,
60
+ "vocab_size": 61110
61
+ }
generation_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bad_words_ids": [
3
+ [
4
+ 61109
5
+ ]
6
+ ],
7
+ "bos_token_id": 0,
8
+ "decoder_start_token_id": 61109,
9
+ "eos_token_id": 0,
10
+ "forced_eos_token_id": 0,
11
+ "max_length": 512,
12
+ "num_beams": 4,
13
+ "pad_token_id": 61109,
14
+ "renormalize_logits": true,
15
+ "transformers_version": "4.44.2"
16
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:760f9009e819cdb38d7d46707a080987de06a20f71367263c023abbec4eec0a8
3
+ size 301980760
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdda2f75090a1fa6a2183cc5bbf5d356776970ca21e20aa147053a61b48f34d5
3
+ size 603626106
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f75bdecce60610b3bcd1d97c52de3f414a5305efa9e61c49a11d3e65936d2668
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fc0f3a8a99bc754b73e55fd43baa7d572c046f5860346dbf5e6521b29bd9dff
3
+ size 1064
source.spm ADDED
Binary file (828 kB). View file
 
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "eos_token": "</s>",
3
+ "pad_token": "<pad>",
4
+ "unk_token": "<unk>"
5
+ }
target.spm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8276037bb04a891dc44eb7554c993477aed83be87c462e7173389dfe2c0d319b
3
+ size 1014706
tokenizer_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "</s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<unk>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "61109": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "clean_up_tokenization_spaces": true,
29
+ "eos_token": "</s>",
30
+ "model_max_length": 512,
31
+ "pad_token": "<pad>",
32
+ "separate_vocabs": false,
33
+ "source_lang": "deu",
34
+ "sp_model_kwargs": {},
35
+ "target_lang": "bul",
36
+ "tokenizer_class": "MarianTokenizer",
37
+ "unk_token": "<unk>"
38
+ }
trainer_state.json ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 8.0,
5
+ "eval_steps": 500,
6
+ "global_step": 7280,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.5494505494505495,
13
+ "grad_norm": 7.208542823791504,
14
+ "learning_rate": 1.862637362637363e-05,
15
+ "loss": 1.4229,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_bleu": 45.1493,
21
+ "eval_gen_len": 21.1512,
22
+ "eval_loss": 0.9880260229110718,
23
+ "eval_runtime": 159.8583,
24
+ "eval_samples_per_second": 11.379,
25
+ "eval_steps_per_second": 0.713,
26
+ "step": 910
27
+ },
28
+ {
29
+ "epoch": 1.098901098901099,
30
+ "grad_norm": 5.381701469421387,
31
+ "learning_rate": 1.7252747252747256e-05,
32
+ "loss": 1.1197,
33
+ "step": 1000
34
+ },
35
+ {
36
+ "epoch": 1.6483516483516483,
37
+ "grad_norm": 4.710964679718018,
38
+ "learning_rate": 1.587912087912088e-05,
39
+ "loss": 0.9205,
40
+ "step": 1500
41
+ },
42
+ {
43
+ "epoch": 2.0,
44
+ "eval_bleu": 48.9448,
45
+ "eval_gen_len": 21.4244,
46
+ "eval_loss": 0.8794811367988586,
47
+ "eval_runtime": 164.115,
48
+ "eval_samples_per_second": 11.084,
49
+ "eval_steps_per_second": 0.695,
50
+ "step": 1820
51
+ },
52
+ {
53
+ "epoch": 2.197802197802198,
54
+ "grad_norm": 3.468052864074707,
55
+ "learning_rate": 1.4505494505494506e-05,
56
+ "loss": 0.8318,
57
+ "step": 2000
58
+ },
59
+ {
60
+ "epoch": 2.7472527472527473,
61
+ "grad_norm": 3.9397318363189697,
62
+ "learning_rate": 1.3131868131868134e-05,
63
+ "loss": 0.7509,
64
+ "step": 2500
65
+ },
66
+ {
67
+ "epoch": 3.0,
68
+ "eval_bleu": 50.7507,
69
+ "eval_gen_len": 21.4739,
70
+ "eval_loss": 0.83347487449646,
71
+ "eval_runtime": 167.1805,
72
+ "eval_samples_per_second": 10.88,
73
+ "eval_steps_per_second": 0.682,
74
+ "step": 2730
75
+ },
76
+ {
77
+ "epoch": 3.2967032967032965,
78
+ "grad_norm": 4.654043197631836,
79
+ "learning_rate": 1.1758241758241759e-05,
80
+ "loss": 0.7075,
81
+ "step": 3000
82
+ },
83
+ {
84
+ "epoch": 3.8461538461538463,
85
+ "grad_norm": 3.244563341140747,
86
+ "learning_rate": 1.0384615384615386e-05,
87
+ "loss": 0.6522,
88
+ "step": 3500
89
+ },
90
+ {
91
+ "epoch": 4.0,
92
+ "eval_bleu": 51.8451,
93
+ "eval_gen_len": 21.4134,
94
+ "eval_loss": 0.8046153783798218,
95
+ "eval_runtime": 167.5891,
96
+ "eval_samples_per_second": 10.854,
97
+ "eval_steps_per_second": 0.68,
98
+ "step": 3640
99
+ },
100
+ {
101
+ "epoch": 4.395604395604396,
102
+ "grad_norm": 4.202299118041992,
103
+ "learning_rate": 9.010989010989011e-06,
104
+ "loss": 0.6097,
105
+ "step": 4000
106
+ },
107
+ {
108
+ "epoch": 4.945054945054945,
109
+ "grad_norm": 6.508543968200684,
110
+ "learning_rate": 7.637362637362638e-06,
111
+ "loss": 0.5877,
112
+ "step": 4500
113
+ },
114
+ {
115
+ "epoch": 5.0,
116
+ "eval_bleu": 52.489,
117
+ "eval_gen_len": 21.453,
118
+ "eval_loss": 0.7915568947792053,
119
+ "eval_runtime": 170.447,
120
+ "eval_samples_per_second": 10.672,
121
+ "eval_steps_per_second": 0.669,
122
+ "step": 4550
123
+ },
124
+ {
125
+ "epoch": 5.4945054945054945,
126
+ "grad_norm": 4.693784236907959,
127
+ "learning_rate": 6.2637362637362645e-06,
128
+ "loss": 0.5454,
129
+ "step": 5000
130
+ },
131
+ {
132
+ "epoch": 6.0,
133
+ "eval_bleu": 53.267,
134
+ "eval_gen_len": 21.4063,
135
+ "eval_loss": 0.7819094657897949,
136
+ "eval_runtime": 166.0221,
137
+ "eval_samples_per_second": 10.956,
138
+ "eval_steps_per_second": 0.687,
139
+ "step": 5460
140
+ },
141
+ {
142
+ "epoch": 6.043956043956044,
143
+ "grad_norm": 3.2858574390411377,
144
+ "learning_rate": 4.890109890109891e-06,
145
+ "loss": 0.5319,
146
+ "step": 5500
147
+ },
148
+ {
149
+ "epoch": 6.593406593406593,
150
+ "grad_norm": 4.065381050109863,
151
+ "learning_rate": 3.516483516483517e-06,
152
+ "loss": 0.5039,
153
+ "step": 6000
154
+ },
155
+ {
156
+ "epoch": 7.0,
157
+ "eval_bleu": 53.3718,
158
+ "eval_gen_len": 21.5014,
159
+ "eval_loss": 0.7780753970146179,
160
+ "eval_runtime": 172.3653,
161
+ "eval_samples_per_second": 10.553,
162
+ "eval_steps_per_second": 0.661,
163
+ "step": 6370
164
+ },
165
+ {
166
+ "epoch": 7.142857142857143,
167
+ "grad_norm": 3.438549280166626,
168
+ "learning_rate": 2.1428571428571427e-06,
169
+ "loss": 0.5018,
170
+ "step": 6500
171
+ },
172
+ {
173
+ "epoch": 7.6923076923076925,
174
+ "grad_norm": 4.464223384857178,
175
+ "learning_rate": 7.692307692307694e-07,
176
+ "loss": 0.4847,
177
+ "step": 7000
178
+ }
179
+ ],
180
+ "logging_steps": 500,
181
+ "max_steps": 7280,
182
+ "num_input_tokens_seen": 0,
183
+ "num_train_epochs": 8,
184
+ "save_steps": 500,
185
+ "stateful_callbacks": {
186
+ "TrainerControl": {
187
+ "args": {
188
+ "should_epoch_stop": false,
189
+ "should_evaluate": false,
190
+ "should_log": false,
191
+ "should_save": true,
192
+ "should_training_stop": true
193
+ },
194
+ "attributes": {}
195
+ }
196
+ },
197
+ "total_flos": 2444491603574784.0,
198
+ "train_batch_size": 16,
199
+ "trial_name": null,
200
+ "trial_params": null
201
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20449154fe68ba8250b81371c6832942b4bdd0b93d2c344989c41f78532d92ce
3
+ size 5368
vocab.json ADDED
The diff for this file is too large to render. See raw diff