Mel-Iza0 committed
Commit 9921373 · 1 Parent(s): d7e94f9

Training in progress, epoch 4, checkpoint

checkpoint-16932/README.md ADDED
@@ -0,0 +1,34 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: True
+ - bnb_4bit_compute_dtype: bfloat16
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: True
+ - bnb_4bit_compute_dtype: bfloat16
+ ### Framework versions
+
+ - PEFT 0.4.0
+
+ - PEFT 0.4.0
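For reference, a minimal sketch of how the quantization setup above could be reconstructed in code. The argument names follow the `transformers`/`bitsandbytes` API; the original training script is not part of this commit, so this is an assumption, not the author's code:

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Mirrors the bitsandbytes settings listed in the README above.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Base model taken from checkpoint-16932/adapter_config.json below.
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    quantization_config=bnb_config,
    device_map="auto",
)
```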
checkpoint-16932/adapter_config.json ADDED
@@ -0,0 +1,23 @@
+ {
+ "auto_mapping": null,
+ "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.1,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "revision": null,
+ "target_modules": [
+ "q_proj",
+ "k_proj",
+ "v_proj",
+ "o_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
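A minimal `peft` sketch of the same adapter configuration (assuming PEFT 0.4.x, per the README above):

```python
from peft import LoraConfig, get_peft_model

# LoRA hyperparameters from checkpoint-16932/adapter_config.json.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    task_type="CAUSAL_LM",
)

# `model` is the 4-bit base model from the earlier sketch.
peft_model = get_peft_model(model, lora_config)
peft_model.print_trainable_parameters()
```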
checkpoint-16932/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ad25a7eb9cb72fbbe9d3edf00e78584870bb9d937630c0da28792dbbbba6ab7a
+ size 13677261
checkpoint-16932/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d15b53270b9b62da1821794aae28d51abba7588b4682e862f81c75a8307c93ae
+ size 13648432
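Both entries are Git LFS pointers to the same ~13 MB LoRA adapter weights, in `.bin` and `.safetensors` form. A sketch of attaching them to the quantized base model (assuming the checkpoint directory has been downloaded locally and `model` is the base model from the first sketch):

```python
from peft import PeftModel

# Loads adapter_model.safetensors (or .bin) on top of the base model.
model_with_adapter = PeftModel.from_pretrained(model, "checkpoint-16932")
```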
checkpoint-16932/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:353ed746cb298288d34d9195e72babee57facb5b7767a4b38d57063bfb4a4a07
+ size 27370181
checkpoint-16932/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e5c2cdbf773ee990ad0d518e03aaae819c64bb772e74572b22503fbd7aec7ff5
+ size 14575
checkpoint-16932/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ce9e342ddf59a6fb9d1a6d60ac34229635882ae60c7b7312aa42575f54c44b0e
+ size 627
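The optimizer, scheduler, and RNG states above exist so the run can resume exactly where epoch 5 left off. A sketch with the `transformers` `Trainer` (assuming a `trainer` constructed with the same arguments as the original run):

```python
# Restores optimizer.pt, scheduler.pt, rng_state.pth and trainer_state.json.
trainer.train(resume_from_checkpoint="checkpoint-16932")
```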
checkpoint-16932/special_tokens_map.json ADDED
@@ -0,0 +1,11 @@
+ {
+ "additional_special_tokens": [
+ "<unk>",
+ "<s>",
+ "</s>"
+ ],
+ "bos_token": "<s>",
+ "eos_token": "</s>",
+ "pad_token": "</s>",
+ "unk_token": "<unk>"
+ }
checkpoint-16932/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
checkpoint-16932/tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
+ {
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "<unk>",
+ "<s>",
+ "</s>"
+ ],
+ "bos_token": "<s>",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "</s>",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": true
+ }
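Note that `pad_token` is mapped to the EOS token `</s>`, a common choice for Mistral fine-tunes since the base tokenizer ships without a padding token. A sketch of loading the tokenizer from the checkpoint directory (assuming it is available locally):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("checkpoint-16932")
assert tokenizer.pad_token == tokenizer.eos_token == "</s>"
```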
checkpoint-16932/trainer_state.json ADDED
@@ -0,0 +1,257 @@
+ {
+ "best_metric": 0.8134331107139587,
+ "best_model_checkpoint": "./Zeroshot/01-12-23-mistralai-Mistral-7B-v0.1_multilang-dataset-3.0.3-portuguese-2_epochs-10_batch_3/checkpoints/checkpoint-16932",
+ "epoch": 4.999852354938727,
+ "eval_steps": 500,
+ "global_step": 16932,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.15,
+ "learning_rate": 5.835794447725931e-05,
+ "loss": 1.4468,
+ "step": 500
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.00011742468989958655,
+ "loss": 0.9754,
+ "step": 1000
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.00017649143532191377,
+ "loss": 0.9429,
+ "step": 1500
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.000235558180744241,
+ "loss": 0.9147,
+ "step": 2000
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 0.00029462492616656825,
+ "loss": 0.9067,
+ "step": 2500
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 0.00035369167158889544,
+ "loss": 0.8978,
+ "step": 3000
+ },
+ {
+ "epoch": 1.0,
+ "eval_loss": 0.8805813789367676,
+ "eval_runtime": 88.2945,
+ "eval_samples_per_second": 17.057,
+ "eval_steps_per_second": 2.141,
+ "step": 3386
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 0.00039998760393503537,
+ "loss": 0.895,
+ "step": 3500
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 0.0003996072594095129,
+ "loss": 0.8687,
+ "step": 4000
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00039869668890858337,
+ "loss": 0.8884,
+ "step": 4500
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 0.00039725831122269285,
+ "loss": 0.8715,
+ "step": 5000
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 0.00039529594718087214,
+ "loss": 0.8645,
+ "step": 5500
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 0.0003928148095012922,
+ "loss": 0.8666,
+ "step": 6000
+ },
+ {
+ "epoch": 1.92,
+ "learning_rate": 0.0003898214889444803,
+ "loss": 0.8719,
+ "step": 6500
+ },
+ {
+ "epoch": 2.0,
+ "eval_loss": 0.8552550673484802,
+ "eval_runtime": 88.3232,
+ "eval_samples_per_second": 17.051,
+ "eval_steps_per_second": 2.14,
+ "step": 6773
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.00038632393680597854,
+ "loss": 0.8438,
+ "step": 7000
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.0003823314437949511,
+ "loss": 0.8308,
+ "step": 7500
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00037785461535484375,
+ "loss": 0.8259,
+ "step": 8000
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.0003729053434916558,
+ "loss": 0.8324,
+ "step": 8500
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.0003674967751846552,
+ "loss": 0.8413,
+ "step": 9000
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.0003616554183563445,
+ "loss": 0.8322,
+ "step": 9500
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.00035537338261496887,
+ "loss": 0.8368,
+ "step": 10000
+ },
+ {
+ "epoch": 3.0,
+ "eval_loss": 0.8443310260772705,
+ "eval_runtime": 88.3102,
+ "eval_samples_per_second": 17.054,
+ "eval_steps_per_second": 2.14,
+ "step": 10159
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.0003486786213865893,
+ "loss": 0.8088,
+ "step": 10500
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.0003415889182744321,
+ "loss": 0.8003,
+ "step": 11000
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.0003341231059840768,
+ "loss": 0.805,
+ "step": 11500
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 0.0003263010162972709,
+ "loss": 0.8061,
+ "step": 12000
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 0.00031814342739185336,
+ "loss": 0.8008,
+ "step": 12500
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 0.000309672008647721,
+ "loss": 0.8029,
+ "step": 13000
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": 0.00030090926308545536,
+ "loss": 0.8056,
+ "step": 13500
+ },
+ {
+ "epoch": 4.0,
+ "eval_loss": 0.8322489857673645,
+ "eval_runtime": 88.3294,
+ "eval_samples_per_second": 17.05,
+ "eval_steps_per_second": 2.14,
+ "step": 13546
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": 0.00029187846759051,
+ "loss": 0.7649,
+ "step": 14000
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": 0.00028260361108174584,
+ "loss": 0.7674,
+ "step": 14500
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": 0.0002731093307885585,
+ "loss": 0.7635,
+ "step": 15000
+ },
+ {
+ "epoch": 4.58,
+ "learning_rate": 0.0002634208468058692,
+ "loss": 0.7759,
+ "step": 15500
+ },
+ {
+ "epoch": 4.72,
+ "learning_rate": 0.000253563895100822,
+ "loss": 0.7669,
+ "step": 16000
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": 0.0002435847820221107,
+ "loss": 0.7708,
+ "step": 16500
+ },
+ {
+ "epoch": 5.0,
+ "eval_loss": 0.8134331107139587,
+ "eval_runtime": 88.3002,
+ "eval_samples_per_second": 17.055,
+ "eval_steps_per_second": 2.14,
+ "step": 16932
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 33860,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "total_flos": 8.503491256748114e+17,
+ "trial_name": null,
+ "trial_params": null
+ }
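The eval loss improves monotonically over the five epochs (0.8806, 0.8553, 0.8443, 0.8322, 0.8134), which is why `best_model_checkpoint` points at this checkpoint. A sketch for extracting those values from the file (assuming a local copy):

```python
import json

with open("checkpoint-16932/trainer_state.json") as f:
    state = json.load(f)

# Keep only the evaluation entries from the mixed train/eval log.
for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(f"epoch {entry['epoch']:.0f}: eval_loss {entry['eval_loss']:.4f}")
```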
checkpoint-16932/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:57ba9992e8be82ca13275abd7c4d38c76e2e922e71387553242d20e094340bc1
+ size 4347
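training_args.bin is a pickled `transformers.TrainingArguments` object rather than a weight file. A sketch of inspecting it (assuming a compatible `transformers` version is importable; `weights_only=False` is needed because the file is a pickle, not a tensor archive):

```python
import torch

# Unpickles the TrainingArguments saved by the Trainer.
args = torch.load("checkpoint-16932/training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)
```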