Mel-Iza0 committed
Commit bc3f42b · Parent(s): 67a9873

Training in progress, epoch 4, checkpoint
checkpoint-13546/README.md ADDED
@@ -0,0 +1,20 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: True
+ - bnb_4bit_compute_dtype: bfloat16
+
+ ### Framework versions
+
+ - PEFT 0.4.0
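The README above is the stub that `peft` writes at save time. For context, the listed settings correspond to a `transformers` `BitsAndBytesConfig`; a minimal sketch of an equivalent object (the actual training script is not part of this commit):

```python
import torch
from transformers import BitsAndBytesConfig

# Sketch: rebuild the 4-bit config listed in the README above.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # load_in_4bit: True
    bnb_4bit_quant_type="nf4",              # bnb_4bit_quant_type: nf4
    bnb_4bit_use_double_quant=True,         # bnb_4bit_use_double_quant: True
    bnb_4bit_compute_dtype=torch.bfloat16,  # bnb_4bit_compute_dtype: bfloat16
)
```

NF4 with double quantization and bfloat16 compute is the usual QLoRA recipe, which matches the LoRA adapter config in the next file.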
checkpoint-13546/adapter_config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "auto_mapping": null,
+   "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "lora_alpha": 16,
+   "lora_dropout": 0.1,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "revision": null,
+   "target_modules": [
+     "q_proj",
+     "k_proj",
+     "v_proj",
+     "o_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
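`adapter_config.json` describes the LoRA adapter: rank 8, alpha 16, dropout 0.1, applied to the four attention projections of `mistralai/Mistral-7B-v0.1`. A hedged sketch of attaching this checkpoint with `peft` (the path assumes a local clone with LFS objects pulled; the base model could equally be loaded 4-bit with the config shown earlier):

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM

# Sketch: attach the LoRA adapter stored in this checkpoint directory
# to the base model it was trained against.
base = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
model = PeftModel.from_pretrained(base, "checkpoint-13546")
model.eval()
```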
checkpoint-13546/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5a6295225def52ea99d44f746af4683cbdba11aa3ca9d3400ea576805e28858b
+ size 13677261
checkpoint-13546/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2c3a392347e9f30d15162edc53b2fb9ef18be158c100090cda7bb2352f4381f9
+ size 13648432
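`adapter_model.bin` and `adapter_model.safetensors` are stored as git-lfs pointer files: the repository tracks only the `oid` (a SHA-256 of the payload) and `size`, and `git lfs pull` fetches the ~13 MB of actual adapter weights. A pulled file can be checked against its pointer, for example:

```python
import hashlib

# Sketch: verify a pulled LFS object against the oid in its pointer file.
def sha256_of(path: str, chunk: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk):
            h.update(block)
    return h.hexdigest()

digest = sha256_of("checkpoint-13546/adapter_model.safetensors")
assert digest == "2c3a392347e9f30d15162edc53b2fb9ef18be158c100090cda7bb2352f4381f9"
```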
checkpoint-13546/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ee0f5fae95ea0c23a4f8d956287e5853b22d03766123e215ba803940f6820cc1
+ size 27370181
checkpoint-13546/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:064c5ce4ebf53d0db14e4cc2631af8e522f6bbe8bc7635d3f4f1f5608abd4012
+ size 14575
checkpoint-13546/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f952043f150812cd5e62e79be842309502cd8c2893aebd0918819a8caadece76
+ size 627
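`optimizer.pt`, `scheduler.pt`, and `rng_state.pth` hold the optimizer moments, learning-rate scheduler state, and RNG state; together with the adapter weights they are what lets `transformers.Trainer` resume this run exactly rather than restart it. A sketch under stated assumptions (the training script itself is not in this commit, so `model` and `train_ds` below are hypothetical stand-ins):

```python
from transformers import Trainer, TrainingArguments

# Sketch only: `model` and `train_ds` stand in for the PeftModel and
# training dataset from the original run, neither of which is committed here.
trainer = Trainer(
    model=model,
    args=TrainingArguments(output_dir="./checkpoints"),
    train_dataset=train_ds,
)
# Restores adapter weights, optimizer, scheduler, and RNG state, then
# continues from global step 13546.
trainer.train(resume_from_checkpoint="checkpoint-13546")
```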
checkpoint-13546/special_tokens_map.json ADDED
@@ -0,0 +1,11 @@
+ {
+   "additional_special_tokens": [
+     "<unk>",
+     "<s>",
+     "</s>"
+   ],
+   "bos_token": "<s>",
+   "eos_token": "</s>",
+   "pad_token": "</s>",
+   "unk_token": "<unk>"
+ }
checkpoint-13546/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-13546/tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [
+     "<unk>",
+     "<s>",
+     "</s>"
+   ],
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": true,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "</s>",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": true
+ }
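Note that `pad_token` is set to `</s>`, i.e. the EOS token doubles as padding, a common workaround for Llama/Mistral tokenizers that ship without a dedicated pad token. Loading the checkpoint directory restores the tokenizer together with these special-token settings, e.g.:

```python
from transformers import AutoTokenizer

# Sketch: the checkpoint folder carries the full tokenizer state
# (tokenizer.json, tokenizer_config.json, special_tokens_map.json).
tok = AutoTokenizer.from_pretrained("checkpoint-13546")
print(tok.bos_token, tok.eos_token, tok.pad_token)  # <s> </s> </s>
```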
checkpoint-13546/trainer_state.json ADDED
@@ -0,0 +1,213 @@
+ {
+   "best_metric": 0.8322489857673645,
+   "best_model_checkpoint": "./Zeroshot/01-12-23-mistralai-Mistral-7B-v0.1_multilang-dataset-3.0.3-portuguese-2_epochs-10_batch_3/checkpoints/checkpoint-13546",
+   "epoch": 4.0,
+   "eval_steps": 500,
+   "global_step": 13546,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.15,
+       "learning_rate": 5.835794447725931e-05,
+       "loss": 1.4468,
+       "step": 500
+     },
+     {
+       "epoch": 0.3,
+       "learning_rate": 0.00011742468989958655,
+       "loss": 0.9754,
+       "step": 1000
+     },
+     {
+       "epoch": 0.44,
+       "learning_rate": 0.00017649143532191377,
+       "loss": 0.9429,
+       "step": 1500
+     },
+     {
+       "epoch": 0.59,
+       "learning_rate": 0.000235558180744241,
+       "loss": 0.9147,
+       "step": 2000
+     },
+     {
+       "epoch": 0.74,
+       "learning_rate": 0.00029462492616656825,
+       "loss": 0.9067,
+       "step": 2500
+     },
+     {
+       "epoch": 0.89,
+       "learning_rate": 0.00035369167158889544,
+       "loss": 0.8978,
+       "step": 3000
+     },
+     {
+       "epoch": 1.0,
+       "eval_loss": 0.8805813789367676,
+       "eval_runtime": 88.2945,
+       "eval_samples_per_second": 17.057,
+       "eval_steps_per_second": 2.141,
+       "step": 3386
+     },
+     {
+       "epoch": 1.03,
+       "learning_rate": 0.00039998760393503537,
+       "loss": 0.895,
+       "step": 3500
+     },
+     {
+       "epoch": 1.18,
+       "learning_rate": 0.0003996072594095129,
+       "loss": 0.8687,
+       "step": 4000
+     },
+     {
+       "epoch": 1.33,
+       "learning_rate": 0.00039869668890858337,
+       "loss": 0.8884,
+       "step": 4500
+     },
+     {
+       "epoch": 1.48,
+       "learning_rate": 0.00039725831122269285,
+       "loss": 0.8715,
+       "step": 5000
+     },
+     {
+       "epoch": 1.62,
+       "learning_rate": 0.00039529594718087214,
+       "loss": 0.8645,
+       "step": 5500
+     },
+     {
+       "epoch": 1.77,
+       "learning_rate": 0.0003928148095012922,
+       "loss": 0.8666,
+       "step": 6000
+     },
+     {
+       "epoch": 1.92,
+       "learning_rate": 0.0003898214889444803,
+       "loss": 0.8719,
+       "step": 6500
+     },
+     {
+       "epoch": 2.0,
+       "eval_loss": 0.8552550673484802,
+       "eval_runtime": 88.3232,
+       "eval_samples_per_second": 17.051,
+       "eval_steps_per_second": 2.14,
+       "step": 6773
+     },
+     {
+       "epoch": 2.07,
+       "learning_rate": 0.00038632393680597854,
+       "loss": 0.8438,
+       "step": 7000
+     },
+     {
+       "epoch": 2.21,
+       "learning_rate": 0.0003823314437949511,
+       "loss": 0.8308,
+       "step": 7500
+     },
+     {
+       "epoch": 2.36,
+       "learning_rate": 0.00037785461535484375,
+       "loss": 0.8259,
+       "step": 8000
+     },
+     {
+       "epoch": 2.51,
+       "learning_rate": 0.0003729053434916558,
+       "loss": 0.8324,
+       "step": 8500
+     },
+     {
+       "epoch": 2.66,
+       "learning_rate": 0.0003674967751846552,
+       "loss": 0.8413,
+       "step": 9000
+     },
+     {
+       "epoch": 2.81,
+       "learning_rate": 0.0003616554183563445,
+       "loss": 0.8322,
+       "step": 9500
+     },
+     {
+       "epoch": 2.95,
+       "learning_rate": 0.00035537338261496887,
+       "loss": 0.8368,
+       "step": 10000
+     },
+     {
+       "epoch": 3.0,
+       "eval_loss": 0.8443310260772705,
+       "eval_runtime": 88.3102,
+       "eval_samples_per_second": 17.054,
+       "eval_steps_per_second": 2.14,
+       "step": 10159
+     },
+     {
+       "epoch": 3.1,
+       "learning_rate": 0.0003486786213865893,
+       "loss": 0.8088,
+       "step": 10500
+     },
+     {
+       "epoch": 3.25,
+       "learning_rate": 0.0003415889182744321,
+       "loss": 0.8003,
+       "step": 11000
+     },
+     {
+       "epoch": 3.4,
+       "learning_rate": 0.0003341231059840768,
+       "loss": 0.805,
+       "step": 11500
+     },
+     {
+       "epoch": 3.54,
+       "learning_rate": 0.0003263010162972709,
+       "loss": 0.8061,
+       "step": 12000
+     },
+     {
+       "epoch": 3.69,
+       "learning_rate": 0.00031814342739185336,
+       "loss": 0.8008,
+       "step": 12500
+     },
+     {
+       "epoch": 3.84,
+       "learning_rate": 0.000309672008647721,
+       "loss": 0.8029,
+       "step": 13000
+     },
+     {
+       "epoch": 3.99,
+       "learning_rate": 0.00030090926308545536,
+       "loss": 0.8056,
+       "step": 13500
+     },
+     {
+       "epoch": 4.0,
+       "eval_loss": 0.8322489857673645,
+       "eval_runtime": 88.3294,
+       "eval_samples_per_second": 17.05,
+       "eval_steps_per_second": 2.14,
+       "step": 13546
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 33860,
+   "num_train_epochs": 10,
+   "save_steps": 500,
+   "total_flos": 6.802174422960538e+17,
+   "trial_name": null,
+   "trial_params": null
+ }
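`trainer_state.json` is plain JSON, so the training curve can be read straight out of `log_history`: eval loss drops 0.8806 → 0.8553 → 0.8443 → 0.8322 across epochs 1–4, which is why this checkpoint is recorded as `best_model_checkpoint` at step 13546 of a planned 33860 (10 epochs). A small sketch:

```python
import json

with open("checkpoint-13546/trainer_state.json") as f:
    state = json.load(f)

# Per-epoch evaluation losses logged by the Trainer.
evals = [(e["epoch"], e["eval_loss"])
         for e in state["log_history"] if "eval_loss" in e]
print(evals)  # [(1.0, 0.8806...), (2.0, 0.8553...), (3.0, 0.8443...), (4.0, 0.8322...)]
```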
checkpoint-13546/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:57ba9992e8be82ca13275abd7c4d38c76e2e922e71387553242d20e094340bc1
+ size 4347
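`training_args.bin` is the pickled `TrainingArguments` object rather than human-readable JSON; it can be inspected with `torch.load`. A sketch (on recent PyTorch, `weights_only=False` is required to unpickle non-tensor objects):

```python
import torch

# Sketch: inspect the serialized TrainingArguments from this checkpoint.
args = torch.load("checkpoint-13546/training_args.bin", weights_only=False)
print(args.num_train_epochs, args.save_steps)  # 10, 500 per trainer_state.json
```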