ayjays132 commited on
Commit
95a8dd9
·
verified ·
1 Parent(s): cba3cd0

Upload 11 files

Browse files
config.json CHANGED
@@ -1,155 +1,154 @@
1
- {
2
- "_name_or_path": "ayjays132/CustomGPT2Conversational",
3
- "activation_function": "gelu_new",
4
- "advanced_model_options": {
5
- "contextual_embeddings": {
6
- "approaches": [
7
- "contextual_attention_mechanisms",
8
- "semantic_embedding_regularization"
9
- ],
10
- "enable": true
11
- },
12
- "dynamic_adaptation": {
13
- "enable": true,
14
- "techniques": [
15
- "adaptive_layer_dropping",
16
- "dynamic_context_window"
17
- ]
18
- },
19
- "innovative_neuron_growth": {
20
- "enable": true,
21
- "strategies": [
22
- "selective_neuron_pruning",
23
- "progressive_neuron_expansion"
24
- ]
25
- },
26
- "memory_optimization": {
27
- "enable": true,
28
- "methods": [
29
- "gradient_checkpointing",
30
- "memory-efficient_attention"
31
- ]
32
- },
33
- "meta_learning": {
34
- "approaches": [
35
- "meta_learning_rate_adjustment",
36
- "online_adaptation"
37
- ],
38
- "enable": true
39
- },
40
- "secret_advanced_options": {
41
- "adaptive_token_embedding": {
42
- "enable": true,
43
- "strategies": [
44
- "dynamic_embedding_resizing",
45
- "contextual_embedding_scaling"
46
- ]
47
- },
48
- "future_context_prediction": {
49
- "enable": true,
50
- "techniques": [
51
- "lookahead_context_integration",
52
- "predictive_attention_mechanisms"
53
- ]
54
- },
55
- "multi_modal_integration": {
56
- "enable": true,
57
- "methods": [
58
- "text_image_alignment",
59
- "cross_modal_attention"
60
- ]
61
- }
62
- }
63
- },
64
- "architectures": [
65
- "GPT2LMHeadModel"
66
- ],
67
- "max_length": 512,
68
- "min_length": 50,
69
- "num_beams": 5,
70
- "length_penalty": 1.0,
71
- "no_repeat_ngram_size": 2,
72
- "early_stopping": true,
73
- "attn_pdrop": 0.1,
74
- "bos_token_id": 50256,
75
- "context_window": 20,
76
- "contextual_embedding_dim": 1024,
77
- "device": "cuda",
78
- "dropout_rate": 0.1,
79
- "embd_pdrop": 0.1,
80
- "embedding_dim": 1024,
81
- "eos_token_id": 50256,
82
- "hidden_dim": 1024,
83
- "initializer_range": 0.02,
84
- "innovative_growth_capacity": 50000,
85
- "integration_settings": {
86
- "config_name": "config.json",
87
- "load_from_transformers": true,
88
- "pytorch_dump_folder_path": "./model_save",
89
- "pytorch_model_bin_name": "pytorch_model.bin"
90
- },
91
- "layer_norm_epsilon": 1e-05,
92
- "max_memory_size": 100000,
93
- "max_neurons": 100,
94
- "meta_learning_rate": 0.001,
95
- "model_type": "gpt2",
96
- "n_ctx": 1024,
97
- "n_embd": 1024,
98
- "n_head": 16,
99
- "n_inner": null,
100
- "n_layer": 24,
101
- "n_positions": 1024,
102
- "num_embeddings": 50268,
103
- "num_heads": 64,
104
- "num_layers": 24,
105
- "output_attentions": true,
106
- "output_hidden_states": true,
107
- "pad_token_id": 50256,
108
- "reorder_and_upcast_attn": false,
109
- "resid_pdrop": 0.1,
110
- "scale_attn_by_inverse_layer_idx": false,
111
- "scale_attn_weights": true,
112
- "sep_token_id": -1,
113
- "special_tokens": {
114
- "additional_special_tokens": [
115
- "<greeting>",
116
- "<farewell>",
117
- "<thank>",
118
- "<apology>"
119
- ],
120
- "bos_token": "<bos>",
121
- "cls_token": "<cls>",
122
- "eos_token": "<eos>",
123
- "mask_token": "<mask>",
124
- "pad_token": "<pad>",
125
- "sep_token": "<sep>",
126
- "unk_token": "<unk>"
127
- },
128
- "state_shape": null,
129
- "summary_activation": null,
130
- "summary_first_dropout": 0.1,
131
- "summary_proj_to_labels": true,
132
- "summary_type": "cls_index",
133
- "summary_use_proj": true,
134
- "target_q_model": null,
135
- "task_specific_params": {
136
- "text-generation": {
137
- "do_sample": true,
138
- "early_stopping": true,
139
- "length_penalty": 1.0,
140
- "max_length": 2048,
141
- "min_length": 64,
142
- "no_repeat_ngram_size": 2,
143
- "num_beams": 8,
144
- "num_return_sequences": 3,
145
- "repetition_penalty": 1.2,
146
- "temperature": 0.9,
147
- "top_k": 50,
148
- "top_p": 0.95
149
- }
150
- },
151
- "torch_dtype": "float32",
152
- "transformers_version": "4.28.0.dev0",
153
- "use_cache": true,
154
- "vocab_size": 50257
155
- }
 
1
+ {
2
+ "_name_or_path": "ayjays132/CustomGPT2Conversational",
3
+ "activation_function": "gelu_new",
4
+ "advanced_model_options": {
5
+ "contextual_embeddings": {
6
+ "approaches": [
7
+ "contextual_attention_mechanisms",
8
+ "semantic_embedding_regularization"
9
+ ],
10
+ "enable": true
11
+ },
12
+ "dynamic_adaptation": {
13
+ "enable": true,
14
+ "techniques": [
15
+ "adaptive_layer_dropping",
16
+ "dynamic_context_window"
17
+ ]
18
+ },
19
+ "innovative_neuron_growth": {
20
+ "enable": true,
21
+ "strategies": [
22
+ "selective_neuron_pruning",
23
+ "progressive_neuron_expansion"
24
+ ]
25
+ },
26
+ "memory_optimization": {
27
+ "enable": true,
28
+ "methods": [
29
+ "gradient_checkpointing",
30
+ "memory-efficient_attention"
31
+ ]
32
+ },
33
+ "meta_learning": {
34
+ "approaches": [
35
+ "meta_learning_rate_adjustment",
36
+ "online_adaptation"
37
+ ],
38
+ "enable": true
39
+ },
40
+ "secret_advanced_options": {
41
+ "adaptive_token_embedding": {
42
+ "enable": true,
43
+ "strategies": [
44
+ "dynamic_embedding_resizing",
45
+ "contextual_embedding_scaling"
46
+ ]
47
+ },
48
+ "future_context_prediction": {
49
+ "enable": true,
50
+ "techniques": [
51
+ "lookahead_context_integration",
52
+ "predictive_attention_mechanisms"
53
+ ]
54
+ },
55
+ "multi_modal_integration": {
56
+ "enable": true,
57
+ "methods": [
58
+ "text_image_alignment",
59
+ "cross_modal_attention"
60
+ ]
61
+ }
62
+ }
63
+ },
64
+ "architectures": [
65
+ "GPT2LMHeadModel"
66
+ ],
67
+ "attn_pdrop": 0.1,
68
+ "bos_token_id": 50256,
69
+ "context_window": 20,
70
+ "contextual_embedding_dim": 1024,
71
+ "device": "cuda",
72
+ "dropout_rate": 0.1,
73
+ "early_stopping": true,
74
+ "embd_pdrop": 0.1,
75
+ "embedding_dim": 1024,
76
+ "eos_token_id": 50256,
77
+ "hidden_dim": 1024,
78
+ "initializer_range": 0.02,
79
+ "innovative_growth_capacity": 50000,
80
+ "integration_settings": {
81
+ "config_name": "config.json",
82
+ "load_from_transformers": true,
83
+ "pytorch_dump_folder_path": "./model_save",
84
+ "pytorch_model_bin_name": "pytorch_model.bin"
85
+ },
86
+ "layer_norm_epsilon": 1e-05,
87
+ "max_length": 512,
88
+ "max_memory_size": 100000,
89
+ "max_neurons": 100,
90
+ "meta_learning_rate": 0.001,
91
+ "min_length": 50,
92
+ "model_type": "gpt2",
93
+ "n_ctx": 1024,
94
+ "n_embd": 1024,
95
+ "n_head": 16,
96
+ "n_inner": null,
97
+ "n_layer": 24,
98
+ "n_positions": 1024,
99
+ "no_repeat_ngram_size": 2,
100
+ "num_beams": 5,
101
+ "num_embeddings": 50268,
102
+ "num_heads": 64,
103
+ "num_layers": 24,
104
+ "output_attentions": true,
105
+ "output_hidden_states": true,
106
+ "pad_token_id": 50256,
107
+ "reorder_and_upcast_attn": false,
108
+ "resid_pdrop": 0.1,
109
+ "scale_attn_by_inverse_layer_idx": false,
110
+ "scale_attn_weights": true,
111
+ "sep_token_id": -1,
112
+ "special_tokens": {
113
+ "additional_special_tokens": [
114
+ "<greeting>",
115
+ "<farewell>",
116
+ "<thank>",
117
+ "<apology>"
118
+ ],
119
+ "bos_token": "<bos>",
120
+ "cls_token": "<cls>",
121
+ "eos_token": "<eos>",
122
+ "mask_token": "<mask>",
123
+ "pad_token": "<pad>",
124
+ "sep_token": "<sep>",
125
+ "unk_token": "<unk>"
126
+ },
127
+ "state_shape": null,
128
+ "summary_activation": null,
129
+ "summary_first_dropout": 0.1,
130
+ "summary_proj_to_labels": true,
131
+ "summary_type": "cls_index",
132
+ "summary_use_proj": true,
133
+ "target_q_model": null,
134
+ "task_specific_params": {
135
+ "text-generation": {
136
+ "do_sample": true,
137
+ "early_stopping": true,
138
+ "length_penalty": 1.0,
139
+ "max_length": 2048,
140
+ "min_length": 64,
141
+ "no_repeat_ngram_size": 2,
142
+ "num_beams": 8,
143
+ "num_return_sequences": 3,
144
+ "repetition_penalty": 1.2,
145
+ "temperature": 0.9,
146
+ "top_k": 50,
147
+ "top_p": 0.95
148
+ }
149
+ },
150
+ "torch_dtype": "float32",
151
+ "transformers_version": "4.44.0",
152
+ "use_cache": true,
153
+ "vocab_size": 50257
154
+ }
 
generation_config.json CHANGED
@@ -1,22 +1,14 @@
1
- {
2
- "_from_model_config": true,
3
- "bos_token_id": 50258,
4
- "diversity_penalty": 1.0, // Increased to encourage diversity
5
- "do_sample": true,
6
- "early_stopping": false,
7
- "encoder_no_repeat_ngram_size": 3,
8
- "eos_token_id": 50259,
9
- "length_penalty": 2.0,
10
- "max_length": 2048,
11
- "min_length": 50,
12
- "no_repeat_ngram_size": 3,
13
- "num_beams": 3, // Decreased to encourage more variety
14
- "num_return_sequences": 5,
15
- "output_attentions": false,
16
- "output_hidden_states": false,
17
- "pad_token_id": 50260,
18
- "repetition_penalty": 1.2, // Slightly reduced to balance repetition
19
- "temperature": 0.8, // Increased for more variety
20
- "top_p": 0.85, // Adjusted to change the sampling method
21
- "transformers_version": "4.28.0.dev0"
22
- }
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "early_stopping": true,
5
+ "eos_token_id": 50256,
6
+ "max_length": 512,
7
+ "min_length": 50,
8
+ "no_repeat_ngram_size": 2,
9
+ "num_beams": 5,
10
+ "output_attentions": true,
11
+ "output_hidden_states": true,
12
+ "pad_token_id": 50256,
13
+ "transformers_version": "4.44.0"
14
+ }
 
 
 
 
 
 
 
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dfbfe49a0df1bf0c68b96e533cfcac9860d224e6cd93987cefd99755aeed9293
3
- size 1650346720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bedad08b6a15276b927bbfe52f63c93bd8de121c0ed283143f52cb05a4034952
3
+ size 1419322880
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab539d99a780743646d9a767aa02600e44b3d55247a9d2645ddfbe5ddac77d90
3
- size 7950060608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec8bbad0dfcf56ce50d8403fead5d33759f8377b4ecc3586d48e4f70e1fa7130
3
+ size 2838829242
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e01b7f87643c2c0e2c54e0176baa248efa5b4d899d20eb6dccb59d8207cd98d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8346bdbc56406753b30e03fb8823387b71174c06848cd07d3ae27967e07a1236
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7a322f5bf45f9f9b8ad8738431158838d76971e674aed83470f4b2485f7b2d6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fd77b5f09d2be234b9c114ec17c3402cfa26c5f985967d53d9ecd811d60c62d
3
  size 1064
special_tokens_map.json CHANGED
@@ -1,9 +1,51 @@
1
  {
2
- "bos_token": "[BOS]",
3
- "cls_token": "[CLS]",
4
- "eos_token": "[EOS]",
5
- "mask_token": "[MASK]",
6
- "pad_token": "[UNK]",
7
- "sep_token": "[SEP]",
8
- "unk_token": "[UNK]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  }
 
1
  {
2
+ "bos_token": {
3
+ "content": "[BOS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "[CLS]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "[EOS]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "[MASK]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "[SEP]",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "[UNK]",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1c20b7a90d2b5bd165c0532f7f94a9f412f47fe9fca3bb900b0a9cfd95365d1
3
- size 2114555
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c96bac1238975e345e9433da7045e227abfa9551536c55b7e5caa292114893c
3
+ size 2217058
tokenizer_config.json CHANGED
@@ -1,667 +1,78 @@
1
- {
2
- "add_prefix_space": false,
3
- "added_tokens_decoder": {
4
- "11274": {
5
- "content": "good",
6
- "lstrip": false,
7
- "normalized": true,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": false
11
- },
12
- "26209": {
13
- "content": "response",
14
- "lstrip": false,
15
- "normalized": true,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": false
19
- },
20
- "34191": {
21
- "content": "happy",
22
- "lstrip": false,
23
- "normalized": true,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": false
27
- },
28
- "38986": {
29
- "content": "environment",
30
- "lstrip": false,
31
- "normalized": true,
32
- "rstrip": false,
33
- "single_word": false,
34
- "special": false
35
- },
36
- "50256": {
37
- "content": "",
38
- "lstrip": false,
39
- "normalized": false,
40
- "rstrip": false,
41
- "single_word": false,
42
- "special": true
43
- },
44
- "50257": {
45
- "content": "",
46
- "lstrip": false,
47
- "normalized": false,
48
- "rstrip": false,
49
- "single_word": false,
50
- "special": true
51
- },
52
- "50258": {
53
- "content": "<bos>",
54
- "lstrip": false,
55
- "normalized": false,
56
- "rstrip": false,
57
- "single_word": false,
58
- "special": true
59
- },
60
- "50259": {
61
- "content": "<eos>",
62
- "lstrip": false,
63
- "normalized": false,
64
- "rstrip": false,
65
- "single_word": false,
66
- "special": true
67
- },
68
- "50260": {
69
- "content": "<pad>",
70
- "lstrip": false,
71
- "normalized": false,
72
- "rstrip": false,
73
- "single_word": false,
74
- "special": true
75
- },
76
- "50261": {
77
- "content": "<unk>",
78
- "lstrip": false,
79
- "normalized": false,
80
- "rstrip": false,
81
- "single_word": false,
82
- "special": true
83
- },
84
- "50262": {
85
- "content": "<sep>",
86
- "lstrip": false,
87
- "normalized": false,
88
- "rstrip": false,
89
- "single_word": false,
90
- "special": true
91
- },
92
- "50263": {
93
- "content": "<cls>",
94
- "lstrip": false,
95
- "normalized": false,
96
- "rstrip": false,
97
- "single_word": false,
98
- "special": true
99
- },
100
- "50264": {
101
- "content": "<mask>",
102
- "lstrip": false,
103
- "normalized": false,
104
- "rstrip": false,
105
- "single_word": false,
106
- "special": true
107
- },
108
- "50265": {
109
- "content": "<greeting>",
110
- "lstrip": false,
111
- "normalized": false,
112
- "rstrip": false,
113
- "single_word": false,
114
- "special": true
115
- },
116
- "50266": {
117
- "content": "<farewell>",
118
- "lstrip": false,
119
- "normalized": false,
120
- "rstrip": false,
121
- "single_word": false,
122
- "special": true
123
- },
124
- "50267": {
125
- "content": "<thank>",
126
- "lstrip": false,
127
- "normalized": false,
128
- "rstrip": false,
129
- "single_word": false,
130
- "special": true
131
- },
132
- "50268": {
133
- "content": "<apology>",
134
- "lstrip": false,
135
- "normalized": false,
136
- "rstrip": false,
137
- "single_word": false,
138
- "special": true
139
- },
140
- "50269": {
141
- "content": "[PAD]",
142
- "lstrip": false,
143
- "normalized": false,
144
- "rstrip": false,
145
- "single_word": false,
146
- "special": true
147
- },
148
- "50270": {
149
- "content": "intelligent",
150
- "lstrip": false,
151
- "normalized": true,
152
- "rstrip": false,
153
- "single_word": false,
154
- "special": false
155
- },
156
- "50271": {
157
- "content": "amazeballs",
158
- "lstrip": false,
159
- "normalized": true,
160
- "rstrip": false,
161
- "single_word": false,
162
- "special": false
163
- },
164
- "50272": {
165
- "content": "cryptocurrency",
166
- "lstrip": false,
167
- "normalized": true,
168
- "rstrip": false,
169
- "single_word": false,
170
- "special": false
171
- },
172
- "50273": {
173
- "content": "webinar",
174
- "lstrip": false,
175
- "normalized": true,
176
- "rstrip": false,
177
- "single_word": false,
178
- "special": false
179
- },
180
- "50274": {
181
- "content": "vlog",
182
- "lstrip": false,
183
- "normalized": true,
184
- "rstrip": false,
185
- "single_word": false,
186
- "special": false
187
- },
188
- "50275": {
189
- "content": "upcycle",
190
- "lstrip": false,
191
- "normalized": true,
192
- "rstrip": false,
193
- "single_word": false,
194
- "special": false
195
- },
196
- "50276": {
197
- "content": "photobomb",
198
- "lstrip": false,
199
- "normalized": true,
200
- "rstrip": false,
201
- "single_word": false,
202
- "special": false
203
- },
204
- "50277": {
205
- "content": "facepalm",
206
- "lstrip": false,
207
- "normalized": true,
208
- "rstrip": false,
209
- "single_word": false,
210
- "special": false
211
- },
212
- "50278": {
213
- "content": "crowdfunding",
214
- "lstrip": false,
215
- "normalized": true,
216
- "rstrip": false,
217
- "single_word": false,
218
- "special": false
219
- },
220
- "50279": {
221
- "content": "bromance",
222
- "lstrip": false,
223
- "normalized": true,
224
- "rstrip": false,
225
- "single_word": false,
226
- "special": false
227
- },
228
- "50280": {
229
- "content": "hangry",
230
- "lstrip": false,
231
- "normalized": true,
232
- "rstrip": false,
233
- "single_word": false,
234
- "special": false
235
- },
236
- "50281": {
237
- "content": "empathy",
238
- "lstrip": false,
239
- "normalized": true,
240
- "rstrip": false,
241
- "single_word": false,
242
- "special": false
243
- },
244
- "50282": {
245
- "content": "active_listening",
246
- "lstrip": false,
247
- "normalized": true,
248
- "rstrip": false,
249
- "single_word": false,
250
- "special": false
251
- },
252
- "50283": {
253
- "content": "open_ended_question",
254
- "lstrip": false,
255
- "normalized": true,
256
- "rstrip": false,
257
- "single_word": false,
258
- "special": false
259
- },
260
- "50284": {
261
- "content": "reflective_summary",
262
- "lstrip": false,
263
- "normalized": true,
264
- "rstrip": false,
265
- "single_word": false,
266
- "special": false
267
- },
268
- "50285": {
269
- "content": "active_listener",
270
- "lstrip": false,
271
- "normalized": true,
272
- "rstrip": false,
273
- "single_word": false,
274
- "special": false
275
- },
276
- "50286": {
277
- "content": "emotional_intelligence",
278
- "lstrip": false,
279
- "normalized": true,
280
- "rstrip": false,
281
- "single_word": false,
282
- "special": false
283
- },
284
- "50287": {
285
- "content": "interpersonal_skill",
286
- "lstrip": false,
287
- "normalized": true,
288
- "rstrip": false,
289
- "single_word": false,
290
- "special": false
291
- },
292
- "50288": {
293
- "content": "non_verbal_cue",
294
- "lstrip": false,
295
- "normalized": true,
296
- "rstrip": false,
297
- "single_word": false,
298
- "special": false
299
- },
300
- "50289": {
301
- "content": "personal_boundaries",
302
- "lstrip": false,
303
- "normalized": true,
304
- "rstrip": false,
305
- "single_word": false,
306
- "special": false
307
- },
308
- "50290": {
309
- "content": "active_listener",
310
- "lstrip": false,
311
- "normalized": true,
312
- "rstrip": false,
313
- "single_word": false,
314
- "special": false
315
- },
316
- "50291": {
317
- "content": "non_judgmental",
318
- "lstrip": false,
319
- "normalized": true,
320
- "rstrip": false,
321
- "single_word": false,
322
- "special": false
323
- },
324
- "50292": {
325
- "content": "cultural_sensitivity",
326
- "lstrip": false,
327
- "normalized": true,
328
- "rstrip": false,
329
- "single_word": false,
330
- "special": false
331
- },
332
- "50293": {
333
- "content": "emotional_regulation",
334
- "lstrip": false,
335
- "normalized": true,
336
- "rstrip": false,
337
- "single_word": false,
338
- "special": false
339
- },
340
- "50294": {
341
- "content": "self_awareness",
342
- "lstrip": false,
343
- "normalized": true,
344
- "rstrip": false,
345
- "single_word": false,
346
- "special": false
347
- },
348
- "50295": {
349
- "content": "social_skill",
350
- "lstrip": false,
351
- "normalized": true,
352
- "rstrip": false,
353
- "single_word": false,
354
- "special": false
355
- },
356
- "50296": {
357
- "content": "communication_style",
358
- "lstrip": false,
359
- "normalized": true,
360
- "rstrip": false,
361
- "single_word": false,
362
- "special": false
363
- },
364
- "50297": {
365
- "content": "conversational_skill",
366
- "lstrip": false,
367
- "normalized": true,
368
- "rstrip": false,
369
- "single_word": false,
370
- "special": false
371
- },
372
- "50298": {
373
- "content": "emotional_literacy",
374
- "lstrip": false,
375
- "normalized": true,
376
- "rstrip": false,
377
- "single_word": false,
378
- "special": false
379
- },
380
- "50299": {
381
- "content": "interpersonal_communication",
382
- "lstrip": false,
383
- "normalized": true,
384
- "rstrip": false,
385
- "single_word": false,
386
- "special": false
387
- },
388
- "50300": {
389
- "content": "resilience",
390
- "lstrip": false,
391
- "normalized": true,
392
- "rstrip": false,
393
- "single_word": false,
394
- "special": false
395
- },
396
- "50301": {
397
- "content": "stress_management",
398
- "lstrip": false,
399
- "normalized": true,
400
- "rstrip": false,
401
- "single_word": false,
402
- "special": false
403
- },
404
- "50302": {
405
- "content": "self_management",
406
- "lstrip": false,
407
- "normalized": true,
408
- "rstrip": false,
409
- "single_word": false,
410
- "special": false
411
- },
412
- "50303": {
413
- "content": "self_efficacy",
414
- "lstrip": false,
415
- "normalized": true,
416
- "rstrip": false,
417
- "single_word": false,
418
- "special": false
419
- },
420
- "50304": {
421
- "content": "self_motivation",
422
- "lstrip": false,
423
- "normalized": true,
424
- "rstrip": false,
425
- "single_word": false,
426
- "special": false
427
- },
428
- "50305": {
429
- "content": "self_control",
430
- "lstrip": false,
431
- "normalized": true,
432
- "rstrip": false,
433
- "single_word": false,
434
- "special": false
435
- },
436
- "50306": {
437
- "content": "self_esteem",
438
- "lstrip": false,
439
- "normalized": true,
440
- "rstrip": false,
441
- "single_word": false,
442
- "special": false
443
- },
444
- "50307": {
445
- "content": "emotional_balance",
446
- "lstrip": false,
447
- "normalized": true,
448
- "rstrip": false,
449
- "single_word": false,
450
- "special": false
451
- },
452
- "50308": {
453
- "content": "stress_reduction",
454
- "lstrip": false,
455
- "normalized": true,
456
- "rstrip": false,
457
- "single_word": false,
458
- "special": false
459
- },
460
- "50309": {
461
- "content": "goal_setting",
462
- "lstrip": false,
463
- "normalized": true,
464
- "rstrip": false,
465
- "single_word": false,
466
- "special": false
467
- },
468
- "50310": {
469
- "content": "time_management",
470
- "lstrip": false,
471
- "normalized": true,
472
- "rstrip": false,
473
- "single_word": false,
474
- "special": false
475
- },
476
- "50311": {
477
- "content": "organization_skill",
478
- "lstrip": false,
479
- "normalized": true,
480
- "rstrip": false,
481
- "single_word": false,
482
- "special": false
483
- },
484
- "50312": {
485
- "content": "self_directed_learning",
486
- "lstrip": false,
487
- "normalized": true,
488
- "rstrip": false,
489
- "single_word": false,
490
- "special": false
491
- },
492
- "50313": {
493
- "content": "self_reflection",
494
- "lstrip": false,
495
- "normalized": true,
496
- "rstrip": false,
497
- "single_word": false,
498
- "special": false
499
- },
500
- "50314": {
501
- "content": "empathy_map",
502
- "lstrip": false,
503
- "normalized": true,
504
- "rstrip": false,
505
- "single_word": false,
506
- "special": false
507
- },
508
- "50315": {
509
- "content": "interpersonal_relationship",
510
- "lstrip": false,
511
- "normalized": true,
512
- "rstrip": false,
513
- "single_word": false,
514
- "special": false
515
- },
516
- "50316": {
517
- "content": "personal_growth",
518
- "lstrip": false,
519
- "normalized": true,
520
- "rstrip": false,
521
- "single_word": false,
522
- "special": false
523
- },
524
- "50317": {
525
- "content": "personal_development",
526
- "lstrip": false,
527
- "normalized": true,
528
- "rstrip": false,
529
- "single_word": false,
530
- "special": false
531
- },
532
- "50318": {
533
- "content": "mindfulness",
534
- "lstrip": false,
535
- "normalized": true,
536
- "rstrip": false,
537
- "single_word": false,
538
- "special": false
539
- },
540
- "50319": {
541
- "content": "personal_well_being",
542
- "lstrip": false,
543
- "normalized": true,
544
- "rstrip": false,
545
- "single_word": false,
546
- "special": false
547
- },
548
- "50320": {
549
- "content": "personal_resilience",
550
- "lstrip": false,
551
- "normalized": true,
552
- "rstrip": false,
553
- "single_word": false,
554
- "special": false
555
- },
556
- "50321": {
557
- "content": "self_care",
558
- "lstrip": false,
559
- "normalized": true,
560
- "rstrip": false,
561
- "single_word": false,
562
- "special": false
563
- },
564
- "50322": {
565
- "content": "self_improvement",
566
- "lstrip": false,
567
- "normalized": true,
568
- "rstrip": false,
569
- "single_word": false,
570
- "special": false
571
- },
572
- "50323": {
573
- "content": "personal_growth_plan",
574
- "lstrip": false,
575
- "normalized": true,
576
- "rstrip": false,
577
- "single_word": false,
578
- "special": false
579
- },
580
- "50324": {
581
- "content": "life_skill",
582
- "lstrip": false,
583
- "normalized": true,
584
- "rstrip": false,
585
- "single_word": false,
586
- "special": false
587
- },
588
- "50325": {
589
- "content": "self_renewal",
590
- "lstrip": false,
591
- "normalized": true,
592
- "rstrip": false,
593
- "single_word": false,
594
- "special": false
595
- },
596
- "50326": {
597
- "content": "self_growth",
598
- "lstrip": false,
599
- "normalized": true,
600
- "rstrip": false,
601
- "single_word": false,
602
- "special": false
603
- }
604
- },
605
- "additional_special_tokens": [
606
- "<greeting>",
607
- "<farewell>",
608
- "<thank>",
609
- "<apology>"
610
- ],
611
- "additional_vocab_files": {
612
- "bpe_merges_file": "merges.txt",
613
- "bpe_vocab_file": "vocab.json"
614
- },
615
- "advanced_options": {
616
- "dynamic_padding": true,
617
- "language_model_integration": {
618
- "enable": true,
619
- "models": [
620
- "bert-base-uncased",
621
- "roberta-base"
622
- ]
623
- },
624
- "lexical_analysis": {
625
- "enable": true,
626
- "methods": [
627
- "morphological",
628
- "syntactic",
629
- "semantic"
630
- ]
631
- },
632
- "subword_regularization": {
633
- "enable": true,
634
- "techniques": [
635
- "sampling",
636
- "dropping"
637
- ]
638
- }
639
- },
640
- "bos_token": "<bos>",
641
- "bpe_dropout": 0.1,
642
- "clean_up_tokenization_spaces": true,
643
- "cls_token": "<cls>",
644
- "do_lower_case": false,
645
- "enable_sentencepiece": true,
646
- "eos_token": "<eos>",
647
- "interleaved_tokenization": true,
648
- "mask_token": "<mask>",
649
- "max_length": 1024,
650
- "model_max_length": 1024,
651
- "pad_to_multiple_of": null,
652
- "pad_token": "<pad>",
653
- "pad_token_type_id": 0,
654
- "padding_side": "right",
655
- "sentence_piece_model_path": "spiece.model",
656
- "sep_token": "<sep>",
657
- "special_tokens_map_file": "special_tokens_map.json",
658
- "stride": 0,
659
- "strip_accents": null,
660
- "tokenizer_class": "GPT2Tokenizer",
661
- "truncation_side": "right",
662
- "truncation_strategy": "longest_first",
663
- "unk_token": "<unk>",
664
- "use_auth_token": true,
665
- "use_fast_tokenizer": true,
666
- "wordpieces_prefix": "##"
667
- }
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "5": {
44
+ "content": "[BOS]",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "6": {
52
+ "content": "[EOS]",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ }
59
+ },
60
+ "bos_token": "[BOS]",
61
+ "clean_up_tokenization_spaces": true,
62
+ "cls_token": "[CLS]",
63
+ "eos_token": "[EOS]",
64
+ "mask_token": "[MASK]",
65
+ "max_length": 1024,
66
+ "model_max_length": 1024,
67
+ "pad_to_multiple_of": null,
68
+ "pad_token": "[UNK]",
69
+ "pad_token_type_id": 0,
70
+ "padding": "max_length",
71
+ "padding_side": "right",
72
+ "sep_token": "[SEP]",
73
+ "stride": 0,
74
+ "tokenizer_class": "PreTrainedTokenizerFast",
75
+ "truncation_side": "right",
76
+ "truncation_strategy": "longest_first",
77
+ "unk_token": "[UNK]"
78
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
trainer_state.json CHANGED
@@ -1,209 +1,155 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.9995835068721366,
5
- "eval_steps": 500,
6
- "global_step": 300,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.03,
13
- "learning_rate": 3.0000000000000004e-07,
14
- "loss": 11.1022,
15
- "step": 10
16
- },
17
- {
18
- "epoch": 0.07,
19
- "learning_rate": 8.000000000000001e-07,
20
- "loss": 10.5874,
21
- "step": 20
22
- },
23
- {
24
- "epoch": 0.1,
25
- "learning_rate": 1.3e-06,
26
- "loss": 9.359,
27
- "step": 30
28
- },
29
- {
30
- "epoch": 0.13,
31
- "learning_rate": 1.8e-06,
32
- "loss": 7.4948,
33
- "step": 40
34
- },
35
- {
36
- "epoch": 0.17,
37
- "learning_rate": 2.3e-06,
38
- "loss": 5.7244,
39
- "step": 50
40
- },
41
- {
42
- "epoch": 0.2,
43
- "learning_rate": 2.8000000000000003e-06,
44
- "loss": 4.1675,
45
- "step": 60
46
- },
47
- {
48
- "epoch": 0.23,
49
- "learning_rate": 3.3e-06,
50
- "loss": 3.2211,
51
- "step": 70
52
- },
53
- {
54
- "epoch": 0.27,
55
- "learning_rate": 3.8e-06,
56
- "loss": 2.4147,
57
- "step": 80
58
- },
59
- {
60
- "epoch": 0.3,
61
- "learning_rate": 4.2999999999999995e-06,
62
- "loss": 1.7997,
63
- "step": 90
64
- },
65
- {
66
- "epoch": 0.33,
67
- "learning_rate": 4.800000000000001e-06,
68
- "loss": 1.4401,
69
- "step": 100
70
- },
71
- {
72
- "epoch": 0.37,
73
- "learning_rate": 5.3e-06,
74
- "loss": 1.2247,
75
- "step": 110
76
- },
77
- {
78
- "epoch": 0.4,
79
- "learning_rate": 5.8e-06,
80
- "loss": 1.0594,
81
- "step": 120
82
- },
83
- {
84
- "epoch": 0.43,
85
- "learning_rate": 6.300000000000001e-06,
86
- "loss": 0.9899,
87
- "step": 130
88
- },
89
- {
90
- "epoch": 0.47,
91
- "learning_rate": 6.800000000000001e-06,
92
- "loss": 0.8842,
93
- "step": 140
94
- },
95
- {
96
- "epoch": 0.5,
97
- "learning_rate": 7.2999999999999996e-06,
98
- "loss": 0.8798,
99
- "step": 150
100
- },
101
- {
102
- "epoch": 0.53,
103
- "learning_rate": 7.8e-06,
104
- "loss": 0.8872,
105
- "step": 160
106
- },
107
- {
108
- "epoch": 0.57,
109
- "learning_rate": 8.3e-06,
110
- "loss": 0.8889,
111
- "step": 170
112
- },
113
- {
114
- "epoch": 0.6,
115
- "learning_rate": 8.8e-06,
116
- "loss": 0.9344,
117
- "step": 180
118
- },
119
- {
120
- "epoch": 0.63,
121
- "learning_rate": 9.3e-06,
122
- "loss": 0.9867,
123
- "step": 190
124
- },
125
- {
126
- "epoch": 0.67,
127
- "learning_rate": 9.800000000000001e-06,
128
- "loss": 0.8925,
129
- "step": 200
130
- },
131
- {
132
- "epoch": 0.7,
133
- "learning_rate": 1.03e-05,
134
- "loss": 0.7869,
135
- "step": 210
136
- },
137
- {
138
- "epoch": 0.73,
139
- "learning_rate": 1.08e-05,
140
- "loss": 0.8847,
141
- "step": 220
142
- },
143
- {
144
- "epoch": 0.77,
145
- "learning_rate": 1.13e-05,
146
- "loss": 0.8221,
147
- "step": 230
148
- },
149
- {
150
- "epoch": 0.8,
151
- "learning_rate": 1.18e-05,
152
- "loss": 0.8611,
153
- "step": 240
154
- },
155
- {
156
- "epoch": 0.83,
157
- "learning_rate": 1.23e-05,
158
- "loss": 0.8544,
159
- "step": 250
160
- },
161
- {
162
- "epoch": 0.87,
163
- "learning_rate": 1.2800000000000001e-05,
164
- "loss": 0.8061,
165
- "step": 260
166
- },
167
- {
168
- "epoch": 0.9,
169
- "learning_rate": 1.3300000000000001e-05,
170
- "loss": 0.7984,
171
- "step": 270
172
- },
173
- {
174
- "epoch": 0.93,
175
- "learning_rate": 1.3800000000000002e-05,
176
- "loss": 0.7396,
177
- "step": 280
178
- },
179
- {
180
- "epoch": 0.97,
181
- "learning_rate": 1.43e-05,
182
- "loss": 0.8653,
183
- "step": 290
184
- },
185
- {
186
- "epoch": 1.0,
187
- "learning_rate": 1.48e-05,
188
- "loss": 0.8675,
189
- "step": 300
190
- },
191
- {
192
- "epoch": 1.0,
193
- "eval_loss": 0.8561133146286011,
194
- "eval_runtime": 33.8169,
195
- "eval_samples_per_second": 47.343,
196
- "eval_steps_per_second": 7.895,
197
- "step": 300
198
- }
199
- ],
200
- "logging_steps": 10,
201
- "max_steps": 1800,
202
- "num_input_tokens_seen": 0,
203
- "num_train_epochs": 6,
204
- "save_steps": 500,
205
- "total_flos": 3763911720960000.0,
206
- "train_batch_size": 6,
207
- "trial_name": null,
208
- "trial_params": null
209
- }
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 141,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.2127659574468085,
13
+ "grad_norm": 14.269296646118164,
14
+ "learning_rate": 4.858156028368794e-05,
15
+ "loss": 17.1938,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.425531914893617,
20
+ "grad_norm": 6.576030254364014,
21
+ "learning_rate": 4.50354609929078e-05,
22
+ "loss": 8.7349,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.6382978723404256,
27
+ "grad_norm": 9.514806747436523,
28
+ "learning_rate": 4.148936170212766e-05,
29
+ "loss": 7.7678,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.851063829787234,
34
+ "grad_norm": 9.582891464233398,
35
+ "learning_rate": 3.794326241134752e-05,
36
+ "loss": 7.0878,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 1.0,
41
+ "eval_loss": 6.37921142578125,
42
+ "eval_runtime": 5.6868,
43
+ "eval_samples_per_second": 7.386,
44
+ "eval_steps_per_second": 1.934,
45
+ "step": 47
46
+ },
47
+ {
48
+ "epoch": 1.0638297872340425,
49
+ "grad_norm": 12.990375518798828,
50
+ "learning_rate": 3.4397163120567377e-05,
51
+ "loss": 6.86,
52
+ "step": 50
53
+ },
54
+ {
55
+ "epoch": 1.2765957446808511,
56
+ "grad_norm": 12.003615379333496,
57
+ "learning_rate": 3.085106382978723e-05,
58
+ "loss": 6.5234,
59
+ "step": 60
60
+ },
61
+ {
62
+ "epoch": 1.4893617021276595,
63
+ "grad_norm": 14.846796989440918,
64
+ "learning_rate": 2.7304964539007094e-05,
65
+ "loss": 6.2994,
66
+ "step": 70
67
+ },
68
+ {
69
+ "epoch": 1.702127659574468,
70
+ "grad_norm": 15.324792861938477,
71
+ "learning_rate": 2.3758865248226954e-05,
72
+ "loss": 6.1691,
73
+ "step": 80
74
+ },
75
+ {
76
+ "epoch": 1.9148936170212765,
77
+ "grad_norm": 11.699965476989746,
78
+ "learning_rate": 2.0212765957446807e-05,
79
+ "loss": 6.1812,
80
+ "step": 90
81
+ },
82
+ {
83
+ "epoch": 2.0,
84
+ "eval_loss": 5.777428150177002,
85
+ "eval_runtime": 6.2453,
86
+ "eval_samples_per_second": 6.725,
87
+ "eval_steps_per_second": 1.761,
88
+ "step": 94
89
+ },
90
+ {
91
+ "epoch": 2.127659574468085,
92
+ "grad_norm": 13.892985343933105,
93
+ "learning_rate": 1.6666666666666667e-05,
94
+ "loss": 6.0102,
95
+ "step": 100
96
+ },
97
+ {
98
+ "epoch": 2.3404255319148937,
99
+ "grad_norm": 15.94567584991455,
100
+ "learning_rate": 1.3120567375886524e-05,
101
+ "loss": 5.9313,
102
+ "step": 110
103
+ },
104
+ {
105
+ "epoch": 2.5531914893617023,
106
+ "grad_norm": 10.468910217285156,
107
+ "learning_rate": 9.574468085106383e-06,
108
+ "loss": 6.013,
109
+ "step": 120
110
+ },
111
+ {
112
+ "epoch": 2.7659574468085104,
113
+ "grad_norm": 10.299534797668457,
114
+ "learning_rate": 6.028368794326241e-06,
115
+ "loss": 5.9602,
116
+ "step": 130
117
+ },
118
+ {
119
+ "epoch": 2.978723404255319,
120
+ "grad_norm": 12.492215156555176,
121
+ "learning_rate": 2.4822695035460995e-06,
122
+ "loss": 5.8626,
123
+ "step": 140
124
+ },
125
+ {
126
+ "epoch": 3.0,
127
+ "eval_loss": 5.686648845672607,
128
+ "eval_runtime": 6.8751,
129
+ "eval_samples_per_second": 6.109,
130
+ "eval_steps_per_second": 1.6,
131
+ "step": 141
132
+ }
133
+ ],
134
+ "logging_steps": 10,
135
+ "max_steps": 141,
136
+ "num_input_tokens_seen": 0,
137
+ "num_train_epochs": 3,
138
+ "save_steps": 500,
139
+ "stateful_callbacks": {
140
+ "TrainerControl": {
141
+ "args": {
142
+ "should_epoch_stop": false,
143
+ "should_evaluate": false,
144
+ "should_log": false,
145
+ "should_save": true,
146
+ "should_training_stop": true
147
+ },
148
+ "attributes": {}
149
+ }
150
+ },
151
+ "total_flos": 1047574383427584.0,
152
+ "train_batch_size": 4,
153
+ "trial_name": null,
154
+ "trial_params": null
155
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a6300bd64a2133832e68dd195890f548ec75ffbdaa6208036f80c4a9b9edd6a
3
+ size 5112