lovodkin93 committed on
Commit
ce4a6f7
·
verified ·
1 Parent(s): 4ca1853

Upload 9 files

Browse files
config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/flan-t5-large",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 2816,
8
+ "d_kv": 64,
9
+ "d_model": 1024,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "gelu_new",
12
+ "dropout_rate": 0.1,
13
+ "early_stopping": true,
14
+ "eos_token_id": 1,
15
+ "feed_forward_proj": "gated-gelu",
16
+ "initializer_factor": 1.0,
17
+ "is_encoder_decoder": true,
18
+ "is_gated_act": true,
19
+ "layer_norm_epsilon": 1e-06,
20
+ "length_penalty": 2.0,
21
+ "max_length": 4,
22
+ "min_length": 100,
23
+ "model_type": "t5",
24
+ "n_positions": 512,
25
+ "no_repeat_ngram_size": 3,
26
+ "num_decoder_layers": 24,
27
+ "num_heads": 16,
28
+ "num_layers": 24,
29
+ "output_past": true,
30
+ "pad_token_id": 0,
31
+ "relative_attention_max_distance": 128,
32
+ "relative_attention_num_buckets": 32,
33
+ "tie_word_embeddings": false,
34
+ "torch_dtype": "float32",
35
+ "transformers_version": "4.33.3",
36
+ "use_cache": true,
37
+ "vocab_size": 32100
38
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1185b6b3382ee0685b966a56ad35dbe66376972d14f588b5080580884cc5db4
3
+ size 3132564293
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23d730ea44512112f685e80528ea985b41bcdbdf666a6443cc77304b5703f68a
3
+ size 14575
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:decbfe5fd0162b6fb6b19bf5944473086405894ca05cd6af72523918c25dd77e
3
+ size 627
special_tokens_map.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_1>",
4
+ "<extra_id_2>",
5
+ "<extra_id_3>",
6
+ "<extra_id_4>",
7
+ "<extra_id_5>",
8
+ "<extra_id_6>"
9
+ ],
10
+ "eos_token": "</s>",
11
+ "pad_token": "<pad>",
12
+ "unk_token": "<unk>"
13
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "clean_up_tokenization_spaces": true,
105
+ "eos_token": "</s>",
106
+ "extra_ids": 100,
107
+ "model_max_length": 512,
108
+ "pad_token": "<pad>",
109
+ "sp_model_kwargs": {},
110
+ "tokenizer_class": "T5Tokenizer",
111
+ "unk_token": "<unk>"
112
+ }
trainer_state.json ADDED
@@ -0,0 +1,1384 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 83.6764,
3
+ "best_model_checkpoint": "models/one_alignment_vs_summary/flan-t5-large/flan_t5_large_alignment_first/checkpoint-6500",
4
+ "epoch": 0.9742206235011991,
5
+ "eval_steps": 100,
6
+ "global_step": 6500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.01,
13
+ "learning_rate": 4.992505995203837e-05,
14
+ "loss": 0.5753,
15
+ "step": 100
16
+ },
17
+ {
18
+ "epoch": 0.01,
19
+ "eval_F1": 46.8005,
20
+ "eval_Precision": 65.9011,
21
+ "eval_Recall": 36.284,
22
+ "eval_accuracy": 58.536,
23
+ "eval_accuracy_negative": 80.7879,
24
+ "eval_accuracy_positive": 36.284,
25
+ "eval_gen_len": 3.0,
26
+ "eval_loss": 0.3608146607875824,
27
+ "eval_runtime": 77.8641,
28
+ "eval_samples_per_second": 52.81,
29
+ "eval_steps_per_second": 3.301,
30
+ "step": 100
31
+ },
32
+ {
33
+ "epoch": 0.03,
34
+ "learning_rate": 4.9850119904076745e-05,
35
+ "loss": 0.3529,
36
+ "step": 200
37
+ },
38
+ {
39
+ "epoch": 0.03,
40
+ "eval_F1": 75.56,
41
+ "eval_Precision": 63.7671,
42
+ "eval_Recall": 92.7043,
43
+ "eval_accuracy": 69.9903,
44
+ "eval_accuracy_negative": 47.2763,
45
+ "eval_accuracy_positive": 92.7043,
46
+ "eval_gen_len": 3.0,
47
+ "eval_loss": 0.3245851397514343,
48
+ "eval_runtime": 78.1082,
49
+ "eval_samples_per_second": 52.645,
50
+ "eval_steps_per_second": 3.29,
51
+ "step": 200
52
+ },
53
+ {
54
+ "epoch": 0.04,
55
+ "learning_rate": 4.977517985611511e-05,
56
+ "loss": 0.2974,
57
+ "step": 300
58
+ },
59
+ {
60
+ "epoch": 0.04,
61
+ "eval_F1": 78.279,
62
+ "eval_Precision": 68.5944,
63
+ "eval_Recall": 91.1479,
64
+ "eval_accuracy": 74.392,
65
+ "eval_accuracy_negative": 57.6362,
66
+ "eval_accuracy_positive": 91.1479,
67
+ "eval_gen_len": 3.0,
68
+ "eval_loss": 0.41329512000083923,
69
+ "eval_runtime": 78.0252,
70
+ "eval_samples_per_second": 52.701,
71
+ "eval_steps_per_second": 3.294,
72
+ "step": 300
73
+ },
74
+ {
75
+ "epoch": 0.06,
76
+ "learning_rate": 4.970023980815348e-05,
77
+ "loss": 0.2854,
78
+ "step": 400
79
+ },
80
+ {
81
+ "epoch": 0.06,
82
+ "eval_F1": 79.6539,
83
+ "eval_Precision": 73.2354,
84
+ "eval_Recall": 87.3054,
85
+ "eval_accuracy": 77.3346,
86
+ "eval_accuracy_negative": 67.3638,
87
+ "eval_accuracy_positive": 87.3054,
88
+ "eval_gen_len": 3.0,
89
+ "eval_loss": 0.2850329875946045,
90
+ "eval_runtime": 77.9552,
91
+ "eval_samples_per_second": 52.748,
92
+ "eval_steps_per_second": 3.297,
93
+ "step": 400
94
+ },
95
+ {
96
+ "epoch": 0.07,
97
+ "learning_rate": 4.9625299760191855e-05,
98
+ "loss": 0.2569,
99
+ "step": 500
100
+ },
101
+ {
102
+ "epoch": 0.07,
103
+ "eval_F1": 79.9727,
104
+ "eval_Precision": 75.225,
105
+ "eval_Recall": 85.3599,
106
+ "eval_accuracy": 78.2588,
107
+ "eval_accuracy_negative": 71.1576,
108
+ "eval_accuracy_positive": 85.3599,
109
+ "eval_gen_len": 3.0,
110
+ "eval_loss": 0.31440889835357666,
111
+ "eval_runtime": 77.9127,
112
+ "eval_samples_per_second": 52.777,
113
+ "eval_steps_per_second": 3.299,
114
+ "step": 500
115
+ },
116
+ {
117
+ "epoch": 0.09,
118
+ "learning_rate": 4.9550359712230215e-05,
119
+ "loss": 0.273,
120
+ "step": 600
121
+ },
122
+ {
123
+ "epoch": 0.09,
124
+ "eval_F1": 80.6036,
125
+ "eval_Precision": 74.1224,
126
+ "eval_Recall": 88.3268,
127
+ "eval_accuracy": 78.6722,
128
+ "eval_accuracy_negative": 69.0175,
129
+ "eval_accuracy_positive": 88.3268,
130
+ "eval_gen_len": 3.0,
131
+ "eval_loss": 0.2862711548805237,
132
+ "eval_runtime": 77.9247,
133
+ "eval_samples_per_second": 52.769,
134
+ "eval_steps_per_second": 3.298,
135
+ "step": 600
136
+ },
137
+ {
138
+ "epoch": 0.1,
139
+ "learning_rate": 4.947541966426858e-05,
140
+ "loss": 0.2769,
141
+ "step": 700
142
+ },
143
+ {
144
+ "epoch": 0.1,
145
+ "eval_F1": 79.2214,
146
+ "eval_Precision": 79.26,
147
+ "eval_Recall": 79.1829,
148
+ "eval_accuracy": 78.4776,
149
+ "eval_accuracy_negative": 77.7724,
150
+ "eval_accuracy_positive": 79.1829,
151
+ "eval_gen_len": 3.0,
152
+ "eval_loss": 0.26657262444496155,
153
+ "eval_runtime": 77.9599,
154
+ "eval_samples_per_second": 52.745,
155
+ "eval_steps_per_second": 3.297,
156
+ "step": 700
157
+ },
158
+ {
159
+ "epoch": 0.12,
160
+ "learning_rate": 4.940047961630696e-05,
161
+ "loss": 0.288,
162
+ "step": 800
163
+ },
164
+ {
165
+ "epoch": 0.12,
166
+ "eval_F1": 80.125,
167
+ "eval_Precision": 79.1924,
168
+ "eval_Recall": 81.0798,
169
+ "eval_accuracy": 79.7909,
170
+ "eval_accuracy_negative": 78.5019,
171
+ "eval_accuracy_positive": 81.0798,
172
+ "eval_gen_len": 3.0,
173
+ "eval_loss": 0.23273229598999023,
174
+ "eval_runtime": 77.9321,
175
+ "eval_samples_per_second": 52.764,
176
+ "eval_steps_per_second": 3.298,
177
+ "step": 800
178
+ },
179
+ {
180
+ "epoch": 0.13,
181
+ "learning_rate": 4.9325539568345325e-05,
182
+ "loss": 0.2597,
183
+ "step": 900
184
+ },
185
+ {
186
+ "epoch": 0.13,
187
+ "eval_F1": 79.5494,
188
+ "eval_Precision": 80.1184,
189
+ "eval_Recall": 78.9883,
190
+ "eval_accuracy": 78.7451,
191
+ "eval_accuracy_negative": 78.5019,
192
+ "eval_accuracy_positive": 78.9883,
193
+ "eval_gen_len": 3.0,
194
+ "eval_loss": 0.22254109382629395,
195
+ "eval_runtime": 77.9631,
196
+ "eval_samples_per_second": 52.743,
197
+ "eval_steps_per_second": 3.296,
198
+ "step": 900
199
+ },
200
+ {
201
+ "epoch": 0.15,
202
+ "learning_rate": 4.925059952038369e-05,
203
+ "loss": 0.2735,
204
+ "step": 1000
205
+ },
206
+ {
207
+ "epoch": 0.15,
208
+ "eval_F1": 81.095,
209
+ "eval_Precision": 78.1335,
210
+ "eval_Recall": 84.2899,
211
+ "eval_accuracy": 79.9368,
212
+ "eval_accuracy_negative": 75.5837,
213
+ "eval_accuracy_positive": 84.2899,
214
+ "eval_gen_len": 3.0,
215
+ "eval_loss": 0.23284350335597992,
216
+ "eval_runtime": 77.9328,
217
+ "eval_samples_per_second": 52.763,
218
+ "eval_steps_per_second": 3.298,
219
+ "step": 1000
220
+ },
221
+ {
222
+ "epoch": 0.16,
223
+ "learning_rate": 4.917565947242207e-05,
224
+ "loss": 0.2591,
225
+ "step": 1100
226
+ },
227
+ {
228
+ "epoch": 0.16,
229
+ "eval_F1": 81.521,
230
+ "eval_Precision": 77.891,
231
+ "eval_Recall": 85.5058,
232
+ "eval_accuracy": 80.5934,
233
+ "eval_accuracy_negative": 75.6809,
234
+ "eval_accuracy_positive": 85.5058,
235
+ "eval_gen_len": 3.0,
236
+ "eval_loss": 0.26482418179512024,
237
+ "eval_runtime": 78.0339,
238
+ "eval_samples_per_second": 52.695,
239
+ "eval_steps_per_second": 3.293,
240
+ "step": 1100
241
+ },
242
+ {
243
+ "epoch": 0.18,
244
+ "learning_rate": 4.9100719424460435e-05,
245
+ "loss": 0.2675,
246
+ "step": 1200
247
+ },
248
+ {
249
+ "epoch": 0.18,
250
+ "eval_F1": 80.1508,
251
+ "eval_Precision": 82.9002,
252
+ "eval_Recall": 77.5778,
253
+ "eval_accuracy": 80.7879,
254
+ "eval_accuracy_negative": 83.9981,
255
+ "eval_accuracy_positive": 77.5778,
256
+ "eval_gen_len": 3.0,
257
+ "eval_loss": 0.23910629749298096,
258
+ "eval_runtime": 78.0093,
259
+ "eval_samples_per_second": 52.712,
260
+ "eval_steps_per_second": 3.294,
261
+ "step": 1200
262
+ },
263
+ {
264
+ "epoch": 0.19,
265
+ "learning_rate": 4.90257793764988e-05,
266
+ "loss": 0.2549,
267
+ "step": 1300
268
+ },
269
+ {
270
+ "epoch": 0.19,
271
+ "eval_F1": 81.6754,
272
+ "eval_Precision": 79.9627,
273
+ "eval_Recall": 83.463,
274
+ "eval_accuracy": 81.1041,
275
+ "eval_accuracy_negative": 78.7451,
276
+ "eval_accuracy_positive": 83.463,
277
+ "eval_gen_len": 3.0,
278
+ "eval_loss": 0.22716127336025238,
279
+ "eval_runtime": 78.0476,
280
+ "eval_samples_per_second": 52.686,
281
+ "eval_steps_per_second": 3.293,
282
+ "step": 1300
283
+ },
284
+ {
285
+ "epoch": 0.21,
286
+ "learning_rate": 4.895083932853717e-05,
287
+ "loss": 0.2338,
288
+ "step": 1400
289
+ },
290
+ {
291
+ "epoch": 0.21,
292
+ "eval_F1": 80.1995,
293
+ "eval_Precision": 82.2927,
294
+ "eval_Recall": 78.2101,
295
+ "eval_accuracy": 80.4475,
296
+ "eval_accuracy_negative": 82.6848,
297
+ "eval_accuracy_positive": 78.2101,
298
+ "eval_gen_len": 3.0,
299
+ "eval_loss": 0.24518640339374542,
300
+ "eval_runtime": 77.9981,
301
+ "eval_samples_per_second": 52.719,
302
+ "eval_steps_per_second": 3.295,
303
+ "step": 1400
304
+ },
305
+ {
306
+ "epoch": 0.22,
307
+ "learning_rate": 4.8875899280575545e-05,
308
+ "loss": 0.2747,
309
+ "step": 1500
310
+ },
311
+ {
312
+ "epoch": 0.22,
313
+ "eval_F1": 81.9309,
314
+ "eval_Precision": 75.2033,
315
+ "eval_Recall": 89.9805,
316
+ "eval_accuracy": 79.4504,
317
+ "eval_accuracy_negative": 68.9202,
318
+ "eval_accuracy_positive": 89.9805,
319
+ "eval_gen_len": 3.0,
320
+ "eval_loss": 0.2351878583431244,
321
+ "eval_runtime": 77.8581,
322
+ "eval_samples_per_second": 52.814,
323
+ "eval_steps_per_second": 3.301,
324
+ "step": 1500
325
+ },
326
+ {
327
+ "epoch": 0.24,
328
+ "learning_rate": 4.880095923261391e-05,
329
+ "loss": 0.2389,
330
+ "step": 1600
331
+ },
332
+ {
333
+ "epoch": 0.24,
334
+ "eval_F1": 77.4047,
335
+ "eval_Precision": 80.4724,
336
+ "eval_Recall": 74.5623,
337
+ "eval_accuracy": 76.143,
338
+ "eval_accuracy_negative": 77.7237,
339
+ "eval_accuracy_positive": 74.5623,
340
+ "eval_gen_len": 3.0,
341
+ "eval_loss": 0.240137979388237,
342
+ "eval_runtime": 77.9704,
343
+ "eval_samples_per_second": 52.738,
344
+ "eval_steps_per_second": 3.296,
345
+ "step": 1600
346
+ },
347
+ {
348
+ "epoch": 0.25,
349
+ "learning_rate": 4.872601918465228e-05,
350
+ "loss": 0.2486,
351
+ "step": 1700
352
+ },
353
+ {
354
+ "epoch": 0.25,
355
+ "eval_F1": 74.3833,
356
+ "eval_Precision": 84.0171,
357
+ "eval_Recall": 66.7315,
358
+ "eval_accuracy": 73.5165,
359
+ "eval_accuracy_negative": 80.3016,
360
+ "eval_accuracy_positive": 66.7315,
361
+ "eval_gen_len": 3.0,
362
+ "eval_loss": 0.23113004863262177,
363
+ "eval_runtime": 77.9471,
364
+ "eval_samples_per_second": 52.754,
365
+ "eval_steps_per_second": 3.297,
366
+ "step": 1700
367
+ },
368
+ {
369
+ "epoch": 0.27,
370
+ "learning_rate": 4.865107913669065e-05,
371
+ "loss": 0.2558,
372
+ "step": 1800
373
+ },
374
+ {
375
+ "epoch": 0.27,
376
+ "eval_F1": 75.5311,
377
+ "eval_Precision": 81.9579,
378
+ "eval_Recall": 70.0389,
379
+ "eval_accuracy": 71.644,
380
+ "eval_accuracy_negative": 73.249,
381
+ "eval_accuracy_positive": 70.0389,
382
+ "eval_gen_len": 3.0,
383
+ "eval_loss": 0.2416938990354538,
384
+ "eval_runtime": 77.8981,
385
+ "eval_samples_per_second": 52.787,
386
+ "eval_steps_per_second": 3.299,
387
+ "step": 1800
388
+ },
389
+ {
390
+ "epoch": 0.28,
391
+ "learning_rate": 4.8576139088729016e-05,
392
+ "loss": 0.2521,
393
+ "step": 1900
394
+ },
395
+ {
396
+ "epoch": 0.28,
397
+ "eval_F1": 75.8988,
398
+ "eval_Precision": 83.8729,
399
+ "eval_Recall": 69.3093,
400
+ "eval_accuracy": 72.0574,
401
+ "eval_accuracy_negative": 74.8054,
402
+ "eval_accuracy_positive": 69.3093,
403
+ "eval_gen_len": 3.0,
404
+ "eval_loss": 0.2378227710723877,
405
+ "eval_runtime": 78.1302,
406
+ "eval_samples_per_second": 52.63,
407
+ "eval_steps_per_second": 3.289,
408
+ "step": 1900
409
+ },
410
+ {
411
+ "epoch": 0.3,
412
+ "learning_rate": 4.8501199040767384e-05,
413
+ "loss": 0.2487,
414
+ "step": 2000
415
+ },
416
+ {
417
+ "epoch": 0.3,
418
+ "eval_F1": 82.5998,
419
+ "eval_Precision": 77.7587,
420
+ "eval_Recall": 88.0837,
421
+ "eval_accuracy": 72.5438,
422
+ "eval_accuracy_negative": 57.0039,
423
+ "eval_accuracy_positive": 88.0837,
424
+ "eval_gen_len": 3.0,
425
+ "eval_loss": 0.31695932149887085,
426
+ "eval_runtime": 78.0522,
427
+ "eval_samples_per_second": 52.683,
428
+ "eval_steps_per_second": 3.293,
429
+ "step": 2000
430
+ },
431
+ {
432
+ "epoch": 0.31,
433
+ "learning_rate": 4.842625899280576e-05,
434
+ "loss": 0.246,
435
+ "step": 2100
436
+ },
437
+ {
438
+ "epoch": 0.31,
439
+ "eval_F1": 81.1168,
440
+ "eval_Precision": 81.6971,
441
+ "eval_Recall": 80.5447,
442
+ "eval_accuracy": 66.3911,
443
+ "eval_accuracy_negative": 52.2374,
444
+ "eval_accuracy_positive": 80.5447,
445
+ "eval_gen_len": 3.0,
446
+ "eval_loss": 0.25484344363212585,
447
+ "eval_runtime": 78.0548,
448
+ "eval_samples_per_second": 52.681,
449
+ "eval_steps_per_second": 3.293,
450
+ "step": 2100
451
+ },
452
+ {
453
+ "epoch": 0.33,
454
+ "learning_rate": 4.8351318944844126e-05,
455
+ "loss": 0.2407,
456
+ "step": 2200
457
+ },
458
+ {
459
+ "epoch": 0.33,
460
+ "eval_F1": 74.0905,
461
+ "eval_Precision": 86.343,
462
+ "eval_Recall": 64.8833,
463
+ "eval_accuracy": 71.7656,
464
+ "eval_accuracy_negative": 78.6479,
465
+ "eval_accuracy_positive": 64.8833,
466
+ "eval_gen_len": 3.0,
467
+ "eval_loss": 0.26473525166511536,
468
+ "eval_runtime": 77.9999,
469
+ "eval_samples_per_second": 52.718,
470
+ "eval_steps_per_second": 3.295,
471
+ "step": 2200
472
+ },
473
+ {
474
+ "epoch": 0.34,
475
+ "learning_rate": 4.8276378896882494e-05,
476
+ "loss": 0.2293,
477
+ "step": 2300
478
+ },
479
+ {
480
+ "epoch": 0.34,
481
+ "eval_F1": 76.173,
482
+ "eval_Precision": 82.6038,
483
+ "eval_Recall": 70.6712,
484
+ "eval_accuracy": 72.7383,
485
+ "eval_accuracy_negative": 74.8054,
486
+ "eval_accuracy_positive": 70.6712,
487
+ "eval_gen_len": 3.0,
488
+ "eval_loss": 0.23481938242912292,
489
+ "eval_runtime": 78.0051,
490
+ "eval_samples_per_second": 52.714,
491
+ "eval_steps_per_second": 3.295,
492
+ "step": 2300
493
+ },
494
+ {
495
+ "epoch": 0.36,
496
+ "learning_rate": 4.820143884892087e-05,
497
+ "loss": 0.199,
498
+ "step": 2400
499
+ },
500
+ {
501
+ "epoch": 0.36,
502
+ "eval_F1": 81.2854,
503
+ "eval_Precision": 79.0441,
504
+ "eval_Recall": 83.6576,
505
+ "eval_accuracy": 77.8453,
506
+ "eval_accuracy_negative": 72.0331,
507
+ "eval_accuracy_positive": 83.6576,
508
+ "eval_gen_len": 3.0,
509
+ "eval_loss": 0.29514577984809875,
510
+ "eval_runtime": 77.9919,
511
+ "eval_samples_per_second": 52.723,
512
+ "eval_steps_per_second": 3.295,
513
+ "step": 2400
514
+ },
515
+ {
516
+ "epoch": 0.37,
517
+ "learning_rate": 4.8126498800959236e-05,
518
+ "loss": 0.247,
519
+ "step": 2500
520
+ },
521
+ {
522
+ "epoch": 0.37,
523
+ "eval_F1": 67.5548,
524
+ "eval_Precision": 86.3086,
525
+ "eval_Recall": 55.4961,
526
+ "eval_accuracy": 70.3551,
527
+ "eval_accuracy_negative": 85.214,
528
+ "eval_accuracy_positive": 55.4961,
529
+ "eval_gen_len": 3.0,
530
+ "eval_loss": 0.23327696323394775,
531
+ "eval_runtime": 78.0614,
532
+ "eval_samples_per_second": 52.677,
533
+ "eval_steps_per_second": 3.292,
534
+ "step": 2500
535
+ },
536
+ {
537
+ "epoch": 0.39,
538
+ "learning_rate": 4.8051558752997604e-05,
539
+ "loss": 0.2659,
540
+ "step": 2600
541
+ },
542
+ {
543
+ "epoch": 0.39,
544
+ "eval_F1": 77.5335,
545
+ "eval_Precision": 80.6944,
546
+ "eval_Recall": 74.6109,
547
+ "eval_accuracy": 68.6284,
548
+ "eval_accuracy_negative": 62.6459,
549
+ "eval_accuracy_positive": 74.6109,
550
+ "eval_gen_len": 3.0,
551
+ "eval_loss": 0.26498734951019287,
552
+ "eval_runtime": 77.9945,
553
+ "eval_samples_per_second": 52.722,
554
+ "eval_steps_per_second": 3.295,
555
+ "step": 2600
556
+ },
557
+ {
558
+ "epoch": 0.4,
559
+ "learning_rate": 4.797661870503598e-05,
560
+ "loss": 0.2445,
561
+ "step": 2700
562
+ },
563
+ {
564
+ "epoch": 0.4,
565
+ "eval_F1": 81.6411,
566
+ "eval_Precision": 77.1998,
567
+ "eval_Recall": 86.6245,
568
+ "eval_accuracy": 78.0156,
569
+ "eval_accuracy_negative": 69.4066,
570
+ "eval_accuracy_positive": 86.6245,
571
+ "eval_gen_len": 3.0,
572
+ "eval_loss": 0.25022661685943604,
573
+ "eval_runtime": 78.0413,
574
+ "eval_samples_per_second": 52.69,
575
+ "eval_steps_per_second": 3.293,
576
+ "step": 2700
577
+ },
578
+ {
579
+ "epoch": 0.42,
580
+ "learning_rate": 4.7901678657074346e-05,
581
+ "loss": 0.2397,
582
+ "step": 2800
583
+ },
584
+ {
585
+ "epoch": 0.42,
586
+ "eval_F1": 81.1614,
587
+ "eval_Precision": 77.679,
588
+ "eval_Recall": 84.9708,
589
+ "eval_accuracy": 75.9241,
590
+ "eval_accuracy_negative": 66.8774,
591
+ "eval_accuracy_positive": 84.9708,
592
+ "eval_gen_len": 3.0,
593
+ "eval_loss": 0.2765290439128876,
594
+ "eval_runtime": 77.9454,
595
+ "eval_samples_per_second": 52.755,
596
+ "eval_steps_per_second": 3.297,
597
+ "step": 2800
598
+ },
599
+ {
600
+ "epoch": 0.43,
601
+ "learning_rate": 4.7826738609112713e-05,
602
+ "loss": 0.2134,
603
+ "step": 2900
604
+ },
605
+ {
606
+ "epoch": 0.43,
607
+ "eval_F1": 81.6516,
608
+ "eval_Precision": 78.0488,
609
+ "eval_Recall": 85.6031,
610
+ "eval_accuracy": 77.9426,
611
+ "eval_accuracy_negative": 70.2821,
612
+ "eval_accuracy_positive": 85.6031,
613
+ "eval_gen_len": 3.0,
614
+ "eval_loss": 0.2863008975982666,
615
+ "eval_runtime": 78.0388,
616
+ "eval_samples_per_second": 52.692,
617
+ "eval_steps_per_second": 3.293,
618
+ "step": 2900
619
+ },
620
+ {
621
+ "epoch": 0.45,
622
+ "learning_rate": 4.775179856115108e-05,
623
+ "loss": 0.2349,
624
+ "step": 3000
625
+ },
626
+ {
627
+ "epoch": 0.45,
628
+ "eval_F1": 81.0536,
629
+ "eval_Precision": 80.5476,
630
+ "eval_Recall": 81.5661,
631
+ "eval_accuracy": 77.8696,
632
+ "eval_accuracy_negative": 74.1732,
633
+ "eval_accuracy_positive": 81.5661,
634
+ "eval_gen_len": 3.0,
635
+ "eval_loss": 0.23327529430389404,
636
+ "eval_runtime": 77.998,
637
+ "eval_samples_per_second": 52.719,
638
+ "eval_steps_per_second": 3.295,
639
+ "step": 3000
640
+ },
641
+ {
642
+ "epoch": 0.46,
643
+ "learning_rate": 4.767685851318945e-05,
644
+ "loss": 0.2031,
645
+ "step": 3100
646
+ },
647
+ {
648
+ "epoch": 0.46,
649
+ "eval_F1": 80.2638,
650
+ "eval_Precision": 83.8812,
651
+ "eval_Recall": 76.9455,
652
+ "eval_accuracy": 78.429,
653
+ "eval_accuracy_negative": 79.9125,
654
+ "eval_accuracy_positive": 76.9455,
655
+ "eval_gen_len": 3.0,
656
+ "eval_loss": 0.2780900001525879,
657
+ "eval_runtime": 78.0382,
658
+ "eval_samples_per_second": 52.692,
659
+ "eval_steps_per_second": 3.293,
660
+ "step": 3100
661
+ },
662
+ {
663
+ "epoch": 0.48,
664
+ "learning_rate": 4.7601918465227817e-05,
665
+ "loss": 0.2704,
666
+ "step": 3200
667
+ },
668
+ {
669
+ "epoch": 0.48,
670
+ "eval_F1": 80.8479,
671
+ "eval_Precision": 82.958,
672
+ "eval_Recall": 78.8424,
673
+ "eval_accuracy": 79.8881,
674
+ "eval_accuracy_negative": 80.9339,
675
+ "eval_accuracy_positive": 78.8424,
676
+ "eval_gen_len": 3.0,
677
+ "eval_loss": 0.22213193774223328,
678
+ "eval_runtime": 77.9705,
679
+ "eval_samples_per_second": 52.738,
680
+ "eval_steps_per_second": 3.296,
681
+ "step": 3200
682
+ },
683
+ {
684
+ "epoch": 0.49,
685
+ "learning_rate": 4.752697841726619e-05,
686
+ "loss": 0.2413,
687
+ "step": 3300
688
+ },
689
+ {
690
+ "epoch": 0.49,
691
+ "eval_F1": 82.2274,
692
+ "eval_Precision": 78.6158,
693
+ "eval_Recall": 86.1868,
694
+ "eval_accuracy": 79.3045,
695
+ "eval_accuracy_negative": 72.4222,
696
+ "eval_accuracy_positive": 86.1868,
697
+ "eval_gen_len": 3.0,
698
+ "eval_loss": 0.21565163135528564,
699
+ "eval_runtime": 77.9735,
700
+ "eval_samples_per_second": 52.736,
701
+ "eval_steps_per_second": 3.296,
702
+ "step": 3300
703
+ },
704
+ {
705
+ "epoch": 0.51,
706
+ "learning_rate": 4.745203836930456e-05,
707
+ "loss": 0.2549,
708
+ "step": 3400
709
+ },
710
+ {
711
+ "epoch": 0.51,
712
+ "eval_F1": 80.991,
713
+ "eval_Precision": 70.8045,
714
+ "eval_Recall": 94.6012,
715
+ "eval_accuracy": 75.5107,
716
+ "eval_accuracy_negative": 56.4202,
717
+ "eval_accuracy_positive": 94.6012,
718
+ "eval_gen_len": 3.0,
719
+ "eval_loss": 0.3653569221496582,
720
+ "eval_runtime": 77.9428,
721
+ "eval_samples_per_second": 52.757,
722
+ "eval_steps_per_second": 3.297,
723
+ "step": 3400
724
+ },
725
+ {
726
+ "epoch": 0.52,
727
+ "learning_rate": 4.7377098321342926e-05,
728
+ "loss": 0.2327,
729
+ "step": 3500
730
+ },
731
+ {
732
+ "epoch": 0.52,
733
+ "eval_F1": 80.6348,
734
+ "eval_Precision": 82.2458,
735
+ "eval_Recall": 79.0856,
736
+ "eval_accuracy": 78.7938,
737
+ "eval_accuracy_negative": 78.5019,
738
+ "eval_accuracy_positive": 79.0856,
739
+ "eval_gen_len": 3.0,
740
+ "eval_loss": 0.22318707406520844,
741
+ "eval_runtime": 77.9908,
742
+ "eval_samples_per_second": 52.724,
743
+ "eval_steps_per_second": 3.295,
744
+ "step": 3500
745
+ },
746
+ {
747
+ "epoch": 0.54,
748
+ "learning_rate": 4.7302158273381294e-05,
749
+ "loss": 0.234,
750
+ "step": 3600
751
+ },
752
+ {
753
+ "epoch": 0.54,
754
+ "eval_F1": 82.8591,
755
+ "eval_Precision": 76.8176,
756
+ "eval_Recall": 89.9319,
757
+ "eval_accuracy": 77.5535,
758
+ "eval_accuracy_negative": 65.1751,
759
+ "eval_accuracy_positive": 89.9319,
760
+ "eval_gen_len": 3.0,
761
+ "eval_loss": 0.28604593873023987,
762
+ "eval_runtime": 77.9808,
763
+ "eval_samples_per_second": 52.731,
764
+ "eval_steps_per_second": 3.296,
765
+ "step": 3600
766
+ },
767
+ {
768
+ "epoch": 0.55,
769
+ "learning_rate": 4.722721822541967e-05,
770
+ "loss": 0.258,
771
+ "step": 3700
772
+ },
773
+ {
774
+ "epoch": 0.55,
775
+ "eval_F1": 74.4311,
776
+ "eval_Precision": 82.7874,
777
+ "eval_Recall": 67.607,
778
+ "eval_accuracy": 67.9718,
779
+ "eval_accuracy_negative": 68.3366,
780
+ "eval_accuracy_positive": 67.607,
781
+ "eval_gen_len": 3.0,
782
+ "eval_loss": 0.2335677146911621,
783
+ "eval_runtime": 78.0681,
784
+ "eval_samples_per_second": 52.672,
785
+ "eval_steps_per_second": 3.292,
786
+ "step": 3700
787
+ },
788
+ {
789
+ "epoch": 0.57,
790
+ "learning_rate": 4.7152278177458036e-05,
791
+ "loss": 0.2367,
792
+ "step": 3800
793
+ },
794
+ {
795
+ "epoch": 0.57,
796
+ "eval_F1": 69.2561,
797
+ "eval_Precision": 89.7754,
798
+ "eval_Recall": 56.3716,
799
+ "eval_accuracy": 67.4368,
800
+ "eval_accuracy_negative": 78.5019,
801
+ "eval_accuracy_positive": 56.3716,
802
+ "eval_gen_len": 3.0,
803
+ "eval_loss": 0.2100234031677246,
804
+ "eval_runtime": 77.9683,
805
+ "eval_samples_per_second": 52.739,
806
+ "eval_steps_per_second": 3.296,
807
+ "step": 3800
808
+ },
809
+ {
810
+ "epoch": 0.58,
811
+ "learning_rate": 4.7077338129496404e-05,
812
+ "loss": 0.2157,
813
+ "step": 3900
814
+ },
815
+ {
816
+ "epoch": 0.58,
817
+ "eval_F1": 79.2374,
818
+ "eval_Precision": 85.5612,
819
+ "eval_Recall": 73.784,
820
+ "eval_accuracy": 76.8482,
821
+ "eval_accuracy_negative": 79.9125,
822
+ "eval_accuracy_positive": 73.784,
823
+ "eval_gen_len": 3.0,
824
+ "eval_loss": 0.23944681882858276,
825
+ "eval_runtime": 77.9314,
826
+ "eval_samples_per_second": 52.764,
827
+ "eval_steps_per_second": 3.298,
828
+ "step": 3900
829
+ },
830
+ {
831
+ "epoch": 0.6,
832
+ "learning_rate": 4.700239808153478e-05,
833
+ "loss": 0.2431,
834
+ "step": 4000
835
+ },
836
+ {
837
+ "epoch": 0.6,
838
+ "eval_F1": 72.7115,
839
+ "eval_Precision": 89.4812,
840
+ "eval_Recall": 61.2354,
841
+ "eval_accuracy": 74.0272,
842
+ "eval_accuracy_negative": 86.8191,
843
+ "eval_accuracy_positive": 61.2354,
844
+ "eval_gen_len": 3.0,
845
+ "eval_loss": 0.2376098483800888,
846
+ "eval_runtime": 78.0857,
847
+ "eval_samples_per_second": 52.66,
848
+ "eval_steps_per_second": 3.291,
849
+ "step": 4000
850
+ },
851
+ {
852
+ "epoch": 0.61,
853
+ "learning_rate": 4.6927458033573146e-05,
854
+ "loss": 0.2442,
855
+ "step": 4100
856
+ },
857
+ {
858
+ "epoch": 0.61,
859
+ "eval_F1": 82.3446,
860
+ "eval_Precision": 79.7901,
861
+ "eval_Recall": 85.0681,
862
+ "eval_accuracy": 78.4047,
863
+ "eval_accuracy_negative": 71.7412,
864
+ "eval_accuracy_positive": 85.0681,
865
+ "eval_gen_len": 3.0,
866
+ "eval_loss": 0.2564048767089844,
867
+ "eval_runtime": 78.0736,
868
+ "eval_samples_per_second": 52.668,
869
+ "eval_steps_per_second": 3.292,
870
+ "step": 4100
871
+ },
872
+ {
873
+ "epoch": 0.63,
874
+ "learning_rate": 4.685251798561151e-05,
875
+ "loss": 0.2125,
876
+ "step": 4200
877
+ },
878
+ {
879
+ "epoch": 0.63,
880
+ "eval_F1": 81.7357,
881
+ "eval_Precision": 82.4024,
882
+ "eval_Recall": 81.0798,
883
+ "eval_accuracy": 77.4562,
884
+ "eval_accuracy_negative": 73.8327,
885
+ "eval_accuracy_positive": 81.0798,
886
+ "eval_gen_len": 3.0,
887
+ "eval_loss": 0.26422980427742004,
888
+ "eval_runtime": 77.9729,
889
+ "eval_samples_per_second": 52.736,
890
+ "eval_steps_per_second": 3.296,
891
+ "step": 4200
892
+ },
893
+ {
894
+ "epoch": 0.64,
895
+ "learning_rate": 4.677757793764988e-05,
896
+ "loss": 0.2213,
897
+ "step": 4300
898
+ },
899
+ {
900
+ "epoch": 0.64,
901
+ "eval_F1": 61.6757,
902
+ "eval_Precision": 89.3813,
903
+ "eval_Recall": 47.0817,
904
+ "eval_accuracy": 66.5856,
905
+ "eval_accuracy_negative": 86.0895,
906
+ "eval_accuracy_positive": 47.0817,
907
+ "eval_gen_len": 3.0,
908
+ "eval_loss": 0.2372213900089264,
909
+ "eval_runtime": 78.0051,
910
+ "eval_samples_per_second": 52.714,
911
+ "eval_steps_per_second": 3.295,
912
+ "step": 4300
913
+ },
914
+ {
915
+ "epoch": 0.66,
916
+ "learning_rate": 4.670263788968825e-05,
917
+ "loss": 0.2277,
918
+ "step": 4400
919
+ },
920
+ {
921
+ "epoch": 0.66,
922
+ "eval_F1": 65.461,
923
+ "eval_Precision": 83.8269,
924
+ "eval_Recall": 53.6965,
925
+ "eval_accuracy": 63.2539,
926
+ "eval_accuracy_negative": 72.8113,
927
+ "eval_accuracy_positive": 53.6965,
928
+ "eval_gen_len": 3.0,
929
+ "eval_loss": 0.21777845919132233,
930
+ "eval_runtime": 78.04,
931
+ "eval_samples_per_second": 52.691,
932
+ "eval_steps_per_second": 3.293,
933
+ "step": 4400
934
+ },
935
+ {
936
+ "epoch": 0.67,
937
+ "learning_rate": 4.662769784172662e-05,
938
+ "loss": 0.1893,
939
+ "step": 4500
940
+ },
941
+ {
942
+ "epoch": 0.67,
943
+ "eval_F1": 61.2192,
944
+ "eval_Precision": 91.8288,
945
+ "eval_Recall": 45.9144,
946
+ "eval_accuracy": 62.1839,
947
+ "eval_accuracy_negative": 78.4533,
948
+ "eval_accuracy_positive": 45.9144,
949
+ "eval_gen_len": 3.0,
950
+ "eval_loss": 0.25438836216926575,
951
+ "eval_runtime": 77.9949,
952
+ "eval_samples_per_second": 52.721,
953
+ "eval_steps_per_second": 3.295,
954
+ "step": 4500
955
+ },
956
+ {
957
+ "epoch": 0.69,
958
+ "learning_rate": 4.655275779376499e-05,
959
+ "loss": 0.2577,
960
+ "step": 4600
961
+ },
962
+ {
963
+ "epoch": 0.69,
964
+ "eval_F1": 48.5335,
965
+ "eval_Precision": 86.0149,
966
+ "eval_Recall": 33.8035,
967
+ "eval_accuracy": 56.9309,
968
+ "eval_accuracy_negative": 80.0584,
969
+ "eval_accuracy_positive": 33.8035,
970
+ "eval_gen_len": 3.0,
971
+ "eval_loss": 0.20975889265537262,
972
+ "eval_runtime": 77.9509,
973
+ "eval_samples_per_second": 52.751,
974
+ "eval_steps_per_second": 3.297,
975
+ "step": 4600
976
+ },
977
+ {
978
+ "epoch": 0.7,
979
+ "learning_rate": 4.647781774580336e-05,
980
+ "loss": 0.2255,
981
+ "step": 4700
982
+ },
983
+ {
984
+ "epoch": 0.7,
985
+ "eval_F1": 48.856,
986
+ "eval_Precision": 88.4076,
987
+ "eval_Recall": 33.7549,
988
+ "eval_accuracy": 59.3872,
989
+ "eval_accuracy_negative": 85.0195,
990
+ "eval_accuracy_positive": 33.7549,
991
+ "eval_gen_len": 3.0,
992
+ "eval_loss": 0.2087894082069397,
993
+ "eval_runtime": 78.0665,
994
+ "eval_samples_per_second": 52.673,
995
+ "eval_steps_per_second": 3.292,
996
+ "step": 4700
997
+ },
998
+ {
999
+ "epoch": 0.72,
1000
+ "learning_rate": 4.640287769784173e-05,
1001
+ "loss": 0.2052,
1002
+ "step": 4800
1003
+ },
1004
+ {
1005
+ "epoch": 0.72,
1006
+ "eval_F1": 62.0057,
1007
+ "eval_Precision": 88.6878,
1008
+ "eval_Recall": 47.6654,
1009
+ "eval_accuracy": 64.6887,
1010
+ "eval_accuracy_negative": 81.7121,
1011
+ "eval_accuracy_positive": 47.6654,
1012
+ "eval_gen_len": 3.0,
1013
+ "eval_loss": 0.2096925675868988,
1014
+ "eval_runtime": 78.1278,
1015
+ "eval_samples_per_second": 52.632,
1016
+ "eval_steps_per_second": 3.289,
1017
+ "step": 4800
1018
+ },
1019
+ {
1020
+ "epoch": 0.73,
1021
+ "learning_rate": 4.63279376498801e-05,
1022
+ "loss": 0.1803,
1023
+ "step": 4900
1024
+ },
1025
+ {
1026
+ "epoch": 0.73,
1027
+ "eval_F1": 76.7251,
1028
+ "eval_Precision": 87.782,
1029
+ "eval_Recall": 68.142,
1030
+ "eval_accuracy": 75.8025,
1031
+ "eval_accuracy_negative": 83.463,
1032
+ "eval_accuracy_positive": 68.142,
1033
+ "eval_gen_len": 3.0,
1034
+ "eval_loss": 0.23566679656505585,
1035
+ "eval_runtime": 78.1099,
1036
+ "eval_samples_per_second": 52.644,
1037
+ "eval_steps_per_second": 3.29,
1038
+ "step": 4900
1039
+ },
1040
+ {
1041
+ "epoch": 0.75,
1042
+ "learning_rate": 4.625299760191847e-05,
1043
+ "loss": 0.2405,
1044
+ "step": 5000
1045
+ },
1046
+ {
1047
+ "epoch": 0.75,
1048
+ "eval_F1": 79.0246,
1049
+ "eval_Precision": 85.7224,
1050
+ "eval_Recall": 73.2977,
1051
+ "eval_accuracy": 75.8998,
1052
+ "eval_accuracy_negative": 78.5019,
1053
+ "eval_accuracy_positive": 73.2977,
1054
+ "eval_gen_len": 3.0,
1055
+ "eval_loss": 0.22571609914302826,
1056
+ "eval_runtime": 77.9758,
1057
+ "eval_samples_per_second": 52.734,
1058
+ "eval_steps_per_second": 3.296,
1059
+ "step": 5000
1060
+ },
1061
+ {
1062
+ "epoch": 0.76,
1063
+ "learning_rate": 4.617805755395684e-05,
1064
+ "loss": 0.195,
1065
+ "step": 5100
1066
+ },
1067
+ {
1068
+ "epoch": 0.76,
1069
+ "eval_F1": 38.7195,
1070
+ "eval_Precision": 89.4366,
1071
+ "eval_Recall": 24.7082,
1072
+ "eval_accuracy": 55.0097,
1073
+ "eval_accuracy_negative": 85.3113,
1074
+ "eval_accuracy_positive": 24.7082,
1075
+ "eval_gen_len": 3.0,
1076
+ "eval_loss": 0.25449925661087036,
1077
+ "eval_runtime": 78.1451,
1078
+ "eval_samples_per_second": 52.62,
1079
+ "eval_steps_per_second": 3.289,
1080
+ "step": 5100
1081
+ },
1082
+ {
1083
+ "epoch": 0.78,
1084
+ "learning_rate": 4.610311750599521e-05,
1085
+ "loss": 0.2136,
1086
+ "step": 5200
1087
+ },
1088
+ {
1089
+ "epoch": 0.78,
1090
+ "eval_F1": 72.9004,
1091
+ "eval_Precision": 89.638,
1092
+ "eval_Recall": 61.43,
1093
+ "eval_accuracy": 70.1605,
1094
+ "eval_accuracy_negative": 78.8911,
1095
+ "eval_accuracy_positive": 61.43,
1096
+ "eval_gen_len": 3.0,
1097
+ "eval_loss": 0.23430722951889038,
1098
+ "eval_runtime": 78.0577,
1099
+ "eval_samples_per_second": 52.679,
1100
+ "eval_steps_per_second": 3.292,
1101
+ "step": 5200
1102
+ },
1103
+ {
1104
+ "epoch": 0.79,
1105
+ "learning_rate": 4.602817745803358e-05,
1106
+ "loss": 0.205,
1107
+ "step": 5300
1108
+ },
1109
+ {
1110
+ "epoch": 0.79,
1111
+ "eval_F1": 61.5385,
1112
+ "eval_Precision": 88.8073,
1113
+ "eval_Recall": 47.0817,
1114
+ "eval_accuracy": 63.6916,
1115
+ "eval_accuracy_negative": 80.3016,
1116
+ "eval_accuracy_positive": 47.0817,
1117
+ "eval_gen_len": 3.0,
1118
+ "eval_loss": 0.20516321063041687,
1119
+ "eval_runtime": 78.06,
1120
+ "eval_samples_per_second": 52.677,
1121
+ "eval_steps_per_second": 3.292,
1122
+ "step": 5300
1123
+ },
1124
+ {
1125
+ "epoch": 0.81,
1126
+ "learning_rate": 4.595323741007194e-05,
1127
+ "loss": 0.227,
1128
+ "step": 5400
1129
+ },
1130
+ {
1131
+ "epoch": 0.81,
1132
+ "eval_F1": 71.5575,
1133
+ "eval_Precision": 89.1226,
1134
+ "eval_Recall": 59.7763,
1135
+ "eval_accuracy": 70.1119,
1136
+ "eval_accuracy_negative": 80.4475,
1137
+ "eval_accuracy_positive": 59.7763,
1138
+ "eval_gen_len": 3.0,
1139
+ "eval_loss": 0.20273716747760773,
1140
+ "eval_runtime": 78.0817,
1141
+ "eval_samples_per_second": 52.663,
1142
+ "eval_steps_per_second": 3.291,
1143
+ "step": 5400
1144
+ },
1145
+ {
1146
+ "epoch": 0.82,
1147
+ "learning_rate": 4.5878297362110315e-05,
1148
+ "loss": 0.221,
1149
+ "step": 5500
1150
+ },
1151
+ {
1152
+ "epoch": 0.82,
1153
+ "eval_F1": 70.1068,
1154
+ "eval_Precision": 89.8176,
1155
+ "eval_Recall": 57.4903,
1156
+ "eval_accuracy": 68.3123,
1157
+ "eval_accuracy_negative": 79.1342,
1158
+ "eval_accuracy_positive": 57.4903,
1159
+ "eval_gen_len": 3.0,
1160
+ "eval_loss": 0.20614123344421387,
1161
+ "eval_runtime": 78.0595,
1162
+ "eval_samples_per_second": 52.678,
1163
+ "eval_steps_per_second": 3.292,
1164
+ "step": 5500
1165
+ },
1166
+ {
1167
+ "epoch": 0.84,
1168
+ "learning_rate": 4.580335731414868e-05,
1169
+ "loss": 0.2148,
1170
+ "step": 5600
1171
+ },
1172
+ {
1173
+ "epoch": 0.84,
1174
+ "eval_F1": 78.0293,
1175
+ "eval_Precision": 86.2272,
1176
+ "eval_Recall": 71.2549,
1177
+ "eval_accuracy": 71.6683,
1178
+ "eval_accuracy_negative": 72.0817,
1179
+ "eval_accuracy_positive": 71.2549,
1180
+ "eval_gen_len": 3.0,
1181
+ "eval_loss": 0.23381440341472626,
1182
+ "eval_runtime": 78.0578,
1183
+ "eval_samples_per_second": 52.679,
1184
+ "eval_steps_per_second": 3.292,
1185
+ "step": 5600
1186
+ },
1187
+ {
1188
+ "epoch": 0.85,
1189
+ "learning_rate": 4.572841726618705e-05,
1190
+ "loss": 0.203,
1191
+ "step": 5700
1192
+ },
1193
+ {
1194
+ "epoch": 0.85,
1195
+ "eval_F1": 58.205,
1196
+ "eval_Precision": 93.4409,
1197
+ "eval_Recall": 42.2665,
1198
+ "eval_accuracy": 60.4572,
1199
+ "eval_accuracy_negative": 78.6479,
1200
+ "eval_accuracy_positive": 42.2665,
1201
+ "eval_gen_len": 3.0,
1202
+ "eval_loss": 0.2337663322687149,
1203
+ "eval_runtime": 78.2027,
1204
+ "eval_samples_per_second": 52.581,
1205
+ "eval_steps_per_second": 3.286,
1206
+ "step": 5700
1207
+ },
1208
+ {
1209
+ "epoch": 0.87,
1210
+ "learning_rate": 4.5653477218225424e-05,
1211
+ "loss": 0.1912,
1212
+ "step": 5800
1213
+ },
1214
+ {
1215
+ "epoch": 0.87,
1216
+ "eval_F1": 82.295,
1217
+ "eval_Precision": 83.3416,
1218
+ "eval_Recall": 81.2743,
1219
+ "eval_accuracy": 78.6722,
1220
+ "eval_accuracy_negative": 76.07,
1221
+ "eval_accuracy_positive": 81.2743,
1222
+ "eval_gen_len": 3.0,
1223
+ "eval_loss": 0.23967531323432922,
1224
+ "eval_runtime": 78.2026,
1225
+ "eval_samples_per_second": 52.581,
1226
+ "eval_steps_per_second": 3.286,
1227
+ "step": 5800
1228
+ },
1229
+ {
1230
+ "epoch": 0.88,
1231
+ "learning_rate": 4.557853717026379e-05,
1232
+ "loss": 0.2292,
1233
+ "step": 5900
1234
+ },
1235
+ {
1236
+ "epoch": 0.88,
1237
+ "eval_F1": 77.3544,
1238
+ "eval_Precision": 87.8245,
1239
+ "eval_Recall": 69.1148,
1240
+ "eval_accuracy": 71.8142,
1241
+ "eval_accuracy_negative": 74.5136,
1242
+ "eval_accuracy_positive": 69.1148,
1243
+ "eval_gen_len": 3.0,
1244
+ "eval_loss": 0.21783529222011566,
1245
+ "eval_runtime": 78.0505,
1246
+ "eval_samples_per_second": 52.684,
1247
+ "eval_steps_per_second": 3.293,
1248
+ "step": 5900
1249
+ },
1250
+ {
1251
+ "epoch": 0.9,
1252
+ "learning_rate": 4.550359712230216e-05,
1253
+ "loss": 0.2366,
1254
+ "step": 6000
1255
+ },
1256
+ {
1257
+ "epoch": 0.9,
1258
+ "eval_F1": 73.1382,
1259
+ "eval_Precision": 90.4659,
1260
+ "eval_Recall": 61.3813,
1261
+ "eval_accuracy": 69.3337,
1262
+ "eval_accuracy_negative": 77.286,
1263
+ "eval_accuracy_positive": 61.3813,
1264
+ "eval_gen_len": 3.0,
1265
+ "eval_loss": 0.21324363350868225,
1266
+ "eval_runtime": 78.0489,
1267
+ "eval_samples_per_second": 52.685,
1268
+ "eval_steps_per_second": 3.293,
1269
+ "step": 6000
1270
+ },
1271
+ {
1272
+ "epoch": 0.91,
1273
+ "learning_rate": 4.542865707434053e-05,
1274
+ "loss": 0.2172,
1275
+ "step": 6100
1276
+ },
1277
+ {
1278
+ "epoch": 0.91,
1279
+ "eval_F1": 82.6628,
1280
+ "eval_Precision": 81.4151,
1281
+ "eval_Recall": 83.9494,
1282
+ "eval_accuracy": 80.0584,
1283
+ "eval_accuracy_negative": 76.1673,
1284
+ "eval_accuracy_positive": 83.9494,
1285
+ "eval_gen_len": 3.0,
1286
+ "eval_loss": 0.21681031584739685,
1287
+ "eval_runtime": 77.9889,
1288
+ "eval_samples_per_second": 52.725,
1289
+ "eval_steps_per_second": 3.295,
1290
+ "step": 6100
1291
+ },
1292
+ {
1293
+ "epoch": 0.93,
1294
+ "learning_rate": 4.53537170263789e-05,
1295
+ "loss": 0.2176,
1296
+ "step": 6200
1297
+ },
1298
+ {
1299
+ "epoch": 0.93,
1300
+ "eval_F1": 81.7284,
1301
+ "eval_Precision": 82.999,
1302
+ "eval_Recall": 80.4961,
1303
+ "eval_accuracy": 79.2802,
1304
+ "eval_accuracy_negative": 78.0642,
1305
+ "eval_accuracy_positive": 80.4961,
1306
+ "eval_gen_len": 3.0,
1307
+ "eval_loss": 0.19668923318386078,
1308
+ "eval_runtime": 78.1339,
1309
+ "eval_samples_per_second": 52.628,
1310
+ "eval_steps_per_second": 3.289,
1311
+ "step": 6200
1312
+ },
1313
+ {
1314
+ "epoch": 0.94,
1315
+ "learning_rate": 4.527877697841727e-05,
1316
+ "loss": 0.1703,
1317
+ "step": 6300
1318
+ },
1319
+ {
1320
+ "epoch": 0.94,
1321
+ "eval_F1": 82.7941,
1322
+ "eval_Precision": 83.4486,
1323
+ "eval_Recall": 82.1498,
1324
+ "eval_accuracy": 79.9125,
1325
+ "eval_accuracy_negative": 77.6751,
1326
+ "eval_accuracy_positive": 82.1498,
1327
+ "eval_gen_len": 3.0,
1328
+ "eval_loss": 0.23434630036354065,
1329
+ "eval_runtime": 77.9722,
1330
+ "eval_samples_per_second": 52.737,
1331
+ "eval_steps_per_second": 3.296,
1332
+ "step": 6300
1333
+ },
1334
+ {
1335
+ "epoch": 0.96,
1336
+ "learning_rate": 4.520383693045564e-05,
1337
+ "loss": 0.2234,
1338
+ "step": 6400
1339
+ },
1340
+ {
1341
+ "epoch": 0.96,
1342
+ "eval_F1": 83.6217,
1343
+ "eval_Precision": 81.5534,
1344
+ "eval_Recall": 85.7977,
1345
+ "eval_accuracy": 80.2529,
1346
+ "eval_accuracy_negative": 74.7082,
1347
+ "eval_accuracy_positive": 85.7977,
1348
+ "eval_gen_len": 3.0,
1349
+ "eval_loss": 0.22015611827373505,
1350
+ "eval_runtime": 78.0553,
1351
+ "eval_samples_per_second": 52.681,
1352
+ "eval_steps_per_second": 3.293,
1353
+ "step": 6400
1354
+ },
1355
+ {
1356
+ "epoch": 0.97,
1357
+ "learning_rate": 4.512889688249401e-05,
1358
+ "loss": 0.2047,
1359
+ "step": 6500
1360
+ },
1361
+ {
1362
+ "epoch": 0.97,
1363
+ "eval_F1": 83.6764,
1364
+ "eval_Precision": 82.3751,
1365
+ "eval_Recall": 85.0195,
1366
+ "eval_accuracy": 79.1586,
1367
+ "eval_accuracy_negative": 73.2977,
1368
+ "eval_accuracy_positive": 85.0195,
1369
+ "eval_gen_len": 3.0,
1370
+ "eval_loss": 0.20958371460437775,
1371
+ "eval_runtime": 77.9877,
1372
+ "eval_samples_per_second": 52.726,
1373
+ "eval_steps_per_second": 3.295,
1374
+ "step": 6500
1375
+ }
1376
+ ],
1377
+ "logging_steps": 100,
1378
+ "max_steps": 66720,
1379
+ "num_train_epochs": 10,
1380
+ "save_steps": 100,
1381
+ "total_flos": 1.83775713030144e+16,
1382
+ "trial_name": null,
1383
+ "trial_params": null
1384
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc06968df6ffa080e66881d54e5b42b706b771520ad1bde5994d5f71263e9760
3
+ size 4411