avuhong committed on
Commit a341115
1 Parent(s): 850edcf

Upload 9 files

README.md ADDED
@@ -0,0 +1,80 @@
+ ---
+ license: apache-2.0
+ tags:
+ - generated_from_trainer
+ metrics:
+ - accuracy
+ model-index:
+ - name: output_v2
+ results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # output_v2
+
+ This model is a fine-tuned version of [avuhong/ParvoGPT2](https://huggingface.co/avuhong/ParvoGPT2) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.9835
+ - Accuracy: 0.8502
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 1e-05
+ - train_batch_size: 1
+ - eval_batch_size: 1
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 2
+ - gradient_accumulation_steps: 4
+ - total_train_batch_size: 8
+ - total_eval_batch_size: 2
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - num_epochs: 16.0
+ - mixed_precision_training: Native AMP
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
+ | No log | 1.0 | 220 | 1.5038 | 0.7795 |
+ | No log | 2.0 | 440 | 1.3765 | 0.7965 |
+ | 1.5308 | 3.0 | 660 | 1.2920 | 0.8075 |
+ | 1.5308 | 4.0 | 880 | 1.2308 | 0.8156 |
+ | 1.2695 | 5.0 | 1100 | 1.1788 | 0.8226 |
+ | 1.2695 | 6.0 | 1320 | 1.1363 | 0.8279 |
+ | 1.1353 | 7.0 | 1540 | 1.1027 | 0.8324 |
+ | 1.1353 | 8.0 | 1760 | 1.0726 | 0.8373 |
+ | 1.1353 | 9.0 | 1980 | 1.0481 | 0.8405 |
+ | 1.0713 | 10.0 | 2200 | 1.0299 | 0.8433 |
+ | 1.0713 | 11.0 | 2420 | 1.0174 | 0.8455 |
+ | 1.0233 | 12.0 | 2640 | 1.0028 | 0.8477 |
+ | 1.0233 | 13.0 | 2860 | 0.9939 | 0.8488 |
+ | 0.9811 | 14.0 | 3080 | 0.9889 | 0.8497 |
+ | 0.9811 | 15.0 | 3300 | 0.9854 | 0.8500 |
+ | 0.9696 | 16.0 | 3520 | 0.9835 | 0.8502 |
+
+
+ ### Framework versions
+
+ - Transformers 4.26.1
+ - Pytorch 1.13.1+cu117
+ - Datasets 2.9.0
+ - Tokenizers 0.13.2
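
The model card above reports only aggregate metrics. As a hedged illustration (the Hub repo id `avuhong/output_v2` and the input sequence below are placeholders, not confirmed by this commit), the checkpoint could be loaded and scored with the standard `transformers` causal-LM interface:

```python
# Hedged sketch: load the fine-tuned checkpoint and score one sequence with the
# causal-LM loss, so exp(loss) is comparable to the perplexity in all_results.json.
# "avuhong/output_v2" is a placeholder repo id; substitute the actual Hub path.
import math

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "avuhong/output_v2"  # placeholder, not confirmed by this commit
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
model.eval()

text = "MAPPAKRA"  # hypothetical input sequence
inputs = tokenizer(text, return_tensors="pt")

with torch.no_grad():
    # Passing labels=input_ids makes the model return the mean cross-entropy loss.
    out = model(**inputs, labels=inputs["input_ids"])

print(f"loss = {out.loss.item():.4f}, perplexity = {math.exp(out.loss.item()):.4f}")
```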
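The hyperparameter list also maps almost one-to-one onto `transformers.TrainingArguments`. A minimal sketch, assuming the run was launched with two processes (e.g. `torchrun --nproc_per_node 2`) so that 1 sample per device × 2 devices × 4 accumulation steps gives the effective train batch size of 8; dataset preparation and the `Trainer` call are omitted:

```python
# Hedged sketch: TrainingArguments mirroring the hyperparameters in the card.
# Adam betas (0.9, 0.999) and epsilon 1e-08 are the library defaults.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="output_v2",
    learning_rate=1e-5,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=16.0,
    lr_scheduler_type="linear",
    seed=42,
    fp16=True,                    # "Native AMP" mixed precision
    evaluation_strategy="epoch",  # matches the per-epoch rows in the results table
)
```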
all_results.json ADDED
@@ -0,0 +1,15 @@
+ {
+ "epoch": 16.0,
+ "eval_accuracy": 0.8501928179347534,
+ "eval_loss": 0.9834597110748291,
+ "eval_runtime": 5.6916,
+ "eval_samples": 91,
+ "eval_samples_per_second": 15.988,
+ "eval_steps_per_second": 8.082,
+ "perplexity": 2.6736904553424052,
+ "train_loss": 1.138753395730799,
+ "train_runtime": 5180.444,
+ "train_samples": 1762,
+ "train_samples_per_second": 5.442,
+ "train_steps_per_second": 0.679
+ }
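
For reference, the `perplexity` field is consistent with being the exponential of `eval_loss`, as the standard causal-LM evaluation scripts compute it:

```python
# Check: perplexity = exp(eval_loss) for a causal language model.
import math

print(math.exp(0.9834597110748291))  # ~2.67369, matching "perplexity" above
```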
eval_results.json ADDED
@@ -0,0 +1,10 @@
+ {
+ "epoch": 16.0,
+ "eval_accuracy": 0.8501928179347534,
+ "eval_loss": 0.9834597110748291,
+ "eval_runtime": 5.6916,
+ "eval_samples": 91,
+ "eval_samples_per_second": 15.988,
+ "eval_steps_per_second": 8.082,
+ "perplexity": 2.6736904553424052
+ }
runs/Mar15_13-45-15_srvgpu/1678888022.4053812/events.out.tfevents.1678888022.srvgpu.2934731.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eb55f0b56bd077ff05bb404073ebd9b700dd89e390c65875c76be8db2efb13d0
+ size 5643
runs/Mar15_13-45-15_srvgpu/events.out.tfevents.1678888022.srvgpu.2934731.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e94e334f9383683ccb851b24b7d0580226cc0caaac8ba3538bf9bf50f5b36c96
+ size 10687
runs/Mar15_13-45-15_srvgpu/events.out.tfevents.1678893212.srvgpu.2934731.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2c51db3ea9acf866ec087ab9fcff153adbec9c9a44c135d80362fdf0dffdc70e
+ size 363
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 16.0,
+ "train_loss": 1.138753395730799,
+ "train_runtime": 5180.444,
+ "train_samples": 1762,
+ "train_samples_per_second": 5.442,
+ "train_steps_per_second": 0.679
+ }
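
The throughput figures are internally consistent: 1762 training samples over 16 epochs in 5180.444 s, with roughly 1762 / 8 ≈ 220 optimizer steps per epoch and 3520 steps in total, matching the results table:

```python
# Consistency check for train_results.json.
train_samples, num_epochs, train_runtime = 1762, 16, 5180.444
total_steps = 3520  # 220 steps/epoch x 16 epochs

print(train_samples * num_epochs / train_runtime)  # ~5.442 train_samples_per_second
print(total_steps / train_runtime)                 # ~0.679 train_steps_per_second
```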
trainer_state.json ADDED
@@ -0,0 +1,211 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 15.998864926220204,
+ "global_step": 3520,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.7795322956613279,
+ "eval_loss": 1.5038145780563354,
+ "eval_runtime": 5.9639,
+ "eval_samples_per_second": 15.258,
+ "eval_steps_per_second": 7.713,
+ "step": 220
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.7964938287518932,
+ "eval_loss": 1.3765002489089966,
+ "eval_runtime": 5.973,
+ "eval_samples_per_second": 15.235,
+ "eval_steps_per_second": 7.701,
+ "step": 440
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 8.579545454545455e-06,
+ "loss": 1.5308,
+ "step": 500
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.8075258075258075,
+ "eval_loss": 1.2920387983322144,
+ "eval_runtime": 5.9722,
+ "eval_samples_per_second": 15.237,
+ "eval_steps_per_second": 7.702,
+ "step": 660
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy": 0.8155607833027188,
+ "eval_loss": 1.230821967124939,
+ "eval_runtime": 5.6615,
+ "eval_samples_per_second": 16.073,
+ "eval_steps_per_second": 8.125,
+ "step": 880
+ },
+ {
+ "epoch": 4.54,
+ "learning_rate": 7.161931818181819e-06,
+ "loss": 1.2695,
+ "step": 1000
+ },
+ {
+ "epoch": 5.0,
+ "eval_accuracy": 0.8225860161344032,
+ "eval_loss": 1.1788371801376343,
+ "eval_runtime": 5.9852,
+ "eval_samples_per_second": 15.204,
+ "eval_steps_per_second": 7.686,
+ "step": 1100
+ },
+ {
+ "epoch": 6.0,
+ "eval_accuracy": 0.8278817956237311,
+ "eval_loss": 1.136326789855957,
+ "eval_runtime": 5.9723,
+ "eval_samples_per_second": 15.237,
+ "eval_steps_per_second": 7.702,
+ "step": 1320
+ },
+ {
+ "epoch": 6.82,
+ "learning_rate": 5.741477272727272e-06,
+ "loss": 1.1353,
+ "step": 1500
+ },
+ {
+ "epoch": 7.0,
+ "eval_accuracy": 0.8323826710923485,
+ "eval_loss": 1.102668285369873,
+ "eval_runtime": 5.9851,
+ "eval_samples_per_second": 15.204,
+ "eval_steps_per_second": 7.686,
+ "step": 1540
+ },
+ {
+ "epoch": 8.0,
+ "eval_accuracy": 0.8373239663562244,
+ "eval_loss": 1.072572112083435,
+ "eval_runtime": 5.9939,
+ "eval_samples_per_second": 15.182,
+ "eval_steps_per_second": 7.674,
+ "step": 1760
+ },
+ {
+ "epoch": 9.0,
+ "eval_accuracy": 0.8404928404928405,
+ "eval_loss": 1.0481319427490234,
+ "eval_runtime": 5.9927,
+ "eval_samples_per_second": 15.185,
+ "eval_steps_per_second": 7.676,
+ "step": 1980
+ },
+ {
+ "epoch": 9.09,
+ "learning_rate": 4.321022727272728e-06,
+ "loss": 1.0713,
+ "step": 2000
+ },
+ {
+ "epoch": 10.0,
+ "eval_accuracy": 0.8432642626191014,
+ "eval_loss": 1.0299291610717773,
+ "eval_runtime": 5.6745,
+ "eval_samples_per_second": 16.037,
+ "eval_steps_per_second": 8.106,
+ "step": 2200
+ },
+ {
+ "epoch": 11.0,
+ "eval_accuracy": 0.8455415552189746,
+ "eval_loss": 1.0174473524093628,
+ "eval_runtime": 5.9763,
+ "eval_samples_per_second": 15.227,
+ "eval_steps_per_second": 7.697,
+ "step": 2420
+ },
+ {
+ "epoch": 11.36,
+ "learning_rate": 2.900568181818182e-06,
+ "loss": 1.0233,
+ "step": 2500
+ },
+ {
+ "epoch": 12.0,
+ "eval_accuracy": 0.8477006864103638,
+ "eval_loss": 1.0027512311935425,
+ "eval_runtime": 5.9881,
+ "eval_samples_per_second": 15.197,
+ "eval_steps_per_second": 7.682,
+ "step": 2640
+ },
+ {
+ "epoch": 13.0,
+ "eval_accuracy": 0.8488178488178488,
+ "eval_loss": 0.9938735961914062,
+ "eval_runtime": 5.9759,
+ "eval_samples_per_second": 15.228,
+ "eval_steps_per_second": 7.698,
+ "step": 2860
+ },
+ {
+ "epoch": 13.64,
+ "learning_rate": 1.4829545454545454e-06,
+ "loss": 0.9811,
+ "step": 3000
+ },
+ {
+ "epoch": 14.0,
+ "eval_accuracy": 0.8496772045159142,
+ "eval_loss": 0.9889363646507263,
+ "eval_runtime": 5.9819,
+ "eval_samples_per_second": 15.213,
+ "eval_steps_per_second": 7.69,
+ "step": 3080
+ },
+ {
+ "epoch": 15.0,
+ "eval_accuracy": 0.8499779790102371,
+ "eval_loss": 0.9854440093040466,
+ "eval_runtime": 5.9887,
+ "eval_samples_per_second": 15.195,
+ "eval_steps_per_second": 7.681,
+ "step": 3300
+ },
+ {
+ "epoch": 15.91,
+ "learning_rate": 6.250000000000001e-08,
+ "loss": 0.9696,
+ "step": 3500
+ },
+ {
+ "epoch": 16.0,
+ "eval_accuracy": 0.8501928179347534,
+ "eval_loss": 0.9834597110748291,
+ "eval_runtime": 5.978,
+ "eval_samples_per_second": 15.223,
+ "eval_steps_per_second": 7.695,
+ "step": 3520
+ },
+ {
+ "epoch": 16.0,
+ "step": 3520,
+ "total_flos": 1.2269276173959168e+17,
+ "train_loss": 1.138753395730799,
+ "train_runtime": 5180.444,
+ "train_samples_per_second": 5.442,
+ "train_steps_per_second": 0.679
+ }
+ ],
+ "max_steps": 3520,
+ "num_train_epochs": 16,
+ "total_flos": 1.2269276173959168e+17,
+ "trial_name": null,
+ "trial_params": null
+ }
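
`trainer_state.json` keeps the full `log_history`: one record per epoch-level evaluation (the entries with `eval_loss`) plus a training-loss/learning-rate record every 500 steps. A short sketch for extracting the validation-loss curve, assuming the file has been downloaded locally:

```python
# Hedged sketch: pull the per-epoch validation loss out of trainer_state.json.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Records containing "eval_loss" are the per-epoch evaluations; the rest are
# the periodic training-loss/learning-rate logs.
eval_points = [(rec["epoch"], rec["eval_loss"])
               for rec in state["log_history"] if "eval_loss" in rec]

for epoch, loss in eval_points:
    print(f"epoch {epoch:>4}: eval_loss = {loss:.4f}")
```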
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d279c7dbc43acd518e710d89f64a4cc417adfa56edacc6d0708f9864295a4747
+ size 3579
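
`training_args.bin` is a Git LFS pointer to the pickled `TrainingArguments` object saved by the `Trainer`. A hedged sketch for inspecting it, assuming a compatible `transformers`/`torch` environment (newer `torch` releases may require `weights_only=False` when unpickling arbitrary objects):

```python
# training_args.bin is a pickled TrainingArguments saved by Trainer; unpickling
# it requires transformers to be importable in the current environment.
import torch

args = torch.load("training_args.bin")
print(type(args).__name__)       # expected: TrainingArguments
print(args.learning_rate, args.num_train_epochs, args.seed)
```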