Commit 3d033a1 · 1 Parent(s): f76ee97
Datamance committed

Upload folder using huggingface_hub

Files changed (7)
  1. config.json +52 -0
  2. optimizer.pt +3 -0
  3. pytorch_model.bin +3 -0
  4. rng_state.pth +3 -0
  5. scheduler.pt +3 -0
  6. trainer_state.json +197 -0
  7. training_args.bin +3 -0
config.json ADDED
@@ -0,0 +1,52 @@
+ {
+   "_name_or_path": "yikuan8/Clinical-Longformer",
+   "architectures": [
+     "LongformerForSequenceClassification"
+   ],
+   "attention_mode": "longformer",
+   "attention_probs_dropout_prob": 0.1,
+   "attention_window": [
+     512,
+     512,
+     512,
+     512,
+     512,
+     512,
+     512,
+     512,
+     512,
+     512,
+     512,
+     512
+   ],
+   "bos_token_id": 0,
+   "eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "LABEL_0"
+   },
+   "ignore_attention_mask": false,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "LABEL_0": 0
+   },
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 4098,
+   "model_type": "longformer",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "onnx_export": false,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "problem_type": "regression",
+   "sep_token_id": 2,
+   "torch_dtype": "float32",
+   "transformers_version": "4.34.0",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 50265
+ }
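
This configuration describes a Clinical-Longformer sequence classifier fine-tuned with a single regression output ("problem_type": "regression", one label). A minimal sketch of loading such a checkpoint with the transformers library follows; the checkpoint directory is taken from best_model_checkpoint in trainer_state.json below, and the example input text is invented.

# Minimal sketch (assumptions noted in comments): load the fine-tuned
# regression head from a local checkpoint directory and score one document.
from transformers import AutoTokenizer, LongformerForSequenceClassification

checkpoint_dir = "models/longformer_classifier/checkpoint-6544"  # path from trainer_state.json; adjust locally
tokenizer = AutoTokenizer.from_pretrained("yikuan8/Clinical-Longformer")  # base tokenizer from "_name_or_path"
model = LongformerForSequenceClassification.from_pretrained(checkpoint_dir)

# With "problem_type": "regression" and a single label, logits is a one-element
# tensor holding a continuous score rather than class probabilities.
inputs = tokenizer("Example clinical note text.", return_tensors="pt",
                   truncation=True, max_length=4096)
score = model(**inputs).logits.item()
print(score)
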
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5aba72b6161ae1eeabd427c3be571876c76fe70dd53ea3752d93aafe229f96c8
+ size 1189502522
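
optimizer.pt and the other binaries below are stored as Git LFS pointers: each pointer records only the sha256 oid and byte size of the real object. A minimal sketch of verifying a downloaded object against the oid above, assuming the resolved file (not the pointer) is on disk:

# Minimal sketch: check a local file against the sha256 oid in its LFS pointer.
import hashlib

h = hashlib.sha256()
with open("optimizer.pt", "rb") as f:                 # assumes the real ~1.1 GB file is present locally
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks to avoid loading it all at once
        h.update(chunk)

expected = "5aba72b6161ae1eeabd427c3be571876c76fe70dd53ea3752d93aafe229f96c8"
print(h.hexdigest() == expected)
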
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d4da4f1d4d6f0ef4db8385ad2d7d4504a9d6947044de9dfdf69de28970907be0
+ size 594732814
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:32d32051101ec51c2b04c4ee6a6d2c7f40562e56836cbb02d6e6e3126490484d
+ size 13990
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4141bd82293fbad58a7034a6164e1b15ebb3bd98a6108d37ca224bb15d7ee5f0
+ size 1064
trainer_state.json ADDED
@@ -0,0 +1,197 @@
+ {
+   "best_metric": 0.9671150655371568,
+   "best_model_checkpoint": "models/longformer_classifier/checkpoint-6544",
+   "epoch": 2.0,
+   "eval_steps": 500,
+   "global_step": 13088,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.08,
+       "learning_rate": 1.9235941320293402e-05,
+       "loss": 7.0429,
+       "step": 500
+     },
+     {
+       "epoch": 0.15,
+       "learning_rate": 1.84718826405868e-05,
+       "loss": 6.7762,
+       "step": 1000
+     },
+     {
+       "epoch": 0.23,
+       "learning_rate": 1.7707823960880196e-05,
+       "loss": 6.3278,
+       "step": 1500
+     },
+     {
+       "epoch": 0.31,
+       "learning_rate": 1.6943765281173596e-05,
+       "loss": 5.1982,
+       "step": 2000
+     },
+     {
+       "epoch": 0.38,
+       "learning_rate": 1.6179706601466993e-05,
+       "loss": 6.9234,
+       "step": 2500
+     },
+     {
+       "epoch": 0.46,
+       "learning_rate": 1.5415647921760393e-05,
+       "loss": 3.3308,
+       "step": 3000
+     },
+     {
+       "epoch": 0.53,
+       "learning_rate": 1.4651589242053792e-05,
+       "loss": 11.0545,
+       "step": 3500
+     },
+     {
+       "epoch": 0.61,
+       "learning_rate": 1.3887530562347189e-05,
+       "loss": 11.4149,
+       "step": 4000
+     },
+     {
+       "epoch": 0.69,
+       "learning_rate": 1.3123471882640589e-05,
+       "loss": 3.9411,
+       "step": 4500
+     },
+     {
+       "epoch": 0.76,
+       "learning_rate": 1.2359413202933986e-05,
+       "loss": 5.0606,
+       "step": 5000
+     },
+     {
+       "epoch": 0.84,
+       "learning_rate": 1.1595354523227385e-05,
+       "loss": 4.0193,
+       "step": 5500
+     },
+     {
+       "epoch": 0.92,
+       "learning_rate": 1.0831295843520783e-05,
+       "loss": 6.3164,
+       "step": 6000
+     },
+     {
+       "epoch": 0.99,
+       "learning_rate": 1.006723716381418e-05,
+       "loss": 10.0802,
+       "step": 6500
+     },
+     {
+       "epoch": 1.0,
+       "eval_f1": 0.9671150655371568,
+       "eval_loss": 6.602837085723877,
+       "eval_precision": 0.9564744352317358,
+       "eval_recall": 0.9779951100244498,
+       "eval_runtime": 1373.3102,
+       "eval_samples_per_second": 1.191,
+       "eval_steps_per_second": 1.191,
+       "step": 6544
+     },
+     {
+       "epoch": 1.07,
+       "learning_rate": 9.30317848410758e-06,
+       "loss": 6.419,
+       "step": 7000
+     },
+     {
+       "epoch": 1.15,
+       "learning_rate": 8.539119804400979e-06,
+       "loss": 3.2558,
+       "step": 7500
+     },
+     {
+       "epoch": 1.22,
+       "learning_rate": 7.775061124694378e-06,
+       "loss": 6.4214,
+       "step": 8000
+     },
+     {
+       "epoch": 1.3,
+       "learning_rate": 7.011002444987775e-06,
+       "loss": 5.2197,
+       "step": 8500
+     },
+     {
+       "epoch": 1.38,
+       "learning_rate": 6.246943765281174e-06,
+       "loss": 7.7577,
+       "step": 9000
+     },
+     {
+       "epoch": 1.45,
+       "learning_rate": 5.482885085574573e-06,
+       "loss": 4.2541,
+       "step": 9500
+     },
+     {
+       "epoch": 1.53,
+       "learning_rate": 4.718826405867971e-06,
+       "loss": 6.9299,
+       "step": 10000
+     },
+     {
+       "epoch": 1.6,
+       "learning_rate": 3.954767726161369e-06,
+       "loss": 4.4269,
+       "step": 10500
+     },
+     {
+       "epoch": 1.68,
+       "learning_rate": 3.190709046454768e-06,
+       "loss": 4.4276,
+       "step": 11000
+     },
+     {
+       "epoch": 1.76,
+       "learning_rate": 2.4266503667481666e-06,
+       "loss": 4.2753,
+       "step": 11500
+     },
+     {
+       "epoch": 1.83,
+       "learning_rate": 1.6625916870415647e-06,
+       "loss": 4.9898,
+       "step": 12000
+     },
+     {
+       "epoch": 1.91,
+       "learning_rate": 8.985330073349634e-07,
+       "loss": 5.014,
+       "step": 12500
+     },
+     {
+       "epoch": 1.99,
+       "learning_rate": 1.3447432762836187e-07,
+       "loss": 4.3745,
+       "step": 13000
+     },
+     {
+       "epoch": 2.0,
+       "eval_f1": 0.9671150655371568,
+       "eval_loss": 5.483180999755859,
+       "eval_precision": 0.9564744352317358,
+       "eval_recall": 0.9779951100244498,
+       "eval_runtime": 9134.9046,
+       "eval_samples_per_second": 0.179,
+       "eval_steps_per_second": 0.179,
+       "step": 13088
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 13088,
+   "num_train_epochs": 2,
+   "save_steps": 500,
+   "total_flos": 3.3581297093376e+16,
+   "trial_name": null,
+   "trial_params": null
+ }
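
trainer_state.json records the loss logged every 500 steps and the end-of-epoch evaluation metrics; the same eval_f1 of 0.967 appears at both epoch-end evaluations, and checkpoint-6544 (end of epoch 1) is the one recorded as best_model_checkpoint. A minimal sketch of reading the file back with only the standard library, assuming it sits in the working directory:

# Minimal sketch: recover the logged training curve and the best checkpoint.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

print("best checkpoint:", state["best_model_checkpoint"])
print("best eval_f1:", state["best_metric"])

# Training entries carry a "loss" key; evaluation entries carry eval_* keys instead.
for entry in state["log_history"]:
    if "loss" in entry:
        print(f"step {entry['step']:>6}  loss {entry['loss']:.4f}")
    elif "eval_f1" in entry:
        print(f"step {entry['step']:>6}  eval_f1 {entry['eval_f1']:.4f}  eval_loss {entry['eval_loss']:.4f}")
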
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4ba1f3ee7607380d609e013b7589d291fd66597877e88b392866198029930237
+ size 4536
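
Per the commit message, the folder was uploaded with huggingface_hub; the LFS-tracked binaries can be fetched back the same way. A minimal sketch with a placeholder repo id (the actual repository name is not shown on this page):

# Minimal sketch: download the whole snapshot, resolving LFS pointers to real files.
from huggingface_hub import snapshot_download

repo_id = "your-username/your-model"  # placeholder, not taken from this commit
local_dir = snapshot_download(repo_id=repo_id)
print(local_dir)  # contains config.json, pytorch_model.bin, trainer_state.json, ...
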