AlekseyKorshuk committed
Commit c618893 · 1 Parent(s): f5d9b33

huggingartists
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
  dataset = load_dataset("huggingartists/morgenshtern")
  ```

- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/20cl7laa/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/22rvzlbq/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.

  ## Training procedure

  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on MORGENSHTERN's lyrics.

- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3qf1htak) for full transparency and reproducibility.
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/320wuvzx) for full transparency and reproducibility.

- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3qf1htak/artifacts) is logged and versioned.
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/320wuvzx/artifacts) is logged and versioned.

  ## How to use

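For illustration only (not part of the diff itself): a minimal sketch of the usage the README describes, assuming the standard `datasets` and `transformers` APIs and assuming the fine-tuned model is published under the same repo id as the dataset.

```python
from datasets import load_dataset
from transformers import pipeline

# Lyrics dataset referenced in the README hunk above.
dataset = load_dataset("huggingartists/morgenshtern")

# Text generation with the fine-tuned checkpoint (model id assumed to mirror the dataset repo).
generator = pipeline("text-generation", model="huggingartists/morgenshtern")
print(generator("I am", num_return_sequences=1)[0]["generated_text"])
```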
evaluation.txt CHANGED
@@ -1 +1 @@
- {"eval_loss": 0.8490299582481384, "eval_runtime": 6.2996, "eval_samples_per_second": 22.065, "eval_steps_per_second": 2.857, "epoch": 4.0}
+ {"eval_loss": 0.9282976984977722, "eval_runtime": 7.0681, "eval_samples_per_second": 20.656, "eval_steps_per_second": 2.688, "epoch": 3.0}
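The updated evaluation corresponds to a 3-epoch run instead of 4. Since `eval_loss` is a mean cross-entropy, a rough perplexity can be read off as `exp(eval_loss)`; a minimal sketch, with the file path assumed:

```python
import json
import math

# Read the evaluation summary committed above (path assumed).
with open("evaluation.txt") as f:
    metrics = json.load(f)

# Perplexity is the exponential of the mean cross-entropy loss.
print(math.exp(metrics["eval_loss"]))  # roughly 2.53 for eval_loss = 0.9283
```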
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c206113ad277df943af1b246e8fbbe80bcbe43ffaae6fccf0407e9a6a26a085c
+ oid sha256:290b054aa26a75472ea89462728e1fbe01bb1cbbb7bf90d372ad250db6831111
  size 497764120
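The weight files in this commit are stored as Git LFS pointers, so each diff only swaps the object hash (`oid`) and size while the payload lives in LFS storage. A minimal sketch of checking a downloaded file against the pointer's sha256, with file name and expected hash taken from the hunk above:

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream a file and return its hex sha256 digest."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# oid recorded in the new LFS pointer above; local path assumed.
expected = "290b054aa26a75472ea89462728e1fbe01bb1cbbb7bf90d372ad250db6831111"
print(sha256_of("flax_model.msgpack") == expected)
```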
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b30ca0900ebc27f3cbfd8a6540e092c04afffa6f018f6a00a9f12bd726579e80
+ oid sha256:dfb92c5db45ae57ec8704270fd02cd24309911146a62e4ffdfe187cdfacc2cca
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:662d09ceae0ce79ead4cb03910f4f01e5baefa1243d539c22697cbebcaa50e11
+ oid sha256:75dfc06c83e4ec2c68737881be276359157fac03863eda5033a21d9e80f92ef9
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9a811ea5cc5d9c9527f6e81dfbc9d60be228d782159d3b0640a84e4cd48563bd
- size 14503
+ oid sha256:ffc74e881d44215266a1c99f799520e15420c54f0cc889ce0ad363a9b0849b03
+ size 14567
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7eeb3725eda0751da218aac4b82c91a171feeaac6c25ec948585c5e9228b5f56
+ oid sha256:7b9d873c8cacf8459303fcc5fcff12d277bec46420342ef76b13541e314fd254
  size 623
trainer_state.json CHANGED
@@ -1,274 +1,266 @@
  {
- "best_metric": 0.8490299582481384,
- "best_model_checkpoint": "output/morgenshtern/checkpoint-198",
  "epoch": 2.0,
- "global_step": 198,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
  {
  "epoch": 0.05,
- "learning_rate": 0.00013630248621914493,
- "loss": 1.1495,
  "step": 5
  },
  {
  "epoch": 0.1,
- "learning_rate": 0.00013363342974500022,
- "loss": 1.133,
  "step": 10
  },
  {
  "epoch": 0.15,
- "learning_rate": 0.00012926267066407974,
- "loss": 1.1617,
  "step": 15
  },
  {
- "epoch": 0.21,
- "learning_rate": 0.00012330457680460286,
- "loss": 1.1061,
  "step": 20
  },
  {
  "epoch": 0.26,
- "learning_rate": 0.0001159150511212779,
- "loss": 1.2273,
  "step": 25
  },
  {
  "epoch": 0.31,
- "learning_rate": 0.00010728745224777217,
- "loss": 1.0743,
  "step": 30
  },
  {
  "epoch": 0.36,
- "learning_rate": 9.764753496206386e-05,
- "loss": 1.2278,
  "step": 35
  },
  {
  "epoch": 0.41,
- "learning_rate": 8.724754295541321e-05,
- "loss": 1.1852,
  "step": 40
  },
  {
  "epoch": 0.46,
- "learning_rate": 7.635960847699664e-05,
- "loss": 1.1321,
  "step": 45
  },
  {
- "epoch": 0.52,
- "learning_rate": 6.526863156294906e-05,
- "loss": 1.0477,
  "step": 50
  },
  {
- "epoch": 0.57,
- "learning_rate": 5.426482517605479e-05,
- "loss": 1.1788,
  "step": 55
  },
  {
- "epoch": 0.62,
- "learning_rate": 4.363612132430507e-05,
- "loss": 1.0456,
  "step": 60
  },
  {
- "epoch": 0.67,
- "learning_rate": 3.36606368642472e-05,
- "loss": 1.1629,
  "step": 65
  },
  {
- "epoch": 0.72,
- "learning_rate": 2.4599396133304144e-05,
- "loss": 1.1195,
  "step": 70
  },
  {
  "epoch": 0.77,
- "learning_rate": 1.6689500834906116e-05,
- "loss": 0.9719,
  "step": 75
  },
  {
  "epoch": 0.82,
- "learning_rate": 1.0137925897200545e-05,
- "loss": 1.159,
  "step": 80
  },
  {
- "epoch": 0.88,
- "learning_rate": 5.116103646510543e-06,
- "loss": 1.0041,
  "step": 85
  },
  {
- "epoch": 0.93,
- "learning_rate": 1.7554380092209137e-06,
- "loss": 1.0917,
  "step": 90
  },
  {
- "epoch": 0.98,
- "learning_rate": 1.4386612009299613e-07,
- "loss": 1.0484,
  "step": 95
  },
  {
  "epoch": 1.0,
- "eval_loss": 1.0789222717285156,
- "eval_runtime": 7.1308,
- "eval_samples_per_second": 21.035,
- "eval_steps_per_second": 2.664,
- "step": 97
  },
  {
- "epoch": 1.03,
- "learning_rate": 3.2355730797025283e-07,
- "loss": 1.094,
  "step": 100
  },
  {
- "epoch": 1.08,
- "learning_rate": 2.289809668554777e-06,
- "loss": 1.173,
  "step": 105
  },
  {
- "epoch": 1.13,
- "learning_rate": 5.991173097174171e-06,
- "loss": 1.1401,
  "step": 110
  },
  {
- "epoch": 1.19,
- "learning_rate": 1.1330795562191737e-05,
- "loss": 1.1561,
  "step": 115
  },
  {
- "epoch": 1.24,
- "learning_rate": 1.816895739168569e-05,
- "loss": 1.103,
  "step": 120
  },
  {
- "epoch": 1.29,
- "learning_rate": 2.6326727259776298e-05,
- "loss": 1.0156,
  "step": 125
  },
  {
- "epoch": 1.34,
- "learning_rate": 3.559064420793063e-05,
- "loss": 1.047,
  "step": 130
  },
  {
- "epoch": 1.39,
- "learning_rate": 4.571830318875349e-05,
- "loss": 1.0605,
  "step": 135
  },
  {
- "epoch": 1.44,
- "learning_rate": 5.644469797766504e-05,
- "loss": 1.0732,
  "step": 140
  },
  {
- "epoch": 1.49,
- "learning_rate": 6.748915548014806e-05,
- "loss": 1.0798,
  "step": 145
  },
  {
- "epoch": 1.55,
- "learning_rate": 7.856267998744024e-05,
- "loss": 1.132,
  "step": 150
  },
  {
- "epoch": 1.6,
- "learning_rate": 8.937551520659945e-05,
- "loss": 1.097,
  "step": 155
  },
  {
- "epoch": 1.65,
- "learning_rate": 9.964472619245533e-05,
- "loss": 1.1685,
  "step": 160
  },
  {
- "epoch": 1.7,
- "learning_rate": 0.00010910160278820622,
- "loss": 1.0965,
  "step": 165
  },
  {
- "epoch": 1.75,
- "learning_rate": 0.00011749869085191239,
- "loss": 1.0409,
  "step": 170
  },
  {
- "epoch": 1.8,
- "learning_rate": 0.00012461626728572453,
- "loss": 1.2079,
  "step": 175
  },
  {
- "epoch": 1.86,
- "learning_rate": 0.000130268089438458,
- "loss": 1.109,
  "step": 180
  },
  {
- "epoch": 1.91,
- "learning_rate": 0.00013430626843929596,
- "loss": 1.1506,
  "step": 185
  },
  {
- "epoch": 1.96,
- "learning_rate": 0.00013662513894413276,
- "loss": 1.074,
  "step": 190
  },
  {
- "epoch": 2.0,
- "eval_loss": 0.9612834453582764,
- "eval_runtime": 6.602,
- "eval_samples_per_second": 23.023,
- "eval_steps_per_second": 2.878,
- "step": 194
- },
- {
- "epoch": 1.97,
- "learning_rate": 0.00010659891638121502,
- "loss": 1.1752,
  "step": 195
  },
  {
  "epoch": 2.0,
- "eval_loss": 0.8490299582481384,
- "eval_runtime": 6.0445,
- "eval_samples_per_second": 22.996,
- "eval_steps_per_second": 2.978,
- "step": 198
  }
  ],
- "max_steps": 396,
- "num_train_epochs": 4,
- "total_flos": 206028767232000.0,
  "trial_name": null,
  "trial_params": null
  }

  {
+ "best_metric": 0.9282976984977722,
+ "best_model_checkpoint": "output/morgenshtern/checkpoint-196",
  "epoch": 2.0,
+ "global_step": 196,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
  {
  "epoch": 0.05,
+ "learning_rate": 2.8353852816851834e-06,
+ "loss": 0.9329,
  "step": 5
  },
  {
  "epoch": 0.1,
+ "learning_rate": 5.632050517253893e-07,
+ "loss": 1.0399,
  "step": 10
  },
  {
  "epoch": 0.15,
+ "learning_rate": 3.5245568632818114e-08,
+ "loss": 0.9758,
  "step": 15
  },
  {
+ "epoch": 0.2,
+ "learning_rate": 1.2650418304129032e-06,
+ "loss": 1.1379,
  "step": 20
  },
  {
  "epoch": 0.26,
+ "learning_rate": 4.2210662473863345e-06,
+ "loss": 1.0053,
  "step": 25
  },
  {
  "epoch": 0.31,
+ "learning_rate": 8.827536897135236e-06,
+ "loss": 0.9426,
  "step": 30
  },
  {
  "epoch": 0.36,
+ "learning_rate": 1.4966360302693292e-05,
+ "loss": 0.975,
  "step": 35
  },
  {
  "epoch": 0.41,
+ "learning_rate": 2.2480158928073662e-05,
+ "loss": 0.9562,
  "step": 40
  },
  {
  "epoch": 0.46,
+ "learning_rate": 3.1176305776956165e-05,
+ "loss": 0.9848,
  "step": 45
  },
  {
+ "epoch": 0.51,
+ "learning_rate": 4.08318626618038e-05,
+ "loss": 0.9881,
  "step": 50
  },
  {
+ "epoch": 0.56,
+ "learning_rate": 5.11992955438076e-05,
+ "loss": 0.8541,
  "step": 55
  },
  {
+ "epoch": 0.61,
+ "learning_rate": 6.201282042273309e-05,
+ "loss": 0.9165,
  "step": 60
  },
  {
+ "epoch": 0.66,
+ "learning_rate": 7.299521709067675e-05,
+ "loss": 1.1571,
  "step": 65
  },
  {
+ "epoch": 0.71,
+ "learning_rate": 8.386493606940326e-05,
+ "loss": 0.9756,
  "step": 70
  },
  {
  "epoch": 0.77,
+ "learning_rate": 9.434331653472495e-05,
+ "loss": 1.0831,
  "step": 75
  },
  {
  "epoch": 0.82,
+ "learning_rate": 0.00010416173018610171,
+ "loss": 1.0254,
  "step": 80
  },
  {
+ "epoch": 0.87,
+ "learning_rate": 0.00011306846791811384,
+ "loss": 0.9955,
  "step": 85
  },
  {
+ "epoch": 0.92,
+ "learning_rate": 0.00012083519274412272,
+ "loss": 0.9694,
  "step": 90
  },
  {
+ "epoch": 0.97,
+ "learning_rate": 0.0001272627935421667,
+ "loss": 1.0328,
  "step": 95
  },
  {
  "epoch": 1.0,
+ "eval_loss": 0.9301878213882446,
+ "eval_runtime": 7.0442,
+ "eval_samples_per_second": 20.726,
+ "eval_steps_per_second": 2.697,
+ "step": 98
  },
  {
+ "epoch": 1.02,
+ "learning_rate": 0.00013218648955393698,
+ "loss": 0.8862,
  "step": 100
  },
  {
+ "epoch": 1.07,
+ "learning_rate": 0.00013548005477567298,
+ "loss": 0.9512,
  "step": 105
  },
  {
+ "epoch": 1.12,
+ "learning_rate": 0.00013705905394267309,
+ "loss": 1.0269,
  "step": 110
  },
  {
+ "epoch": 1.17,
+ "learning_rate": 0.000136883007148315,
+ "loss": 1.0087,
  "step": 115
  },
  {
+ "epoch": 1.22,
+ "learning_rate": 0.00013495642760447747,
+ "loss": 0.999,
  "step": 120
  },
  {
+ "epoch": 1.28,
+ "learning_rate": 0.00013132870593888493,
+ "loss": 1.1179,
  "step": 125
  },
  {
+ "epoch": 1.33,
+ "learning_rate": 0.00012609284399558039,
+ "loss": 0.9949,
  "step": 130
  },
  {
+ "epoch": 1.38,
+ "learning_rate": 0.0001193830705993666,
+ "loss": 0.9807,
  "step": 135
  },
  {
+ "epoch": 1.43,
+ "learning_rate": 0.00011137140040750923,
+ "loss": 1.0501,
  "step": 140
  },
  {
+ "epoch": 1.48,
+ "learning_rate": 0.00010226322406747004,
+ "loss": 0.9548,
  "step": 145
  },
  {
+ "epoch": 1.53,
+ "learning_rate": 9.229204273330182e-05,
+ "loss": 1.0041,
  "step": 150
  },
  {
+ "epoch": 1.58,
+ "learning_rate": 8.171348192891448e-05,
+ "loss": 0.9339,
  "step": 155
  },
  {
+ "epoch": 1.63,
+ "learning_rate": 7.079873822141611e-05,
+ "loss": 0.9923,
  "step": 160
  },
  {
+ "epoch": 1.68,
+ "learning_rate": 5.982762670844271e-05,
+ "loss": 1.009,
  "step": 165
  },
  {
+ "epoch": 1.73,
+ "learning_rate": 4.908140755711122e-05,
+ "loss": 0.9595,
  "step": 170
  },
  {
+ "epoch": 1.79,
+ "learning_rate": 3.883557549653576e-05,
+ "loss": 0.9652,
  "step": 175
  },
  {
+ "epoch": 1.84,
+ "learning_rate": 2.935279711561958e-05,
+ "loss": 1.0619,
  "step": 180
  },
  {
+ "epoch": 1.89,
+ "learning_rate": 2.087617702860066e-05,
+ "loss": 0.9697,
  "step": 185
  },
  {
+ "epoch": 1.94,
+ "learning_rate": 1.3623025539858162e-05,
+ "loss": 1.0304,
  "step": 190
  },
  {
+ "epoch": 1.99,
+ "learning_rate": 7.779287582812291e-06,
+ "loss": 0.9482,
  "step": 195
  },
  {
  "epoch": 2.0,
+ "eval_loss": 0.9282976984977722,
+ "eval_runtime": 7.0544,
+ "eval_samples_per_second": 20.696,
+ "eval_steps_per_second": 2.693,
+ "step": 196
  }
  ],
+ "max_steps": 294,
+ "num_train_epochs": 3,
+ "total_flos": 203546492928000.0,
  "trial_name": null,
  "trial_params": null
  }
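For reference, `log_history` above is the per-step record the Trainer checkpoints alongside the weights. A minimal sketch of pulling the loss curve out of the committed file, with the local path assumed:

```python
import json

# Load the committed trainer state (path assumed).
with open("trainer_state.json") as f:
    state = json.load(f)

# Training losses are logged every few steps; eval entries carry "eval_loss" instead.
train_losses = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
eval_losses = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]
print(train_losses[-3:], eval_losses)
```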
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:07920e82e382322fc86cba9e3137afe186d39ae3cfd7def39a9f14a99ffd7b99
+ oid sha256:640d62725592ddeb2633cb42cfa4872adce692e463f0c31e16d57bedfc0f8439
  size 2671