AlekseyKorshuk committed on
Commit
677c7a8
1 Parent(s): 9d5436c

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/hyuna")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/11k7ux75/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on HyunA (현아)'s lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/2adombki) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/2adombki/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
 
45
  dataset = load_dataset("huggingartists/hyuna")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3uo94mxd/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on HyunA (현아)'s lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1o8t0mq0) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1o8t0mq0/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "gpt2",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
 
1
  {
2
+ "_name_or_path": "huggingartists/hyuna",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 2.4857912063598633, "eval_runtime": 0.9974, "eval_samples_per_second": 75.193, "eval_steps_per_second": 10.026, "epoch": 1.0}
 
1
+ {"eval_loss": 1.948645830154419, "eval_runtime": 1.0716, "eval_samples_per_second": 74.654, "eval_steps_per_second": 9.332, "epoch": 21.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eea24b5a87ba581a31b68f6feb9d8984cefc4193561631b2a0c37eb533abd099
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c6c5f3002aa8553ef3d1076f8c241aea7e67dd9a061ae400fb1f73dd0af518e
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fad55fc0023f5af751d53fae38afce0ad829438474c0eb5ca992b913ed0c7cbc
3
- size 995603825
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b98fd9abc79219d8710076f00fa144a25a95234e0af1cef66fee27c4a7e2f212
3
+ size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acef724cc857ea36533b962cf392ab1ce9f14179d814b19d7dcda36abc7dcc3d
3
  size 510403817
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a147ed0181a141745e94a33aca89bd326c03f5d7245235b650749fb53eeb5c03
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07a059709928e7c9750812147dd6bb5220fbe286a930ef0174e5e4fa47c45f0f
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85e11aaedead41a17f8b2fd14bbca2c2a93cce3573b1c09a78b5f4f2e0da4ded
3
+ size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e450cb9c3dd5d5e299310845f6d7ab8731c0d1d1d427cd56769b403ce862ce4
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2193380bd6253a6f966c23f88da8618a1e1fec5e54a292b0a7960495f5972fa
3
  size 623
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}
 
1
+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/hyuna", "tokenizer_class": "GPT2Tokenizer"}
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 2.4857912063598633,
3
- "best_model_checkpoint": "output/hyuna/checkpoint-58",
4
- "epoch": 1.0,
5
- "global_step": 58,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -80,11 +80,325 @@
80
  "eval_samples_per_second": 75.519,
81
  "eval_steps_per_second": 10.069,
82
  "step": 58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  }
84
  ],
85
- "max_steps": 58,
86
- "num_train_epochs": 1,
87
- "total_flos": 60489105408000.0,
88
  "trial_name": null,
89
  "trial_params": null
90
  }
 
1
  {
2
+ "best_metric": 1.948645830154419,
3
+ "best_model_checkpoint": "output/hyuna/checkpoint-290",
4
+ "epoch": 5.0,
5
+ "global_step": 290,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
80
  "eval_samples_per_second": 75.519,
81
  "eval_steps_per_second": 10.069,
82
  "step": 58
83
+ },
84
+ {
85
+ "epoch": 1.03,
86
+ "learning_rate": 4.0213613921093164e-07,
87
+ "loss": 2.3666,
88
+ "step": 60
89
+ },
90
+ {
91
+ "epoch": 1.12,
92
+ "learning_rate": 4.8721970205680935e-06,
93
+ "loss": 2.4303,
94
+ "step": 65
95
+ },
96
+ {
97
+ "epoch": 1.21,
98
+ "learning_rate": 1.3988015692592823e-05,
99
+ "loss": 2.3856,
100
+ "step": 70
101
+ },
102
+ {
103
+ "epoch": 1.29,
104
+ "learning_rate": 2.708504883770769e-05,
105
+ "loss": 2.1811,
106
+ "step": 75
107
+ },
108
+ {
109
+ "epoch": 1.38,
110
+ "learning_rate": 4.320852254368187e-05,
111
+ "loss": 2.4049,
112
+ "step": 80
113
+ },
114
+ {
115
+ "epoch": 1.47,
116
+ "learning_rate": 6.118303533611755e-05,
117
+ "loss": 2.2865,
118
+ "step": 85
119
+ },
120
+ {
121
+ "epoch": 1.55,
122
+ "learning_rate": 7.969824496351964e-05,
123
+ "loss": 2.1265,
124
+ "step": 90
125
+ },
126
+ {
127
+ "epoch": 1.64,
128
+ "learning_rate": 9.740439236703416e-05,
129
+ "loss": 2.1689,
130
+ "step": 95
131
+ },
132
+ {
133
+ "epoch": 1.72,
134
+ "learning_rate": 0.00011301069913603334,
135
+ "loss": 2.4769,
136
+ "step": 100
137
+ },
138
+ {
139
+ "epoch": 1.81,
140
+ "learning_rate": 0.00012537946527356269,
141
+ "loss": 2.282,
142
+ "step": 105
143
+ },
144
+ {
145
+ "epoch": 1.9,
146
+ "learning_rate": 0.00013360900754314024,
147
+ "loss": 2.2972,
148
+ "step": 110
149
+ },
150
+ {
151
+ "epoch": 1.98,
152
+ "learning_rate": 0.0001370993921901871,
153
+ "loss": 2.3702,
154
+ "step": 115
155
+ },
156
+ {
157
+ "epoch": 2.0,
158
+ "eval_loss": 2.0102086067199707,
159
+ "eval_runtime": 1.0576,
160
+ "eval_samples_per_second": 75.643,
161
+ "eval_steps_per_second": 9.455,
162
+ "step": 116
163
+ },
164
+ {
165
+ "epoch": 2.07,
166
+ "learning_rate": 0.00013559617012171197,
167
+ "loss": 2.0597,
168
+ "step": 120
169
+ },
170
+ {
171
+ "epoch": 2.16,
172
+ "learning_rate": 0.00012920892624899717,
173
+ "loss": 2.1937,
174
+ "step": 125
175
+ },
176
+ {
177
+ "epoch": 2.24,
178
+ "learning_rate": 0.00011840329074592684,
179
+ "loss": 2.1953,
180
+ "step": 130
181
+ },
182
+ {
183
+ "epoch": 2.33,
184
+ "learning_rate": 0.00010396699460234374,
185
+ "loss": 2.0115,
186
+ "step": 135
187
+ },
188
+ {
189
+ "epoch": 2.41,
190
+ "learning_rate": 8.69524440231046e-05,
191
+ "loss": 2.0135,
192
+ "step": 140
193
+ },
194
+ {
195
+ "epoch": 2.5,
196
+ "learning_rate": 6.860000000000001e-05,
197
+ "loss": 1.9796,
198
+ "step": 145
199
+ },
200
+ {
201
+ "epoch": 2.59,
202
+ "learning_rate": 5.024755597689551e-05,
203
+ "loss": 2.0685,
204
+ "step": 150
205
+ },
206
+ {
207
+ "epoch": 2.67,
208
+ "learning_rate": 3.3233005397656285e-05,
209
+ "loss": 2.075,
210
+ "step": 155
211
+ },
212
+ {
213
+ "epoch": 2.76,
214
+ "learning_rate": 1.8796709254073232e-05,
215
+ "loss": 2.0578,
216
+ "step": 160
217
+ },
218
+ {
219
+ "epoch": 2.84,
220
+ "learning_rate": 7.99107375100285e-06,
221
+ "loss": 1.9644,
222
+ "step": 165
223
+ },
224
+ {
225
+ "epoch": 2.93,
226
+ "learning_rate": 1.6038298782880706e-06,
227
+ "loss": 2.0836,
228
+ "step": 170
229
+ },
230
+ {
231
+ "epoch": 3.0,
232
+ "eval_loss": 1.9711263179779053,
233
+ "eval_runtime": 1.056,
234
+ "eval_samples_per_second": 75.756,
235
+ "eval_steps_per_second": 9.469,
236
+ "step": 174
237
+ },
238
+ {
239
+ "epoch": 3.02,
240
+ "learning_rate": 1.0060780981290602e-07,
241
+ "loss": 2.0553,
242
+ "step": 175
243
+ },
244
+ {
245
+ "epoch": 3.1,
246
+ "learning_rate": 3.5909924568597365e-06,
247
+ "loss": 1.9319,
248
+ "step": 180
249
+ },
250
+ {
251
+ "epoch": 3.19,
252
+ "learning_rate": 1.182053472643733e-05,
253
+ "loss": 2.0587,
254
+ "step": 185
255
+ },
256
+ {
257
+ "epoch": 3.28,
258
+ "learning_rate": 2.418930086396662e-05,
259
+ "loss": 2.1358,
260
+ "step": 190
261
+ },
262
+ {
263
+ "epoch": 3.36,
264
+ "learning_rate": 3.9795607632965815e-05,
265
+ "loss": 1.9452,
266
+ "step": 195
267
+ },
268
+ {
269
+ "epoch": 3.45,
270
+ "learning_rate": 5.750175503648027e-05,
271
+ "loss": 1.9591,
272
+ "step": 200
273
+ },
274
+ {
275
+ "epoch": 3.53,
276
+ "learning_rate": 7.601696466388229e-05,
277
+ "loss": 1.8235,
278
+ "step": 205
279
+ },
280
+ {
281
+ "epoch": 3.62,
282
+ "learning_rate": 9.39914774563181e-05,
283
+ "loss": 1.8749,
284
+ "step": 210
285
+ },
286
+ {
287
+ "epoch": 3.71,
288
+ "learning_rate": 0.00011011495116229225,
289
+ "loss": 2.0178,
290
+ "step": 215
291
+ },
292
+ {
293
+ "epoch": 3.79,
294
+ "learning_rate": 0.00012321198430740717,
295
+ "loss": 1.9038,
296
+ "step": 220
297
+ },
298
+ {
299
+ "epoch": 3.88,
300
+ "learning_rate": 0.0001323278029794319,
301
+ "loss": 2.1059,
302
+ "step": 225
303
+ },
304
+ {
305
+ "epoch": 3.97,
306
+ "learning_rate": 0.00013679786386078908,
307
+ "loss": 1.946,
308
+ "step": 230
309
+ },
310
+ {
311
+ "epoch": 4.0,
312
+ "eval_loss": 1.9748159646987915,
313
+ "eval_runtime": 1.0572,
314
+ "eval_samples_per_second": 75.673,
315
+ "eval_steps_per_second": 9.459,
316
+ "step": 232
317
+ },
318
+ {
319
+ "epoch": 4.05,
320
+ "learning_rate": 0.0001362962994463487,
321
+ "loss": 1.8662,
322
+ "step": 235
323
+ },
324
+ {
325
+ "epoch": 4.14,
326
+ "learning_rate": 0.00013085967378942767,
327
+ "loss": 1.9053,
328
+ "step": 240
329
+ },
330
+ {
331
+ "epoch": 4.22,
332
+ "learning_rate": 0.00012088431698175582,
333
+ "loss": 1.8573,
334
+ "step": 245
335
+ },
336
+ {
337
+ "epoch": 4.31,
338
+ "learning_rate": 0.00010709743268385941,
339
+ "loss": 1.9869,
340
+ "step": 250
341
+ },
342
+ {
343
+ "epoch": 4.4,
344
+ "learning_rate": 9.050408496732835e-05,
345
+ "loss": 1.7389,
346
+ "step": 255
347
+ },
348
+ {
349
+ "epoch": 4.48,
350
+ "learning_rate": 7.231392912895982e-05,
351
+ "loss": 2.0663,
352
+ "step": 260
353
+ },
354
+ {
355
+ "epoch": 4.57,
356
+ "learning_rate": 5.385302780152384e-05,
357
+ "loss": 1.8779,
358
+ "step": 265
359
+ },
360
+ {
361
+ "epoch": 4.66,
362
+ "learning_rate": 3.646718096799452e-05,
363
+ "loss": 1.7718,
364
+ "step": 270
365
+ },
366
+ {
367
+ "epoch": 4.74,
368
+ "learning_rate": 2.14238171226552e-05,
369
+ "loss": 1.7325,
370
+ "step": 275
371
+ },
372
+ {
373
+ "epoch": 4.83,
374
+ "learning_rate": 9.819597714903422e-06,
375
+ "loss": 1.7267,
376
+ "step": 280
377
+ },
378
+ {
379
+ "epoch": 4.91,
380
+ "learning_rate": 2.5004705131813084e-06,
381
+ "loss": 1.8411,
382
+ "step": 285
383
+ },
384
+ {
385
+ "epoch": 5.0,
386
+ "learning_rate": 0.0,
387
+ "loss": 1.6194,
388
+ "step": 290
389
+ },
390
+ {
391
+ "epoch": 5.0,
392
+ "eval_loss": 1.948645830154419,
393
+ "eval_runtime": 1.0612,
394
+ "eval_samples_per_second": 75.387,
395
+ "eval_steps_per_second": 9.423,
396
+ "step": 290
397
  }
398
  ],
399
+ "max_steps": 1218,
400
+ "num_train_epochs": 21,
401
+ "total_flos": 299832606720000.0,
402
  "trial_name": null,
403
  "trial_params": null
404
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fd667400197eea16b356e51e788c416df8d872cfb807cbde9f2de566162e9e1
3
  size 2863
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a24bd115b15d961e04c04be0e6fe76f00bfddd0ce2e3a83546da90037c6341b8
3
  size 2863