hugodk-sch committed on
Commit
855b604
1 Parent(s): 4cf5199

Training in progress, step 600

Browse files
README.md CHANGED
@@ -20,15 +20,15 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  This model is a fine-tuned version of [data/ap-gpt-j-6b-sft-qlora-04-08](https://huggingface.co/data/ap-gpt-j-6b-sft-qlora-04-08) on the hugodk-sch/aftonposten_title_prefs dataset.
22
  It achieves the following results on the evaluation set:
23
- - Loss: 0.6751
24
- - Rewards/chosen: -0.1961
25
- - Rewards/rejected: -0.2675
26
- - Rewards/accuracies: 0.5918
27
- - Rewards/margins: 0.0713
28
- - Logps/rejected: -38.8539
29
- - Logps/chosen: -35.0152
30
- - Logits/rejected: -1.9911
31
- - Logits/chosen: -1.9956
32
 
33
  ## Model description
34
 
@@ -57,27 +57,15 @@ The following hyperparameters were used during training:
57
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
58
  - lr_scheduler_type: cosine
59
  - lr_scheduler_warmup_ratio: 0.1
60
- - num_epochs: 4
61
 
62
  ### Training results
63
 
64
- | Training Loss | Epoch | Step | Logits/chosen | Logits/rejected | Logps/chosen | Logps/rejected | Validation Loss | Rewards/accuracies | Rewards/chosen | Rewards/margins | Rewards/rejected |
65
- |:-------------:|:-----:|:----:|:-------------:|:---------------:|:------------:|:--------------:|:---------------:|:------------------:|:--------------:|:---------------:|:----------------:|
66
- | 0.6813 | 0.26 | 100 | -2.2319 | -2.2270 | -34.0295 | -37.5345 | 0.6915 | 0.5220 | 0.0010 | 0.0046 | -0.0036 |
67
- | 0.6632 | 0.52 | 200 | -2.2288 | -2.2240 | -34.0514 | -37.5852 | 0.6888 | 0.5660 | -0.0034 | 0.0103 | -0.0137 |
68
- | 0.6327 | 0.78 | 300 | -2.2262 | -2.2214 | -34.0940 | -37.6097 | 0.6909 | 0.4950 | -0.0119 | 0.0067 | -0.0186 |
69
- | 0.581 | 1.04 | 400 | 0.6878 | -0.0220 | -0.0363 | 0.5714 | 0.0143 | -37.6981 | -34.1446 | -2.1968 | -2.2017 |
70
- | 0.5252 | 1.3 | 500 | 0.6833 | -0.0468 | -0.0746 | 0.5801 | 0.0277 | -37.8894 | -34.2686 | -2.1386 | -2.1434 |
71
- | 0.5389 | 1.56 | 600 | 0.6771 | -0.0859 | -0.1320 | 0.5714 | 0.0462 | -38.1768 | -34.4638 | -2.0901 | -2.0949 |
72
- | 0.5239 | 1.82 | 700 | 0.6812 | -0.1285 | -0.1728 | 0.5627 | 0.0443 | -38.3806 | -34.6768 | -2.0609 | -2.0656 |
73
- | 0.4527 | 2.08 | 800 | 0.6754 | -0.1347 | -0.1932 | 0.5627 | 0.0585 | -38.4827 | -34.7079 | -2.0466 | -2.0512 |
74
- | 0.4042 | 2.34 | 900 | 0.6782 | -0.1674 | -0.2278 | 0.5656 | 0.0604 | -38.6554 | -34.8714 | -2.0180 | -2.0226 |
75
- | 0.4706 | 2.6 | 1000 | 0.6768 | -0.1887 | -0.2552 | 0.5772 | 0.0664 | -38.7926 | -34.9783 | -1.9986 | -2.0031 |
76
- | 0.4851 | 2.86 | 1100 | 0.6753 | -0.1952 | -0.2659 | 0.5772 | 0.0707 | -38.8462 | -35.0107 | -1.9926 | -1.9972 |
77
- | 0.4079 | 3.12 | 1200 | 0.6757 | -0.1976 | -0.2675 | 0.5685 | 0.0699 | -38.8539 | -35.0224 | -1.9914 | -1.9960 |
78
- | 0.3644 | 3.38 | 1300 | 0.6762 | -0.1974 | -0.2668 | 0.5714 | 0.0694 | -38.8508 | -35.0218 | -1.9907 | -1.9952 |
79
- | 0.4147 | 3.64 | 1400 | 0.6765 | -0.1982 | -0.2664 | 0.5831 | 0.0682 | -38.8488 | -35.0255 | -1.9909 | -1.9955 |
80
- | 0.4021 | 3.9 | 1500 | 0.6767 | -0.1985 | -0.2668 | 0.5714 | 0.0683 | -38.8505 | -35.0269 | -1.9910 | -1.9955 |
81
 
82
 
83
  ### Framework versions
 
20
 
21
  This model is a fine-tuned version of [data/ap-gpt-j-6b-sft-qlora-04-08](https://huggingface.co/data/ap-gpt-j-6b-sft-qlora-04-08) on the hugodk-sch/aftonposten_title_prefs dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 0.4975
24
+ - Rewards/chosen: 0.0550
25
+ - Rewards/rejected: 0.0448
26
+ - Rewards/accuracies: 0.5395
27
+ - Rewards/margins: 0.0102
28
+ - Logps/rejected: -37.2926
29
+ - Logps/chosen: -33.7597
30
+ - Logits/rejected: -2.2271
31
+ - Logits/chosen: -2.2319
32
 
33
  ## Model description
34
 
 
57
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
58
  - lr_scheduler_type: cosine
59
  - lr_scheduler_warmup_ratio: 0.1
60
+ - num_epochs: 1
61
 
62
  ### Training results
63
 
64
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
65
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
66
+ | 0.4947 | 0.26 | 100 | 0.4994 | 0.0239 | 0.0208 | 0.5216 | 0.0031 | -37.4126 | -33.9151 | -2.2299 | -2.2348 |
67
+ | 0.4825 | 0.52 | 200 | 0.4974 | 0.0511 | 0.0397 | 0.5544 | 0.0113 | -37.3179 | -33.7792 | -2.2274 | -2.2322 |
68
+ | 0.4669 | 0.78 | 300 | 0.4980 | 0.0533 | 0.0449 | 0.5158 | 0.0084 | -37.2920 | -33.7681 | -2.2264 | -2.2312 |
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
 
71
  ### Framework versions
adapter_config.json CHANGED
@@ -19,13 +19,13 @@
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
- "up_proj",
23
- "k_proj",
24
  "q_proj",
25
- "down_proj",
26
  "gate_proj",
27
- "o_proj",
28
- "v_proj"
 
29
  ],
30
  "task_type": "CAUSAL_LM",
31
  "use_rslora": false
 
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
+ "v_proj",
23
+ "o_proj",
24
  "q_proj",
 
25
  "gate_proj",
26
+ "down_proj",
27
+ "k_proj",
28
+ "up_proj"
29
  ],
30
  "task_type": "CAUSAL_LM",
31
  "use_rslora": false
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2e2d3dde6ac3327fece9a3798ca418892e5ae3ef423d9857a354a7c84510e1f
3
  size 176183216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bb2dca7ba6f44037dd1a890cd8cebfa6c5ba50248941ac541cb41e6628ea1cd
3
  size 176183216
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "epoch": 4.0,
3
- "eval_logits/chosen": -1.9956191778182983,
4
- "eval_logits/rejected": -1.991063117980957,
5
- "eval_logps/chosen": -35.015167236328125,
6
- "eval_logps/rejected": -38.8538703918457,
7
- "eval_loss": 0.6751248240470886,
8
- "eval_rewards/accuracies": 0.5917773842811584,
9
- "eval_rewards/chosen": -0.19612376391887665,
10
- "eval_rewards/margins": 0.07132655382156372,
11
- "eval_rewards/rejected": -0.26745033264160156,
12
- "eval_runtime": 145.7429,
13
  "eval_samples": 343,
14
- "eval_samples_per_second": 2.353,
15
  "eval_steps_per_second": 0.295,
16
- "train_loss": 0.3823247471413055,
17
- "train_runtime": 10805.5642,
18
  "train_samples": 3079,
19
- "train_samples_per_second": 1.14,
20
- "train_steps_per_second": 0.143
21
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_logits/chosen": -2.2319114208221436,
4
+ "eval_logits/rejected": -2.2271130084991455,
5
+ "eval_logps/chosen": -33.75971984863281,
6
+ "eval_logps/rejected": -37.29259490966797,
7
+ "eval_loss": 0.49754369258880615,
8
+ "eval_rewards/accuracies": 0.5394518375396729,
9
+ "eval_rewards/chosen": 0.05496572330594063,
10
+ "eval_rewards/margins": 0.010160263627767563,
11
+ "eval_rewards/rejected": 0.044805459678173065,
12
+ "eval_runtime": 145.6941,
13
  "eval_samples": 343,
14
+ "eval_samples_per_second": 2.354,
15
  "eval_steps_per_second": 0.295,
16
+ "train_loss": 0.4876757522682091,
17
+ "train_runtime": 3250.1859,
18
  "train_samples": 3079,
19
+ "train_samples_per_second": 0.947,
20
+ "train_steps_per_second": 0.118
21
  }
eval_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 4.0,
3
- "eval_logits/chosen": -1.9956191778182983,
4
- "eval_logits/rejected": -1.991063117980957,
5
- "eval_logps/chosen": -35.015167236328125,
6
- "eval_logps/rejected": -38.8538703918457,
7
- "eval_loss": 0.6751248240470886,
8
- "eval_rewards/accuracies": 0.5917773842811584,
9
- "eval_rewards/chosen": -0.19612376391887665,
10
- "eval_rewards/margins": 0.07132655382156372,
11
- "eval_rewards/rejected": -0.26745033264160156,
12
- "eval_runtime": 145.7429,
13
  "eval_samples": 343,
14
- "eval_samples_per_second": 2.353,
15
  "eval_steps_per_second": 0.295
16
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_logits/chosen": -2.2319114208221436,
4
+ "eval_logits/rejected": -2.2271130084991455,
5
+ "eval_logps/chosen": -33.75971984863281,
6
+ "eval_logps/rejected": -37.29259490966797,
7
+ "eval_loss": 0.49754369258880615,
8
+ "eval_rewards/accuracies": 0.5394518375396729,
9
+ "eval_rewards/chosen": 0.05496572330594063,
10
+ "eval_rewards/margins": 0.010160263627767563,
11
+ "eval_rewards/rejected": 0.044805459678173065,
12
+ "eval_runtime": 145.6941,
13
  "eval_samples": 343,
14
+ "eval_samples_per_second": 2.354,
15
  "eval_steps_per_second": 0.295
16
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.0,
3
- "train_loss": 0.3823247471413055,
4
- "train_runtime": 10805.5642,
5
  "train_samples": 3079,
6
- "train_samples_per_second": 1.14,
7
- "train_steps_per_second": 0.143
8
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.4876757522682091,
4
+ "train_runtime": 3250.1859,
5
  "train_samples": 3079,
6
+ "train_samples_per_second": 0.947,
7
+ "train_steps_per_second": 0.118
8
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c288dcbf28ede7cf49e7ecb2979521d825f0f03f89c2659f96cf853b83096653
3
  size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c326ac89e4c50e7bc12176a6c925130ebcfcfb2364bc0120a8ef9c18dc894821
3
  size 4984