hugodk-sch committed
Commit e9b2829
1 Parent(s): 3d03a38

Training in progress, step 800

README.md CHANGED
@@ -20,15 +20,15 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [data/ap-gpt-j-6b-sft-qlora-04-08](https://huggingface.co/data/ap-gpt-j-6b-sft-qlora-04-08) on the hugodk-sch/aftonposten_title_prefs dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.6833
-- Rewards/chosen: -0.2814
-- Rewards/rejected: -0.4540
-- Rewards/accuracies: 0.6009
-- Rewards/margins: 0.1726
-- Logps/rejected: -38.0842
-- Logps/chosen: -34.3863
-- Logits/rejected: -2.1265
-- Logits/chosen: -2.1312
+- Loss: 0.4934
+- Rewards/chosen: 0.2139
+- Rewards/rejected: 0.1872
+- Rewards/accuracies: 0.5457
+- Rewards/margins: 0.0267
+- Logps/rejected: -37.2826
+- Logps/chosen: -33.7672
+- Logits/rejected: -2.2262
+- Logits/chosen: -2.2310
 
 ## Model description
 
@@ -57,27 +57,15 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.1
-- num_epochs: 4
+- num_epochs: 1
 
 ### Training results
 
-| Training Loss | Epoch | Step | Logits/chosen | Logits/rejected | Logps/chosen | Logps/rejected | Validation Loss | Rewards/accuracies | Rewards/chosen | Rewards/margins | Rewards/rejected |
-|:-------------:|:-----:|:----:|:-------------:|:---------------:|:------------:|:--------------:|:---------------:|:------------------:|:--------------:|:---------------:|:----------------:|
-| 0.6538 | 0.26 | 100 | -2.2339 | -2.2290 | -34.0302 | -37.5273 | 0.6955 | 0.5108 | 0.0035 | 0.0120 | -0.0085 |
-| 0.6015 | 0.52 | 200 | -2.2322 | -2.2274 | -34.0607 | -37.5657 | 0.6956 | 0.5249 | -0.0209 | 0.0183 | -0.0393 |
-| 0.5385 | 0.78 | 300 | -2.2294 | -2.2246 | -34.0909 | -37.5973 | 0.6957 | 0.5399 | -0.0451 | 0.0194 | -0.0645 |
-| 0.4017 | 1.04 | 400 | 0.6775 | -0.0241 | -0.0945 | 0.6005 | 0.0703 | -37.6347 | -34.0647 | -2.2052 | -2.2100 |
-| 0.3216 | 1.3 | 500 | 0.6820 | -0.1165 | -0.2131 | 0.5768 | 0.0966 | -37.7830 | -34.1802 | -2.1659 | -2.1707 |
-| 0.336 | 1.56 | 600 | 0.6618 | -0.1839 | -0.3388 | 0.6242 | 0.1549 | -37.9401 | -34.2644 | -2.1554 | -2.1602 |
-| 0.3559 | 1.82 | 700 | 0.6947 | -0.2571 | -0.3713 | 0.5341 | 0.1141 | -37.9807 | -34.3560 | -2.1535 | -2.1583 |
-| 0.1978 | 2.08 | 800 | 0.6838 | -0.2324 | -0.3669 | 0.5835 | 0.1345 | -37.9753 | -34.3250 | -2.1501 | -2.1549 |
-| 0.1619 | 2.34 | 900 | 0.6788 | -0.2463 | -0.4156 | 0.5860 | 0.1693 | -38.0361 | -34.3424 | -2.1384 | -2.1431 |
-| 0.209 | 2.6 | 1000 | 0.6777 | -0.2767 | -0.4535 | 0.5918 | 0.1767 | -38.0835 | -34.3805 | -2.1309 | -2.1357 |
-| 0.2513 | 2.86 | 1100 | 0.6897 | -0.2986 | -0.4591 | 0.5831 | 0.1605 | -38.0905 | -34.4077 | -2.1270 | -2.1317 |
-| 0.1713 | 3.12 | 1200 | 0.6780 | -0.2775 | -0.4614 | 0.5947 | 0.1839 | -38.0934 | -34.3814 | -2.1270 | -2.1317 |
-| 0.1199 | 3.38 | 1300 | 0.6740 | -0.2726 | -0.4645 | 0.5980 | 0.1919 | -38.0972 | -34.3753 | -2.1269 | -2.1317 |
-| 0.1578 | 3.64 | 1400 | 0.6839 | -0.2867 | -0.4600 | 0.5860 | 0.1734 | -38.0917 | -34.3929 | -2.1266 | -2.1314 |
-| 0.1614 | 3.9 | 1500 | 0.6820 | -0.2813 | -0.4578 | 0.5743 | 0.1765 | -38.0889 | -34.3862 | -2.1265 | -2.1312 |
+| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
+|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+| 0.4749 | 0.26 | 100 | 0.4963 | 0.1467 | 0.1303 | 0.5336 | 0.0164 | -37.3537 | -33.8512 | -2.2327 | -2.2375 |
+| 0.4376 | 0.52 | 200 | 0.4956 | 0.1959 | 0.1769 | 0.5486 | 0.0191 | -37.2955 | -33.7896 | -2.2291 | -2.2339 |
+| 0.3835 | 0.78 | 300 | 0.4950 | 0.2045 | 0.1836 | 0.5245 | 0.0210 | -37.2872 | -33.7789 | -2.2264 | -2.2312 |
 
 
 ### Framework versions
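For reference, the eval metrics above follow the DPO-style convention in which Rewards/margins is simply Rewards/chosen minus Rewards/rejected. A minimal sanity-check sketch in plain Python, with values copied from the updated README; the dict keys are illustrative, not an API from this repo:

```python
# Reward margin check for the updated eval metrics: in DPO-style reporting,
# margins = chosen reward - rejected reward.
eval_metrics = {
    "rewards/chosen": 0.2139,
    "rewards/rejected": 0.1872,
    "rewards/margins": 0.0267,
}

margin = eval_metrics["rewards/chosen"] - eval_metrics["rewards/rejected"]
# 0.2139 - 0.1872 = 0.0267, matching the reported Rewards/margins.
assert abs(margin - eval_metrics["rewards/margins"]) < 1e-4
```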
adapter_config.json CHANGED
@@ -19,13 +19,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "k_proj",
     "up_proj",
-    "q_proj",
-    "o_proj",
     "gate_proj",
+    "q_proj",
+    "down_proj",
     "v_proj",
-    "k_proj",
-    "down_proj"
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_rslora": false
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e88918600a30b4ce059435ac14ae95a63b5112549f43a93d66811978ceeb9d6
+oid sha256:99b0dc0544e8b7068df99b78d63614f09e83f94fe6940cae744471670fee6b52
 size 176183216
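adapter_model.safetensors (like training_args.bin below) is stored as a Git LFS pointer: a version line, a sha256 oid, and a byte size. Only the oid changes in this commit. A minimal sketch for verifying a downloaded copy against the pointer; the local filename is a placeholder:

```python
import hashlib

# Fields from the new LFS pointer above.
EXPECTED_OID = "99b0dc0544e8b7068df99b78d63614f09e83f94fe6940cae744471670fee6b52"
EXPECTED_SIZE = 176183216  # bytes

sha = hashlib.sha256()
n_bytes = 0
with open("adapter_model.safetensors", "rb") as f:  # placeholder local path
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)
        n_bytes += len(chunk)

assert n_bytes == EXPECTED_SIZE, "size does not match LFS pointer"
assert sha.hexdigest() == EXPECTED_OID, "sha256 does not match LFS pointer"
```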
all_results.json CHANGED
@@ -1,21 +1,21 @@
 {
-    "epoch": 4.0,
-    "eval_logits/chosen": -2.1312131881713867,
-    "eval_logits/rejected": -2.1264569759368896,
-    "eval_logps/chosen": -34.38629150390625,
-    "eval_logps/rejected": -38.08415985107422,
-    "eval_loss": 0.683270275592804,
-    "eval_rewards/accuracies": 0.6009136438369751,
-    "eval_rewards/chosen": -0.2813924252986908,
-    "eval_rewards/margins": 0.1726410835981369,
-    "eval_rewards/rejected": -0.4540335237979889,
-    "eval_runtime": 145.7759,
+    "epoch": 1.0,
+    "eval_logits/chosen": -2.2309982776641846,
+    "eval_logits/rejected": -2.2261931896209717,
+    "eval_logps/chosen": -33.767173767089844,
+    "eval_logps/rejected": -37.282623291015625,
+    "eval_loss": 0.49339157342910767,
+    "eval_rewards/accuracies": 0.5456810593605042,
+    "eval_rewards/chosen": 0.21390400826931,
+    "eval_rewards/margins": 0.026708098128437996,
+    "eval_rewards/rejected": 0.18719588220119476,
+    "eval_runtime": 145.5786,
     "eval_samples": 343,
-    "eval_samples_per_second": 2.353,
+    "eval_samples_per_second": 2.356,
     "eval_steps_per_second": 0.295,
-    "train_loss": 0.20605388523696305,
-    "train_runtime": 10804.1609,
+    "train_loss": 0.4538224170734356,
+    "train_runtime": 3252.427,
     "train_samples": 3079,
-    "train_samples_per_second": 1.14,
-    "train_steps_per_second": 0.143
+    "train_samples_per_second": 0.947,
+    "train_steps_per_second": 0.118
 }
eval_results.json CHANGED
@@ -1,16 +1,16 @@
 {
-    "epoch": 4.0,
-    "eval_logits/chosen": -2.1312131881713867,
-    "eval_logits/rejected": -2.1264569759368896,
-    "eval_logps/chosen": -34.38629150390625,
-    "eval_logps/rejected": -38.08415985107422,
-    "eval_loss": 0.683270275592804,
-    "eval_rewards/accuracies": 0.6009136438369751,
-    "eval_rewards/chosen": -0.2813924252986908,
-    "eval_rewards/margins": 0.1726410835981369,
-    "eval_rewards/rejected": -0.4540335237979889,
-    "eval_runtime": 145.7759,
+    "epoch": 1.0,
+    "eval_logits/chosen": -2.2309982776641846,
+    "eval_logits/rejected": -2.2261931896209717,
+    "eval_logps/chosen": -33.767173767089844,
+    "eval_logps/rejected": -37.282623291015625,
+    "eval_loss": 0.49339157342910767,
+    "eval_rewards/accuracies": 0.5456810593605042,
+    "eval_rewards/chosen": 0.21390400826931,
+    "eval_rewards/margins": 0.026708098128437996,
+    "eval_rewards/rejected": 0.18719588220119476,
+    "eval_runtime": 145.5786,
     "eval_samples": 343,
-    "eval_samples_per_second": 2.353,
+    "eval_samples_per_second": 2.356,
     "eval_steps_per_second": 0.295
 }
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
-    "epoch": 4.0,
-    "train_loss": 0.20605388523696305,
-    "train_runtime": 10804.1609,
+    "epoch": 1.0,
+    "train_loss": 0.4538224170734356,
+    "train_runtime": 3252.427,
     "train_samples": 3079,
-    "train_samples_per_second": 1.14,
-    "train_steps_per_second": 0.143
+    "train_samples_per_second": 0.947,
+    "train_steps_per_second": 0.118
 }
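The throughput fields in these results files are internally consistent: samples per second equals train_samples times epochs divided by train_runtime. A sketch assuming the usual HF Trainer accounting, with values copied from both sides of the diff:

```python
# Throughput cross-check for the old (removed) and new (added) train_results.json.
runs = {
    "old, 4 epochs": {"epoch": 4.0, "train_runtime": 10804.1609,
                      "train_samples": 3079, "train_samples_per_second": 1.14},
    "new, 1 epoch": {"epoch": 1.0, "train_runtime": 3252.427,
                     "train_samples": 3079, "train_samples_per_second": 0.947},
}

for name, r in runs.items():
    derived = r["train_samples"] * r["epoch"] / r["train_runtime"]
    # 3079 * 4 / 10804.1609 ~ 1.140 ; 3079 * 1 / 3252.427 ~ 0.947
    assert abs(derived - r["train_samples_per_second"]) < 5e-3, name
```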
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ced003eefc2ef53b0f6fc7a5d0f6a8ced40ea676a70d9c847ab2504a361738f
+oid sha256:3419ace4881b5957bfb88592b26f574ecaf7267a04c585c6e35c0710ee18e507
 size 4984