hugodk-sch committed on
Commit
9387051
1 Parent(s): fe2e595

Training in progress, step 700

Browse files
README.md CHANGED
@@ -20,15 +20,15 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  This model is a fine-tuned version of [data/ap-gpt-j-6b-sft-qlora-04-08](https://huggingface.co/data/ap-gpt-j-6b-sft-qlora-04-08) on the hugodk-sch/aftonposten_title_prefs dataset.
22
  It achieves the following results on the evaluation set:
23
- - Loss: 0.9586
24
- - Rewards/chosen: -0.1568
25
- - Rewards/rejected: -0.1982
26
- - Rewards/accuracies: 0.5743
27
- - Rewards/margins: 0.0414
28
- - Logps/rejected: -39.4984
29
- - Logps/chosen: -35.6023
30
- - Logits/rejected: -1.9133
31
- - Logits/chosen: -1.9178
32
 
33
  ## Model description
34
 
@@ -57,27 +57,15 @@ The following hyperparameters were used during training:
57
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
58
  - lr_scheduler_type: cosine
59
  - lr_scheduler_warmup_ratio: 0.1
60
- - num_epochs: 4
61
 
62
  ### Training results
63
 
64
- | Training Loss | Epoch | Step | Logits/chosen | Logits/rejected | Logps/chosen | Logps/rejected | Validation Loss | Rewards/accuracies | Rewards/chosen | Rewards/margins | Rewards/rejected |
65
- |:-------------:|:-----:|:----:|:-------------:|:---------------:|:------------:|:--------------:|:---------------:|:------------------:|:--------------:|:---------------:|:----------------:|
66
- | 0.9851 | 0.26 | 100 | -2.2305 | -2.2257 | -34.0218 | -37.5310 | 0.9973 | 0.5311 | 0.0013 | 0.0027 | -0.0014 |
67
- | 0.9596 | 0.52 | 200 | -2.2285 | -2.2237 | -34.0069 | -37.5450 | 0.9944 | 0.5336 | 0.0028 | 0.0056 | -0.0028 |
68
- | 0.9305 | 0.78 | 300 | -2.2243 | -2.2195 | -34.0508 | -37.5717 | 0.9961 | 0.5594 | -0.0016 | 0.0039 | -0.0055 |
69
- | 0.863 | 1.04 | 400 | 0.9918 | -0.0077 | -0.0159 | 0.5710 | 0.0081 | -37.6755 | -34.1120 | -2.1910 | -2.1958 |
70
- | 0.7835 | 1.3 | 500 | 0.9853 | -0.0258 | -0.0405 | 0.5772 | 0.0146 | -37.9211 | -34.2926 | -2.1283 | -2.1330 |
71
- | 0.7983 | 1.56 | 600 | 0.9758 | -0.0570 | -0.0811 | 0.5860 | 0.0241 | -38.3277 | -34.6044 | -2.0577 | -2.0623 |
72
- | 0.7488 | 1.82 | 700 | 0.9750 | -0.0958 | -0.1207 | 0.5714 | 0.0249 | -38.7239 | -34.9924 | -2.0056 | -2.0102 |
73
- | 0.6596 | 2.08 | 800 | 0.9685 | -0.1098 | -0.1413 | 0.5569 | 0.0315 | -38.9295 | -35.1326 | -1.9804 | -1.9850 |
74
- | 0.5721 | 2.34 | 900 | 0.9644 | -0.1350 | -0.1706 | 0.5598 | 0.0355 | -39.2223 | -35.3850 | -1.9434 | -1.9479 |
75
- | 0.6999 | 2.6 | 1000 | 0.9601 | -0.1501 | -0.1899 | 0.5627 | 0.0398 | -39.4161 | -35.5356 | -1.9241 | -1.9286 |
76
- | 0.724 | 2.86 | 1100 | 0.9595 | -0.1555 | -0.1960 | 0.5743 | 0.0405 | -39.4767 | -35.5896 | -1.9149 | -1.9194 |
77
- | 0.5713 | 3.12 | 1200 | 0.9584 | -0.1563 | -0.1978 | 0.5714 | 0.0415 | -39.4951 | -35.5976 | -1.9141 | -1.9186 |
78
- | 0.5014 | 3.38 | 1300 | 0.9597 | -0.1576 | -0.1978 | 0.5656 | 0.0402 | -39.4945 | -35.6101 | -1.9130 | -1.9174 |
79
- | 0.5933 | 3.64 | 1400 | 0.9603 | -0.1569 | -0.1966 | 0.5627 | 0.0397 | -39.4824 | -35.6038 | -1.9143 | -1.9187 |
80
- | 0.5663 | 3.9 | 1500 | 0.9575 | -0.1550 | -0.1975 | 0.5743 | 0.0425 | -39.4918 | -35.5847 | -1.9134 | -1.9178 |
81
 
82
 
83
  ### Framework versions
 
20
 
21
  This model is a fine-tuned version of [data/ap-gpt-j-6b-sft-qlora-04-08](https://huggingface.co/data/ap-gpt-j-6b-sft-qlora-04-08) on the hugodk-sch/aftonposten_title_prefs dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 0.6907
24
+ - Rewards/chosen: -0.0131
25
+ - Rewards/rejected: -0.0201
26
+ - Rewards/accuracies: 0.5133
27
+ - Rewards/margins: 0.0070
28
+ - Logps/rejected: -37.6172
29
+ - Logps/chosen: -34.1001
30
+ - Logits/rejected: -2.2213
31
+ - Logits/chosen: -2.2261
32
 
33
  ## Model description
34
 
 
57
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
58
  - lr_scheduler_type: cosine
59
  - lr_scheduler_warmup_ratio: 0.1
60
+ - num_epochs: 1
61
 
62
  ### Training results
63
 
64
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
65
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
66
+ | 0.6813 | 0.26 | 100 | 0.6915 | 0.0010 | -0.0036 | 0.5220 | 0.0046 | -37.5345 | -34.0295 | -2.2270 | -2.2319 |
67
+ | 0.6632 | 0.52 | 200 | 0.6888 | -0.0034 | -0.0137 | 0.5660 | 0.0103 | -37.5852 | -34.0514 | -2.2240 | -2.2288 |
68
+ | 0.6327 | 0.78 | 300 | 0.6909 | -0.0119 | -0.0186 | 0.4950 | 0.0067 | -37.6097 | -34.0940 | -2.2214 | -2.2262 |
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
 
71
  ### Framework versions
adapter_config.json CHANGED
@@ -21,10 +21,10 @@
21
  "target_modules": [
22
  "up_proj",
23
  "k_proj",
 
24
  "down_proj",
25
- "o_proj",
26
  "gate_proj",
27
- "q_proj",
28
  "v_proj"
29
  ],
30
  "task_type": "CAUSAL_LM",
 
21
  "target_modules": [
22
  "up_proj",
23
  "k_proj",
24
+ "q_proj",
25
  "down_proj",
 
26
  "gate_proj",
27
+ "o_proj",
28
  "v_proj"
29
  ],
30
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6782fc53cd434a725a53ef2fdc13f188d8f9ebe43a9c2192412a9761fc58483
3
  size 176183216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6a0cb6c14f8b02493cfc5b5eef1160c0acff99f0ac39f31fc89a2a6c96e329a
3
  size 176183216
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "epoch": 4.0,
3
- "eval_logits/chosen": -1.9177559614181519,
4
- "eval_logits/rejected": -1.913318157196045,
5
- "eval_logps/chosen": -35.60228729248047,
6
- "eval_logps/rejected": -39.49840545654297,
7
- "eval_loss": 0.958584725856781,
8
- "eval_rewards/accuracies": 0.574335515499115,
9
- "eval_rewards/chosen": -0.1567731499671936,
10
- "eval_rewards/margins": 0.041405245661735535,
11
- "eval_rewards/rejected": -0.19817839562892914,
12
- "eval_runtime": 145.6765,
13
  "eval_samples": 343,
14
- "eval_samples_per_second": 2.355,
15
- "eval_steps_per_second": 0.295,
16
- "train_loss": 0.553979323127053,
17
- "train_runtime": 10795.8022,
18
  "train_samples": 3079,
19
- "train_samples_per_second": 1.141,
20
- "train_steps_per_second": 0.143
21
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_logits/chosen": -2.2261412143707275,
4
+ "eval_logits/rejected": -2.2213146686553955,
5
+ "eval_logps/chosen": -34.100101470947266,
6
+ "eval_logps/rejected": -37.61717224121094,
7
+ "eval_loss": 0.6907125115394592,
8
+ "eval_rewards/accuracies": 0.5132890343666077,
9
+ "eval_rewards/chosen": -0.013110256753861904,
10
+ "eval_rewards/margins": 0.007000547368079424,
11
+ "eval_rewards/rejected": -0.020110804587602615,
12
+ "eval_runtime": 145.4751,
13
  "eval_samples": 343,
14
+ "eval_samples_per_second": 2.358,
15
+ "eval_steps_per_second": 0.296,
16
+ "train_loss": 0.6689951481757226,
17
+ "train_runtime": 3249.3574,
18
  "train_samples": 3079,
19
+ "train_samples_per_second": 0.948,
20
+ "train_steps_per_second": 0.118
21
  }
eval_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 4.0,
3
- "eval_logits/chosen": -1.9177559614181519,
4
- "eval_logits/rejected": -1.913318157196045,
5
- "eval_logps/chosen": -35.60228729248047,
6
- "eval_logps/rejected": -39.49840545654297,
7
- "eval_loss": 0.958584725856781,
8
- "eval_rewards/accuracies": 0.574335515499115,
9
- "eval_rewards/chosen": -0.1567731499671936,
10
- "eval_rewards/margins": 0.041405245661735535,
11
- "eval_rewards/rejected": -0.19817839562892914,
12
- "eval_runtime": 145.6765,
13
  "eval_samples": 343,
14
- "eval_samples_per_second": 2.355,
15
- "eval_steps_per_second": 0.295
16
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_logits/chosen": -2.2261412143707275,
4
+ "eval_logits/rejected": -2.2213146686553955,
5
+ "eval_logps/chosen": -34.100101470947266,
6
+ "eval_logps/rejected": -37.61717224121094,
7
+ "eval_loss": 0.6907125115394592,
8
+ "eval_rewards/accuracies": 0.5132890343666077,
9
+ "eval_rewards/chosen": -0.013110256753861904,
10
+ "eval_rewards/margins": 0.007000547368079424,
11
+ "eval_rewards/rejected": -0.020110804587602615,
12
+ "eval_runtime": 145.4751,
13
  "eval_samples": 343,
14
+ "eval_samples_per_second": 2.358,
15
+ "eval_steps_per_second": 0.296
16
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.0,
3
- "train_loss": 0.553979323127053,
4
- "train_runtime": 10795.8022,
5
  "train_samples": 3079,
6
- "train_samples_per_second": 1.141,
7
- "train_steps_per_second": 0.143
8
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.6689951481757226,
4
+ "train_runtime": 3249.3574,
5
  "train_samples": 3079,
6
+ "train_samples_per_second": 0.948,
7
+ "train_steps_per_second": 0.118
8
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92975a328b03a559447db779537fdb0f988160c889014a91c570a5de327474d4
3
  size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c288dcbf28ede7cf49e7ecb2979521d825f0f03f89c2659f96cf853b83096653
3
  size 4984