narekvslife committed
Commit e4b57a5
1 Parent(s): beed70f

dpo_p69wqnv2

README.md CHANGED
@@ -17,15 +17,15 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model was trained from scratch on the None dataset.
 It achieves the following results on the evaluation set:
- - Loss: 0.7471
- - Rewards/chosen: 1.4869
- - Rewards/rejected: 1.1641
- - Rewards/accuracies: 0.4383
- - Rewards/margins: 0.3229
- - Logps/rejected: -140.3850
- - Logps/chosen: -153.8973
- - Logits/rejected: -0.6448
- - Logits/chosen: -0.6851
+ - Loss: 0.6845
+ - Rewards/chosen: 0.3768
+ - Rewards/rejected: 0.3499
+ - Rewards/accuracies: 0.5373
+ - Rewards/margins: 0.0269
+ - Logps/rejected: -233.9739
+ - Logps/chosen: -254.7565
+ - Logits/rejected: -0.6114
+ - Logits/chosen: -0.7300
 
 ## Model description
 
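For orientation, the reward numbers in this hunk appear to follow the standard DPO/TRL reporting convention (an assumption; the card itself does not define them): `Rewards/margins` is simply the gap between the chosen and rejected rewards, which checks out against the new evaluation results:

$$
\text{Rewards/margins} = \text{Rewards/chosen} - \text{Rewards/rejected} = 0.3768 - 0.3499 = 0.0269
$$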
@@ -44,7 +44,7 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
- - learning_rate: 0.0001
+ - learning_rate: 5e-06
 - train_batch_size: 1
 - eval_batch_size: 1
 - seed: 0
@@ -53,16 +53,21 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 100
- - training_steps: 2000
+ - training_steps: 2500
 
 ### Training results
 
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
+ |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+ | 0.6689 | 0.0567 | 750 | 0.6852 | 0.2546 | 0.2335 | 0.5373 | 0.0211 | -235.1381 | -255.9783 | -0.6108 | -0.7325 |
+ | 0.6831 | 0.1133 | 1500 | 0.6835 | 0.3555 | 0.3270 | 0.5597 | 0.0285 | -234.2029 | -254.9690 | -0.6135 | -0.7317 |
+ | 0.6821 | 0.1700 | 2250 | 0.6855 | 0.3655 | 0.3411 | 0.5485 | 0.0243 | -234.0616 | -254.8697 | -0.6115 | -0.7293 |
 
 
 ### Framework versions
 
 - PEFT 0.11.1
- - Transformers 4.40.2
+ - Transformers 4.41.1
 - Pytorch 2.3.0+cu121
 - Datasets 2.19.1
 - Tokenizers 0.19.1
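Taken together, the updated hyperparameters (learning rate 5e-06, cosine schedule with 100 warmup steps, 2500 training steps) describe a fairly standard DPO fine-tuning run. The card does not list a TRL version, so the `DPOConfig`/`DPOTrainer` API below is an assumption (recent TRL releases; older ones pass a plain `TrainingArguments` plus a `beta` argument), and the base model and preference dataset names are placeholders. This is a minimal sketch of how the listed values map onto such a run, not the author's actual training script:

```python
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer

BASE_MODEL = "base-model-name"  # placeholder: the card does not name the base model
train_data = load_dataset("preference-dataset-name", split="train")  # placeholder dataset

model = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

# Values below mirror the card's "Training hyperparameters" section.
args = DPOConfig(
    output_dir="dpo_p69wqnv2",
    learning_rate=5e-6,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    seed=0,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    lr_scheduler_type="cosine",
    warmup_steps=100,
    max_steps=2500,
)

trainer = DPOTrainer(
    model=model,          # TRL builds an implicit reference model when ref_model is omitted
    args=args,
    train_dataset=train_data,
    tokenizer=tokenizer,
)
trainer.train()
```

PEFT wrapping is omitted here; the LoRA-style adapter settings are sketched after the `adapter_config.json` diff below.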
 
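The bumped framework versions can be checked against a local environment before trying to reproduce the run; a small sketch using only the standard library (package names are the usual PyPI distributions; note that TRL itself is not listed in the card):

```python
from importlib.metadata import PackageNotFoundError, version

# Versions from the card's "Framework versions" section after this commit.
expected = {
    "peft": "0.11.1",
    "transformers": "4.41.1",
    "torch": "2.3.0+cu121",
    "datasets": "2.19.1",
    "tokenizers": "0.19.1",
}

for package, wanted in expected.items():
    try:
        installed = version(package)
    except PackageNotFoundError:
        installed = "not installed"
    status = "OK" if installed == wanted else f"differs ({installed})"
    print(f"{package}: expected {wanted} -> {status}")
```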
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
 "rank_pattern": {},
 "revision": null,
 "target_modules": [
-   "fc_out",
+   "v_proj",
    "k_proj",
    "fc_in",
+   "wte",
    "out_proj",
-   "q_proj",
-   "v_proj",
-   "wte"
+   "fc_out",
+   "q_proj"
 ],
 "task_type": "CAUSAL_LM",
 "use_dora": false,
 
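The change to `target_modules` is a reordering of the same seven modules; the names (`q_proj`/`k_proj`/`v_proj`/`out_proj`, `fc_in`/`fc_out`, `wte`) look like GPT-J-style attention, MLP, and embedding layers, though the base model is not named in the card. As a minimal sketch, the fields visible in this hunk map onto PEFT's `LoraConfig` roughly as follows; rank, alpha, and dropout are not part of the hunk, so the values used for them here are placeholders:

```python
from peft import LoraConfig

# Sketch reconstructed from the fields visible in the adapter_config.json diff.
# r / lora_alpha / lora_dropout are not shown in the hunk; the values below are
# placeholders, not the adapter's actual settings.
lora_config = LoraConfig(
    r=8,              # placeholder
    lora_alpha=16,    # placeholder
    lora_dropout=0.0, # placeholder
    target_modules=[
        "v_proj",
        "k_proj",
        "fc_in",
        "wte",
        "out_proj",
        "fc_out",
        "q_proj",
    ],
    task_type="CAUSAL_LM",
    use_dora=False,
)
```

Passing this to `peft.get_peft_model(base_model, lora_config)` would produce an adapter of the kind stored in `adapter_model.safetensors` below.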
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:d3336b30496cedb477a68def19166b41054123f3cbd0f48e23a18bc33dafcf79
+ oid sha256:6827823ede9b048120358d474d275c2cce3a04d7392bb4042d9b98dfedf7e326
 size 37774720
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:2e8d935741839e6606b91e10fcc75f5571638a97b5871cec541b7b522e6905d6
- size 5560
+ oid sha256:902272a107f94aafbb28a7b6e03093f5405a1272252662eef7575a3a9714afd7
+ size 5688
 
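Both binary files in this commit are stored as Git LFS pointers: `oid sha256:` records the SHA-256 digest of the real file and `size` its length in bytes. A downloaded file can be checked against its pointer with a few lines of Python (the local path below is a placeholder for wherever the weights were fetched):

```python
import hashlib
from pathlib import Path

def verify_lfs_object(path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a downloaded file against the oid/size from its Git LFS pointer."""
    data = Path(path).read_bytes()
    return hashlib.sha256(data).hexdigest() == expected_oid and len(data) == expected_size

# Values copied from the updated adapter_model.safetensors pointer above.
print(verify_lfs_object(
    "adapter_model.safetensors",
    "6827823ede9b048120358d474d275c2cce3a04d7392bb4042d9b98dfedf7e326",
    37774720,
))
```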