YYYYYYibo committed on
Commit 4738e8b
1 Parent(s): b03cb12

Model save

Files changed (4)
  1. README.md +3 -7
  2. all_results.json +5 -18
  3. train_results.json +5 -5
  4. trainer_state.json +14 -56
README.md CHANGED
@@ -2,14 +2,10 @@
  license: apache-2.0
  library_name: peft
  tags:
- - alignment-handbook
- - generated_from_trainer
  - trl
  - dpo
- base_model: mistralai/Mistral-7B-v0.1
- datasets:
- - updated
- - original
+ - generated_from_trainer
+ base_model: alignment-handbook/zephyr-7b-sft-full
  model-index:
  - name: zephyr-7b-dpo-qlora-min-pi-part-0
    results: []
@@ -20,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 
  # zephyr-7b-dpo-qlora-min-pi-part-0
 
- This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-qlora](https://huggingface.co/alignment-handbook/zephyr-7b-sft-qlora) on the updated and the original datasets.
+ This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the None dataset.
 
  ## Model description
 
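The README above describes this repo as a PEFT (QLoRA) adapter trained with TRL's DPO trainer on top of alignment-handbook/zephyr-7b-sft-full. A minimal loading sketch follows; the adapter repo id is an assumption based on the model name in the card and this commit's author, not something stated in the diff:

```python
# Minimal sketch, not part of this commit: attach the DPO QLoRA adapter to its base model.
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_id = "alignment-handbook/zephyr-7b-sft-full"           # base_model from the updated card
adapter_id = "YYYYYYibo/zephyr-7b-dpo-qlora-min-pi-part-0"   # assumed repo id for this adapter

tokenizer = AutoTokenizer.from_pretrained(base_id)
model = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype="auto", device_map="auto")
model = PeftModel.from_pretrained(model, adapter_id)         # load the saved LoRA weights
model.eval()
```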
all_results.json CHANGED
@@ -1,21 +1,8 @@
  {
-     "epoch": 1.0,
-     "eval_logits/chosen": -2.498661994934082,
-     "eval_logits/rejected": -2.4008209705352783,
-     "eval_logps/chosen": -269.65570068359375,
-     "eval_logps/rejected": -257.6377868652344,
-     "eval_loss": 0.6512373089790344,
-     "eval_rewards/accuracies": 0.6639999747276306,
-     "eval_rewards/chosen": -0.06204665079712868,
-     "eval_rewards/margins": 0.10243026912212372,
-     "eval_rewards/rejected": -0.1644769161939621,
-     "eval_runtime": 542.9483,
-     "eval_samples": 2000,
-     "eval_samples_per_second": 3.684,
-     "eval_steps_per_second": 0.46,
-     "train_loss": 0.0,
-     "train_runtime": 0.0278,
+     "epoch": 0.77,
+     "train_loss": 0.692988375822703,
+     "train_runtime": 257.3561,
      "train_samples": 10000,
-     "train_samples_per_second": 359461.447,
-     "train_steps_per_second": 1401.9
+     "train_samples_per_second": 3.886,
+     "train_steps_per_second": 0.012
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
  {
-     "epoch": 1.0,
-     "train_loss": 0.0,
-     "train_runtime": 0.0278,
+     "epoch": 0.77,
+     "train_loss": 0.692988375822703,
+     "train_runtime": 257.3561,
      "train_samples": 10000,
-     "train_samples_per_second": 359461.447,
-     "train_steps_per_second": 1401.9
+     "train_samples_per_second": 3.886,
+     "train_steps_per_second": 0.012
  }
trainer_state.json CHANGED
@@ -1,20 +1,20 @@
  {
    "best_metric": null,
    "best_model_checkpoint": null,
-   "epoch": 0.9984,
+   "epoch": 0.768,
    "eval_steps": 100,
-   "global_step": 39,
+   "global_step": 3,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
    "log_history": [
      {
-       "epoch": 0.03,
-       "learning_rate": 1.25e-06,
-       "logits/chosen": -2.3930084705352783,
-       "logits/rejected": -2.4061639308929443,
-       "logps/chosen": -245.26866149902344,
-       "logps/rejected": -261.33184814453125,
+       "epoch": 0.26,
+       "learning_rate": 5e-06,
+       "logits/chosen": -2.7575411796569824,
+       "logits/rejected": -2.648534059524536,
+       "logps/chosen": -262.0839538574219,
+       "logps/rejected": -262.1346435546875,
        "loss": 0.6931,
        "rewards/accuracies": 0.0,
        "rewards/chosen": 0.0,
@@ -22,60 +22,18 @@
        "rewards/rejected": 0.0,
        "step": 1
      },
-     {
-       "epoch": 0.26,
-       "learning_rate": 4.646121984004666e-06,
-       "logits/chosen": -2.525505542755127,
-       "logits/rejected": -2.416348695755005,
-       "logps/chosen": -264.3957214355469,
-       "logps/rejected": -243.22918701171875,
-       "loss": 0.6859,
-       "rewards/accuracies": 0.5711805820465088,
-       "rewards/chosen": 0.037347737699747086,
-       "rewards/margins": 0.01542865764349699,
-       "rewards/rejected": 0.02191907912492752,
-       "step": 10
-     },
-     {
-       "epoch": 0.51,
-       "learning_rate": 2.835583164544139e-06,
-       "logits/chosen": -2.4794507026672363,
-       "logits/rejected": -2.397329568862915,
-       "logps/chosen": -257.61175537109375,
-       "logps/rejected": -238.9801025390625,
-       "loss": 0.6674,
-       "rewards/accuracies": 0.699999988079071,
-       "rewards/chosen": 0.03777569904923439,
-       "rewards/margins": 0.06583560258150101,
-       "rewards/rejected": -0.028059903532266617,
-       "step": 20
-     },
      {
        "epoch": 0.77,
-       "learning_rate": 7.723433775328385e-07,
-       "logits/chosen": -2.5016331672668457,
-       "logits/rejected": -2.392857074737549,
-       "logps/chosen": -282.13555908203125,
-       "logps/rejected": -262.0860595703125,
-       "loss": 0.6557,
-       "rewards/accuracies": 0.671875,
-       "rewards/chosen": -0.02580374851822853,
-       "rewards/margins": 0.10012584924697876,
-       "rewards/rejected": -0.125929594039917,
-       "step": 30
-     },
-     {
-       "epoch": 1.0,
-       "step": 39,
+       "step": 3,
        "total_flos": 0.0,
-       "train_loss": 0.0,
-       "train_runtime": 0.0278,
-       "train_samples_per_second": 359461.447,
-       "train_steps_per_second": 1401.9
+       "train_loss": 0.692988375822703,
+       "train_runtime": 257.3561,
+       "train_samples_per_second": 3.886,
+       "train_steps_per_second": 0.012
      }
    ],
    "logging_steps": 10,
-   "max_steps": 39,
+   "max_steps": 3,
    "num_input_tokens_seen": 0,
    "num_train_epochs": 1,
    "save_steps": 100,