yakazimir committed on
Commit
8303cfa
·
verified ·
1 Parent(s): 29e2a0e

Model save

Browse files
Files changed (4) hide show
  1. README.md +4 -6
  2. all_results.json +5 -5
  3. train_results.json +5 -5
  4. trainer_state.json +8 -8
README.md CHANGED
@@ -3,23 +3,21 @@ library_name: transformers
3
  license: other
4
  base_model: trl-lib/qwen1.5-0.5b-sft
5
  tags:
6
- - alignment-handbook
7
  - trl
8
  - simpo
 
9
  - generated_from_trainer
10
- datasets:
11
- - yakazimir/ultrafeedback_binarized
12
  model-index:
13
- - name: qwen_05b_simpo
14
  results: []
15
  ---
16
 
17
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
18
  should probably proofread and complete it, then remove this comment. -->
19
 
20
- # qwen_05b_simpo
21
 
22
- This model is a fine-tuned version of [trl-lib/qwen1.5-0.5b-sft](https://huggingface.co/trl-lib/qwen1.5-0.5b-sft) on the yakazimir/ultrafeedback_binarized dataset.
23
 
24
  ## Model description
25
 
 
3
  license: other
4
  base_model: trl-lib/qwen1.5-0.5b-sft
5
  tags:
 
6
  - trl
7
  - simpo
8
+ - alignment-handbook
9
  - generated_from_trainer
 
 
10
  model-index:
11
+ - name: simpo-exps_qwen05b
12
  results: []
13
  ---
14
 
15
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
16
  should probably proofread and complete it, then remove this comment. -->
17
 
18
+ # simpo-exps_qwen05b
19
 
20
+ This model is a fine-tuned version of [trl-lib/qwen1.5-0.5b-sft](https://huggingface.co/trl-lib/qwen1.5-0.5b-sft) on an unknown dataset.
21
 
22
  ## Model description
23
 
all_results.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "epoch": 0.0029436360595417295,
3
  "eval_logits/chosen": -0.35490095615386963,
4
  "eval_logits/rejected": -0.32419130206108093,
5
  "eval_logps/chosen": -1.7861407995224,
@@ -14,9 +14,9 @@
14
  "eval_samples_per_second": 22.457,
15
  "eval_steps_per_second": 5.627,
16
  "total_flos": 0.0,
17
- "train_loss": 1.0150891217318447,
18
- "train_runtime": 13.4251,
19
  "train_samples": 59790,
20
- "train_samples_per_second": 11.918,
21
- "train_steps_per_second": 0.745
22
  }
 
1
  {
2
+ "epoch": 0.0032112393376818866,
3
  "eval_logits/chosen": -0.35490095615386963,
4
  "eval_logits/rejected": -0.32419130206108093,
5
  "eval_logps/chosen": -1.7861407995224,
 
14
  "eval_samples_per_second": 22.457,
15
  "eval_steps_per_second": 5.627,
16
  "total_flos": 0.0,
17
+ "train_loss": 0.40756722291310626,
18
+ "train_runtime": 13.7037,
19
  "train_samples": 59790,
20
+ "train_samples_per_second": 11.676,
21
+ "train_steps_per_second": 0.73
22
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 0.0029436360595417295,
3
  "total_flos": 0.0,
4
- "train_loss": 1.0150891217318447,
5
- "train_runtime": 13.4251,
6
  "train_samples": 59790,
7
- "train_samples_per_second": 11.918,
8
- "train_steps_per_second": 0.745
9
  }
 
1
  {
2
+ "epoch": 0.0032112393376818866,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.40756722291310626,
5
+ "train_runtime": 13.7037,
6
  "train_samples": 59790,
7
+ "train_samples_per_second": 11.676,
8
+ "train_steps_per_second": 0.73
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0029436360595417295,
5
  "eval_steps": 400,
6
- "global_step": 11,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -39,13 +39,13 @@
39
  "step": 10
40
  },
41
  {
42
- "epoch": 0.0029436360595417295,
43
- "step": 11,
44
  "total_flos": 0.0,
45
- "train_loss": 1.0150891217318447,
46
- "train_runtime": 13.4251,
47
- "train_samples_per_second": 11.918,
48
- "train_steps_per_second": 0.745
49
  }
50
  ],
51
  "logging_steps": 5,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0032112393376818866,
5
  "eval_steps": 400,
6
+ "global_step": 12,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
39
  "step": 10
40
  },
41
  {
42
+ "epoch": 0.0032112393376818866,
43
+ "step": 12,
44
  "total_flos": 0.0,
45
+ "train_loss": 0.40756722291310626,
46
+ "train_runtime": 13.7037,
47
+ "train_samples_per_second": 11.676,
48
+ "train_steps_per_second": 0.73
49
  }
50
  ],
51
  "logging_steps": 5,