yakazimir committed on
Commit
29e2a0e
·
verified ·
1 Parent(s): a98d3d6

Training in progress, step 12

Browse files
Files changed (5) hide show
  1. README.md +6 -4
  2. all_results.json +3 -3
  3. eval_results.json +4 -4
  4. model.safetensors +1 -1
  5. training_args.bin +1 -1
README.md CHANGED
@@ -3,21 +3,23 @@ library_name: transformers
3
  license: other
4
  base_model: trl-lib/qwen1.5-0.5b-sft
5
  tags:
 
6
  - trl
7
  - simpo
8
- - alignment-handbook
9
  - generated_from_trainer
 
 
10
  model-index:
11
- - name: simpo-exps_qwen05b
12
  results: []
13
  ---
14
 
15
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
16
  should probably proofread and complete it, then remove this comment. -->
17
 
18
- # simpo-exps_qwen05b
19
 
20
- This model is a fine-tuned version of [trl-lib/qwen1.5-0.5b-sft](https://huggingface.co/trl-lib/qwen1.5-0.5b-sft) on an unknown dataset.
21
 
22
  ## Model description
23
 
 
3
  license: other
4
  base_model: trl-lib/qwen1.5-0.5b-sft
5
  tags:
6
+ - alignment-handbook
7
  - trl
8
  - simpo
 
9
  - generated_from_trainer
10
+ datasets:
11
+ - yakazimir/ultrafeedback_binarized
12
  model-index:
13
+ - name: qwen_05b_simpo
14
  results: []
15
  ---
16
 
17
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
18
  should probably proofread and complete it, then remove this comment. -->
19
 
20
+ # qwen_05b_simpo
21
 
22
+ This model is a fine-tuned version of [trl-lib/qwen1.5-0.5b-sft](https://huggingface.co/trl-lib/qwen1.5-0.5b-sft) on the yakazimir/ultrafeedback_binarized dataset.
23
 
24
  ## Model description
25
 
all_results.json CHANGED
@@ -9,10 +9,10 @@
9
  "eval_rewards/chosen": -17.861406326293945,
10
  "eval_rewards/margins": 0.8571121692657471,
11
  "eval_rewards/rejected": -18.71851921081543,
12
- "eval_runtime": 59.9102,
13
  "eval_samples": 1345,
14
- "eval_samples_per_second": 22.45,
15
- "eval_steps_per_second": 5.625,
16
  "total_flos": 0.0,
17
  "train_loss": 1.0150891217318447,
18
  "train_runtime": 13.4251,
 
9
  "eval_rewards/chosen": -17.861406326293945,
10
  "eval_rewards/margins": 0.8571121692657471,
11
  "eval_rewards/rejected": -18.71851921081543,
12
+ "eval_runtime": 59.8932,
13
  "eval_samples": 1345,
14
+ "eval_samples_per_second": 22.457,
15
+ "eval_steps_per_second": 5.627,
16
  "total_flos": 0.0,
17
  "train_loss": 1.0150891217318447,
18
  "train_runtime": 13.4251,
eval_results.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "epoch": 0.002676032781401572,
3
  "eval_logits/chosen": -0.35490095615386963,
4
  "eval_logits/rejected": -0.32419130206108093,
5
  "eval_logps/chosen": -1.7861407995224,
@@ -9,8 +9,8 @@
9
  "eval_rewards/chosen": -17.861406326293945,
10
  "eval_rewards/margins": 0.8571121692657471,
11
  "eval_rewards/rejected": -18.71851921081543,
12
- "eval_runtime": 59.9102,
13
  "eval_samples": 1345,
14
- "eval_samples_per_second": 22.45,
15
- "eval_steps_per_second": 5.625
16
  }
 
1
  {
2
+ "epoch": 0.0029436360595417295,
3
  "eval_logits/chosen": -0.35490095615386963,
4
  "eval_logits/rejected": -0.32419130206108093,
5
  "eval_logps/chosen": -1.7861407995224,
 
9
  "eval_rewards/chosen": -17.861406326293945,
10
  "eval_rewards/margins": 0.8571121692657471,
11
  "eval_rewards/rejected": -18.71851921081543,
12
+ "eval_runtime": 59.8932,
13
  "eval_samples": 1345,
14
+ "eval_samples_per_second": 22.457,
15
+ "eval_steps_per_second": 5.627
16
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5b7ea3c0dbf5c09f410a4d243d0b0093712bcb1a0fc3b39c5e1a3ee114da8c9
3
  size 927418280
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1b5753783f14402963bfe5ea25f5965324f38ee7372bc5eaf0b5cdffbb2dadd
3
  size 927418280
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c58ca4d124c02f8cf917386bb15e918d0443ff115e8c2fa912fd6c328d425a9
3
  size 5880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ec781718b2cd9b7fb863d929d61d8c637011f91747842ad26980313b7608837
3
  size 5880