Commit 0301a30 (verified) by EtashGuha · 1 Parent(s): f520375

End of training

README.md CHANGED
@@ -3,6 +3,8 @@ library_name: transformers
 license: gemma
 base_model: google/gemma-2-9b-it
 tags:
+- llama-factory
+- full
 - trl
 - dpo
 - llama-factory
@@ -18,7 +20,17 @@ should probably proofread and complete it, then remove this comment. -->
 
 # gemma-simpo-reproduction
 
-This model is a fine-tuned version of [google/gemma-2-9b-it](https://huggingface.co/google/gemma-2-9b-it) on an unknown dataset.
+This model is a fine-tuned version of [google/gemma-2-9b-it](https://huggingface.co/google/gemma-2-9b-it) on the mlfoundations-dev/gemma2-ultrafeedback-armorm dataset.
+It achieves the following results on the evaluation set:
+- Loss: 3.0558
+- Rewards/chosen: -17.0597
+- Rewards/rejected: -21.9498
+- Rewards/accuracies: 0.7584
+- Rewards/margins: 4.8901
+- Logps/rejected: -2.1950
+- Logps/chosen: -1.7060
+- Logits/rejected: -18.1137
+- Logits/chosen: -18.2041
 
 ## Model description
 
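For reference, a minimal sketch of loading and prompting a checkpoint like the one described in this README, assuming the standard Transformers chat-template API. The repository id is a placeholder, not a path taken from this commit.

# Minimal usage sketch; the repo id below is hypothetical, replace it with the actual checkpoint path.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "your-org/gemma-simpo-reproduction"  # placeholder, not from this commit
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")

messages = [{"role": "user", "content": "Summarize SimPO in one sentence."}]
input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
output_ids = model.generate(input_ids, max_new_tokens=128)
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))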
all_results.json CHANGED
@@ -9,12 +9,12 @@
     "eval_rewards/chosen": -17.05968475341797,
     "eval_rewards/margins": 4.890104293823242,
     "eval_rewards/rejected": -21.949787139892578,
-    "eval_runtime": 2126.294,
-    "eval_samples_per_second": 1.401,
-    "eval_steps_per_second": 0.35,
+    "eval_runtime": 703.5263,
+    "eval_samples_per_second": 4.234,
+    "eval_steps_per_second": 1.059,
     "total_flos": 227674672136192.0,
     "train_loss": 0.0,
-    "train_runtime": 1.6273,
-    "train_samples_per_second": 34774.982,
-    "train_steps_per_second": 271.612
+    "train_runtime": 1.3679,
+    "train_samples_per_second": 41370.599,
+    "train_steps_per_second": 323.128
 }
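The eval throughput fields changed alongside the runtime; a quick consistency check, assuming the usual Trainer convention that samples_per_second is roughly the number of eval samples divided by the runtime, suggests both versions describe the same evaluation set (about 2,979 samples) and only the measured speed differs.

# Sanity check on the edited metrics (assumes samples_per_second ~= num_samples / runtime).
old = {"eval_runtime": 2126.294, "eval_samples_per_second": 1.401}
new = {"eval_runtime": 703.5263, "eval_samples_per_second": 4.234}

for label, metrics in (("old", old), ("new", new)):
    approx_samples = metrics["eval_runtime"] * metrics["eval_samples_per_second"]
    print(f"{label}: ~{approx_samples:.0f} eval samples")  # both print roughly 2979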
eval_results.json CHANGED
@@ -9,7 +9,7 @@
     "eval_rewards/chosen": -17.05968475341797,
     "eval_rewards/margins": 4.890104293823242,
     "eval_rewards/rejected": -21.949787139892578,
-    "eval_runtime": 2126.294,
-    "eval_samples_per_second": 1.401,
-    "eval_steps_per_second": 0.35
+    "eval_runtime": 703.5263,
+    "eval_samples_per_second": 4.234,
+    "eval_steps_per_second": 1.059
 }
train_results.json CHANGED
@@ -2,7 +2,7 @@
     "epoch": 0.9997172745264349,
     "total_flos": 227674672136192.0,
     "train_loss": 0.0,
-    "train_runtime": 1.6273,
-    "train_samples_per_second": 34774.982,
-    "train_steps_per_second": 271.612
+    "train_runtime": 1.3679,
+    "train_samples_per_second": 41370.599,
+    "train_steps_per_second": 323.128
 }
trainer_state.json CHANGED
@@ -6643,9 +6643,9 @@
       "step": 442,
       "total_flos": 227674672136192.0,
       "train_loss": 0.0,
-      "train_runtime": 1.6273,
-      "train_samples_per_second": 34774.982,
-      "train_steps_per_second": 271.612
+      "train_runtime": 1.3679,
+      "train_samples_per_second": 41370.599,
+      "train_steps_per_second": 323.128
     }
   ],
   "logging_steps": 1,