yakazimir committed on
Commit
8303cfa
·
verified ·
1 Parent(s): 29e2a0e

Model save

Browse files
Files changed (4) hide show
  1. README.md +4 -6
  2. all_results.json +5 -5
  3. train_results.json +5 -5
  4. trainer_state.json +8 -8
README.md CHANGED
@@ -3,23 +3,21 @@ library_name: transformers
3
  license: other
4
  base_model: trl-lib/qwen1.5-0.5b-sft
5
  tags:
6
- - alignment-handbook
7
  - trl
8
  - simpo
 
9
  - generated_from_trainer
10
- datasets:
11
- - yakazimir/ultrafeedback_binarized
12
  model-index:
13
- - name: qwen_05b_simpo
14
  results: []
15
  ---
16
 
17
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
18
  should probably proofread and complete it, then remove this comment. -->
19
 
20
- # qwen_05b_simpo
21
 
22
- This model is a fine-tuned version of [trl-lib/qwen1.5-0.5b-sft](https://huggingface.co/trl-lib/qwen1.5-0.5b-sft) on the yakazimir/ultrafeedback_binarized dataset.
23
 
24
  ## Model description
25
 
 
3
  license: other
4
  base_model: trl-lib/qwen1.5-0.5b-sft
5
  tags:
 
6
  - trl
7
  - simpo
8
+ - alignment-handbook
9
  - generated_from_trainer
 
 
10
  model-index:
11
+ - name: simpo-exps_qwen05b
12
  results: []
13
  ---
14
 
15
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
16
  should probably proofread and complete it, then remove this comment. -->
17
 
18
+ # simpo-exps_qwen05b
19
 
20
+ This model is a fine-tuned version of [trl-lib/qwen1.5-0.5b-sft](https://huggingface.co/trl-lib/qwen1.5-0.5b-sft) on an unknown dataset.
21
 
22
  ## Model description
23
 
all_results.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "epoch": 0.0029436360595417295,
3
  "eval_logits/chosen": -0.35490095615386963,
4
  "eval_logits/rejected": -0.32419130206108093,
5
  "eval_logps/chosen": -1.7861407995224,
@@ -14,9 +14,9 @@
14
  "eval_samples_per_second": 22.457,
15
  "eval_steps_per_second": 5.627,
16
  "total_flos": 0.0,
17
- "train_loss": 1.0150891217318447,
18
- "train_runtime": 13.4251,
19
  "train_samples": 59790,
20
- "train_samples_per_second": 11.918,
21
- "train_steps_per_second": 0.745
22
  }
 
1
  {
2
+ "epoch": 0.0032112393376818866,
3
  "eval_logits/chosen": -0.35490095615386963,
4
  "eval_logits/rejected": -0.32419130206108093,
5
  "eval_logps/chosen": -1.7861407995224,
 
14
  "eval_samples_per_second": 22.457,
15
  "eval_steps_per_second": 5.627,
16
  "total_flos": 0.0,
17
+ "train_loss": 0.40756722291310626,
18
+ "train_runtime": 13.7037,
19
  "train_samples": 59790,
20
+ "train_samples_per_second": 11.676,
21
+ "train_steps_per_second": 0.73
22
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 0.0029436360595417295,
3
  "total_flos": 0.0,
4
- "train_loss": 1.0150891217318447,
5
- "train_runtime": 13.4251,
6
  "train_samples": 59790,
7
- "train_samples_per_second": 11.918,
8
- "train_steps_per_second": 0.745
9
  }
 
1
  {
2
+ "epoch": 0.0032112393376818866,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.40756722291310626,
5
+ "train_runtime": 13.7037,
6
  "train_samples": 59790,
7
+ "train_samples_per_second": 11.676,
8
+ "train_steps_per_second": 0.73
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0029436360595417295,
5
  "eval_steps": 400,
6
- "global_step": 11,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -39,13 +39,13 @@
39
  "step": 10
40
  },
41
  {
42
- "epoch": 0.0029436360595417295,
43
- "step": 11,
44
  "total_flos": 0.0,
45
- "train_loss": 1.0150891217318447,
46
- "train_runtime": 13.4251,
47
- "train_samples_per_second": 11.918,
48
- "train_steps_per_second": 0.745
49
  }
50
  ],
51
  "logging_steps": 5,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0032112393376818866,
5
  "eval_steps": 400,
6
+ "global_step": 12,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
39
  "step": 10
40
  },
41
  {
42
+ "epoch": 0.0032112393376818866,
43
+ "step": 12,
44
  "total_flos": 0.0,
45
+ "train_loss": 0.40756722291310626,
46
+ "train_runtime": 13.7037,
47
+ "train_samples_per_second": 11.676,
48
+ "train_steps_per_second": 0.73
49
  }
50
  ],
51
  "logging_steps": 5,