enkhtogtokh/mistral-dpom

Browse files

Files changed (5) hide show

README.md +15 -35
adapter_config.json +2 -2
adapter_model.safetensors +1 -1
runs/Jan15_14-36-59_d6f2f96c2d0f/events.out.tfevents.1705329535.d6f2f96c2d0f.7927.0 +3 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -17,15 +17,15 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [TheBloke/OpenHermes-2-Mistral-7B-GPTQ](https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GPTQ) on an unspecified dataset (the dataset name was not recorded when this card was generated).
 It achieves the following results on the evaluation set:
-- Loss: 0.8386
-- Rewards/chosen: 1.3959
-- Rewards/rejected: 1.2944
-- Rewards/accuracies: 0.6635
-- Rewards/margins: 0.1015
-- Logps/rejected: -204.8275
-- Logps/chosen: -212.7901
-- Logits/rejected: -2.3737
-- Logits/chosen: -2.4043
 ## Model description
@@ -51,38 +51,18 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 2
-- training_steps: 250
 - mixed_precision_training: Native AMP
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
-| 0.7034        | 0.0   | 10   | 0.6881          | 0.0669         | 0.0544           | 0.6442             | 0.0125          | -217.2274      | -226.0799    | -2.3083         | -2.3446       |
-| 0.6817        | 0.0   | 20   | 0.7081          | -0.0529        | -0.0428          | 0.5096             | -0.0100         | -218.1994      | -227.2778    | -2.3270         | -2.3588       |
-| 0.8729        | 0.0   | 30   | 0.7720          | -0.3001        | -0.2331          | 0.5                | -0.0670         | -220.1020      | -229.7500    | -2.3539         | -2.3796       |
-| 0.8805        | 0.0   | 40   | 0.7841          | -0.2908        | -0.2089          | 0.4712             | -0.0819         | -219.8597      | -229.6567    | -2.3597         | -2.3866       |
-| 0.7656        | 0.0   | 50   | 0.7334          | 0.0924         | 0.1060           | 0.5288             | -0.0136         | -216.7109      | -225.8250    | -2.3542         | -2.3835       |
-| 0.7447        | 0.0   | 60   | 0.7034          | 0.4187         | 0.3651           | 0.5288             | 0.0536          | -214.1196      | -222.5618    | -2.3546         | -2.3821       |
-| 0.5761        | 0.01  | 70   | 0.6868          | 0.7922         | 0.6818           | 0.6635             | 0.1105          | -210.9534      | -218.8266    | -2.3600         | -2.3880       |
-| 0.6459        | 0.01  | 80   | 0.6960          | 0.9805         | 0.8607           | 0.6154             | 0.1198          | -209.1636      | -216.9435    | -2.3727         | -2.4040       |
-| 0.6212        | 0.01  | 90   | 0.7216          | 1.0026         | 0.9052           | 0.5962             | 0.0974          | -208.7185      | -216.7225    | -2.3769         | -2.4083       |
-| 0.5771        | 0.01  | 100  | 0.7343          | 1.2182         | 1.1107           | 0.625              | 0.1075          | -206.6636      | -214.5667    | -2.3794         | -2.4081       |
-| 1.105         | 0.01  | 110  | 0.7612          | 1.3164         | 1.2262           | 0.625              | 0.0901          | -205.5086      | -213.5851    | -2.3803         | -2.4103       |
-| 0.5704        | 0.01  | 120  | 0.7712          | 1.2350         | 1.1516           | 0.6154             | 0.0834          | -206.2548      | -214.3988    | -2.3789         | -2.4092       |
-| 0.651         | 0.01  | 130  | 0.7840          | 1.2433         | 1.1676           | 0.6058             | 0.0757          | -206.0951      | -214.3164    | -2.3799         | -2.4109       |
-| 0.5913        | 0.01  | 140  | 0.7916          | 1.1579         | 1.0827           | 0.6058             | 0.0752          | -206.9440      | -215.1704    | -2.3800         | -2.4116       |
-| 0.7816        | 0.01  | 150  | 0.7879          | 1.0975         | 1.0127           | 0.6058             | 0.0849          | -207.6444      | -215.7737    | -2.3767         | -2.4085       |
-| 0.7859        | 0.01  | 160  | 0.7833          | 0.9808         | 0.8809           | 0.6346             | 0.0999          | -208.9623      | -216.9414    | -2.3730         | -2.4050       |
-| 0.6368        | 0.01  | 170  | 0.8020          | 1.1103         | 1.0068           | 0.6538             | 0.1035          | -207.7034      | -215.6462    | -2.3739         | -2.4052       |
-| 0.7555        | 0.01  | 180  | 0.8232          | 1.3013         | 1.2030           | 0.6731             | 0.0983          | -205.7407      | -213.7359    | -2.3795         | -2.4099       |
-| 1.0436        | 0.02  | 190  | 0.8346          | 1.3330         | 1.2393           | 0.6635             | 0.0938          | -205.3784      | -213.4188    | -2.3772         | -2.4080       |
-| 0.8714        | 0.02  | 200  | 0.8394          | 1.4131         | 1.3150           | 0.6731             | 0.0980          | -204.6207      | -212.6184    | -2.3790         | -2.4098       |
-| 0.5898        | 0.02  | 210  | 0.8321          | 1.4170         | 1.3099           | 0.6731             | 0.1072          | -204.6723      | -212.5786    | -2.3751         | -2.4056       |
-| 0.5896        | 0.02  | 220  | 0.8264          | 1.4029         | 1.2923           | 0.6827             | 0.1106          | -204.8480      | -212.7195    | -2.3727         | -2.4031       |
-| 0.7008        | 0.02  | 230  | 0.8307          | 1.4009         | 1.2911           | 0.6731             | 0.1098          | -204.8596      | -212.7401    | -2.3724         | -2.4028       |
-| 2.6272        | 0.02  | 240  | 0.8366          | 1.3934         | 1.2906           | 0.6635             | 0.1028          | -204.8651      | -212.8147    | -2.3735         | -2.4039       |
-| 0.9059        | 0.02  | 250  | 0.8386          | 1.3959         | 1.2944           | 0.6635             | 0.1015          | -204.8275      | -212.7901    | -2.3737         | -2.4043       |
 ### Framework versions

 This model is a fine-tuned version of [TheBloke/OpenHermes-2-Mistral-7B-GPTQ](https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GPTQ) on an unspecified dataset (the dataset name was not recorded when this card was generated).
 It achieves the following results on the evaluation set:
+- Loss: 0.0002
+- Rewards/chosen: 24.3722
+- Rewards/rejected: -6.8482
+- Rewards/accuracies: 1.0
+- Rewards/margins: 31.2205
+- Logps/rejected: -88.3652
+- Logps/chosen: -328.0153
+- Logits/rejected: -1.3660
+- Logits/chosen: -1.8558
 ## Model description
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 2
+- training_steps: 50
 - mixed_precision_training: Native AMP
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+| 0.5974        | 0.01  | 10   | 0.3220          | 2.6702         | -0.4084          | 0.9712             | 3.0786          | -23.9665       | -545.0353    | -1.2378         | -1.5696       |
+| 0.0879        | 0.02  | 20   | 0.0217          | 13.3520        | -3.0537          | 1.0                | 16.4057         | -50.4196       | -438.2177    | -1.2934         | -1.7050       |
+| 0.0413        | 0.03  | 30   | 0.0015          | 20.3280        | -5.4777          | 1.0                | 25.8057         | -74.6603       | -368.4581    | -1.3375         | -1.8145       |
+| 0.0003        | 0.04  | 40   | 0.0003          | 23.8990        | -6.4549          | 1.0                | 30.3539         | -84.4315       | -332.7477    | -1.3562         | -1.8484       |
+| 0.0002        | 0.05  | 50   | 0.0002          | 24.3722        | -6.8482          | 1.0                | 31.2205         | -88.3652       | -328.0153    | -1.3660         | -1.8558       |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -19,8 +19,8 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "v_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "v_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:42b1b58c214146e1ef7e4fba8f7e8372f453587e7537ae7fe1283b9fe29751ba
 size 6832600

 version https://git-lfs.github.com/spec/v1
+oid sha256:2690627b1ee8d50ca1e9089ca5636f40a6038c42e2782441ecd4e1b1f0f6a681
 size 6832600

runs/Jan15_14-36-59_d6f2f96c2d0f/events.out.tfevents.1705329535.d6f2f96c2d0f.7927.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ebf061059a94c6c43782541b5fa5fc295eba0b524f4a97b4a990b434c3fc0157
+size 12559

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:855d694fed9e062d72458809eb91cec0a8c065e1ae7665772773c22b3eb1c231
 size 4091

 version https://git-lfs.github.com/spec/v1
+oid sha256:dd825ceee038058b8a2f5bece56f2f93a6bcb66ea5a6992b366df62855309b21
 size 4091