yunaseo/google_gemma_lora_emotion_detection

Browse files

Files changed (4)

README.md +35 -57
adapter_config.json +35 -0
adapter_model.safetensors +3 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,8 +1,9 @@
 ---
 license: gemma
-base_model: google/gemma-1.1-2b-it
 tags:
 - generated_from_trainer
 metrics:
 - accuracy
 model-index:
@@ -17,10 +18,10 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [google/gemma-1.1-2b-it](https://huggingface.co/google/gemma-1.1-2b-it) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.8963
-- F1 Micro: 0.6927
-- F1 Macro: 0.5713
-- Accuracy: 0.2537
 ## Model description
@@ -40,11 +41,11 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 0.0001
-- train_batch_size: 8
-- eval_batch_size: 8
 - seed: 42
 - gradient_accumulation_steps: 4
-- total_train_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - num_epochs: 5
@@ -53,59 +54,36 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch  | Step | Validation Loss | F1 Micro | F1 Macro | Accuracy |
 |:-------------:|:------:|:----:|:---------------:|:--------:|:--------:|:--------:|
-| 0.7843        | 0.1035 | 20   | 0.6332          | 0.6164   | 0.4369   | 0.1301   |
-| 0.5717        | 0.2070 | 40   | 0.5575          | 0.6468   | 0.5359   | 0.1793   |
-| 0.5341        | 0.3105 | 60   | 0.5292          | 0.6788   | 0.5562   | 0.2006   |
-| 0.5054        | 0.4140 | 80   | 0.5143          | 0.6830   | 0.5716   | 0.2045   |
-| 0.4748        | 0.5175 | 100  | 0.5039          | 0.6875   | 0.5797   | 0.1754   |
-| 0.5144        | 0.6210 | 120  | 0.5028          | 0.6804   | 0.5988   | 0.1631   |
-| 0.5055        | 0.7245 | 140  | 0.5101          | 0.6823   | 0.5728   | 0.2039   |
-| 0.5124        | 0.8279 | 160  | 0.4851          | 0.6854   | 0.5947   | 0.1793   |
-| 0.488         | 0.9314 | 180  | 0.4906          | 0.6777   | 0.5947   | 0.1638   |
-| 0.4867        | 1.0349 | 200  | 0.4970          | 0.6845   | 0.6033   | 0.2227   |
-| 0.3367        | 1.1384 | 220  | 0.5478          | 0.6977   | 0.5848   | 0.2188   |
-| 0.3342        | 1.2419 | 240  | 0.5531          | 0.6860   | 0.5898   | 0.2110   |
-| 0.3161        | 1.3454 | 260  | 0.5754          | 0.6719   | 0.5768   | 0.1955   |
-| 0.3312        | 1.4489 | 280  | 0.5335          | 0.6840   | 0.5906   | 0.1961   |
-| 0.3633        | 1.5524 | 300  | 0.5255          | 0.6799   | 0.5940   | 0.1883   |
-| 0.3199        | 1.6559 | 320  | 0.5461          | 0.6722   | 0.5868   | 0.1922   |
-| 0.3385        | 1.7594 | 340  | 0.5417          | 0.6888   | 0.5795   | 0.2149   |
-| 0.3292        | 1.8629 | 360  | 0.5324          | 0.6883   | 0.5969   | 0.1981   |
-| 0.3347        | 1.9664 | 380  | 0.5274          | 0.6890   | 0.5881   | 0.2006   |
-| 0.2122        | 2.0699 | 400  | 0.6957          | 0.6755   | 0.5671   | 0.2350   |
-| 0.1289        | 2.1734 | 420  | 0.6570          | 0.6814   | 0.5825   | 0.1974   |
-| 0.1505        | 2.2768 | 440  | 0.6495          | 0.6854   | 0.5857   | 0.2117   |
-| 0.1345        | 2.3803 | 460  | 0.7193          | 0.6813   | 0.5681   | 0.2045   |
-| 0.1438        | 2.4838 | 480  | 0.7042          | 0.6782   | 0.5649   | 0.2065   |
-| 0.14          | 2.5873 | 500  | 0.6777          | 0.6855   | 0.5826   | 0.2104   |
-| 0.146         | 2.6908 | 520  | 0.6699          | 0.6837   | 0.5840   | 0.2129   |
-| 0.138         | 2.7943 | 540  | 0.6954          | 0.6884   | 0.5820   | 0.2369   |
-| 0.1302        | 2.8978 | 560  | 0.7090          | 0.6828   | 0.5777   | 0.2220   |
-| 0.1324        | 3.0013 | 580  | 0.7075          | 0.6845   | 0.5818   | 0.2259   |
-| 0.0472        | 3.1048 | 600  | 0.8346          | 0.6867   | 0.5575   | 0.2414   |
-| 0.0544        | 3.2083 | 620  | 0.7725          | 0.6785   | 0.5706   | 0.2207   |
-| 0.0483        | 3.3118 | 640  | 0.8136          | 0.6865   | 0.5659   | 0.2291   |
-| 0.0465        | 3.4153 | 660  | 0.8333          | 0.6797   | 0.5613   | 0.2278   |
-| 0.0511        | 3.5188 | 680  | 0.8234          | 0.6852   | 0.5641   | 0.2265   |
-| 0.0511        | 3.6223 | 700  | 0.8298          | 0.6905   | 0.5712   | 0.2401   |
-| 0.0406        | 3.7257 | 720  | 0.8292          | 0.6886   | 0.5721   | 0.2421   |
-| 0.0565        | 3.8292 | 740  | 0.8266          | 0.6927   | 0.5721   | 0.2408   |
-| 0.0554        | 3.9327 | 760  | 0.7764          | 0.6887   | 0.5765   | 0.2350   |
-| 0.0319        | 4.0362 | 780  | 0.8450          | 0.6825   | 0.5650   | 0.2388   |
-| 0.0161        | 4.1397 | 800  | 0.8948          | 0.6892   | 0.5648   | 0.2524   |
-| 0.0174        | 4.2432 | 820  | 0.9146          | 0.6910   | 0.5659   | 0.2570   |
-| 0.0168        | 4.3467 | 840  | 0.9068          | 0.6874   | 0.5657   | 0.2414   |
-| 0.0184        | 4.4502 | 860  | 0.9225          | 0.6872   | 0.5615   | 0.2531   |
-| 0.0123        | 4.5537 | 880  | 0.9062          | 0.6882   | 0.5639   | 0.2511   |
-| 0.0149        | 4.6572 | 900  | 0.9087          | 0.6889   | 0.5660   | 0.2492   |
-| 0.0199        | 4.7607 | 920  | 0.8948          | 0.6917   | 0.5722   | 0.2472   |
-| 0.0144        | 4.8642 | 940  | 0.8944          | 0.6929   | 0.5724   | 0.2518   |
-| 0.015         | 4.9677 | 960  | 0.8963          | 0.6925   | 0.5709   | 0.2531   |
 ### Framework versions
 - Transformers 4.40.2
 - Pytorch 2.2.1+cu121
 - Datasets 2.19.1
-- Tokenizers 0.19.1

 ---
 license: gemma
+library_name: peft
 tags:
 - generated_from_trainer
+base_model: google/gemma-1.1-2b-it
 metrics:
 - accuracy
 model-index:
 This model is a fine-tuned version of [google/gemma-1.1-2b-it](https://huggingface.co/google/gemma-1.1-2b-it) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.4792
+- F1 Micro: 0.6970
+- F1 Macro: 0.6089
+- Accuracy: 0.2104
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 0.0001
+- train_batch_size: 16
+- eval_batch_size: 16
 - seed: 42
 - gradient_accumulation_steps: 4
+- total_train_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - num_epochs: 5
 | Training Loss | Epoch  | Step | Validation Loss | F1 Micro | F1 Macro | Accuracy |
 |:-------------:|:------:|:----:|:---------------:|:--------:|:--------:|:--------:|
+| 0.7081        | 0.2067 | 20   | 0.6048          | 0.6244   | 0.5113   | 0.1528   |
+| 0.5228        | 0.4134 | 40   | 0.5096          | 0.6713   | 0.5815   | 0.1883   |
+| 0.5048        | 0.6202 | 60   | 0.4928          | 0.7002   | 0.5865   | 0.2155   |
+| 0.5129        | 0.8269 | 80   | 0.4792          | 0.6970   | 0.6089   | 0.2104   |
+| 0.4842        | 1.0336 | 100  | 0.4801          | 0.6972   | 0.6023   | 0.2369   |
+| 0.3372        | 1.2403 | 120  | 0.5545          | 0.6687   | 0.5877   | 0.1761   |
+| 0.3302        | 1.4470 | 140  | 0.5374          | 0.6895   | 0.6020   | 0.2019   |
+| 0.3342        | 1.6537 | 160  | 0.5330          | 0.6860   | 0.5993   | 0.2117   |
+| 0.3392        | 1.8605 | 180  | 0.5190          | 0.6894   | 0.5913   | 0.2006   |
+| 0.2844        | 2.0672 | 200  | 0.5853          | 0.6891   | 0.5819   | 0.2369   |
+| 0.1458        | 2.2739 | 220  | 0.7038          | 0.6743   | 0.5749   | 0.2097   |
+| 0.1508        | 2.4806 | 240  | 0.6808          | 0.6802   | 0.5834   | 0.1994   |
+| 0.1481        | 2.6873 | 260  | 0.7026          | 0.6773   | 0.5721   | 0.2000   |
+| 0.1378        | 2.8941 | 280  | 0.7336          | 0.6790   | 0.5768   | 0.2162   |
+| 0.0961        | 3.1008 | 300  | 0.8397          | 0.6709   | 0.5465   | 0.2272   |
+| 0.0552        | 3.3075 | 320  | 0.8260          | 0.6743   | 0.5654   | 0.2168   |
+| 0.0509        | 3.5142 | 340  | 0.8692          | 0.6777   | 0.5666   | 0.2233   |
+| 0.0489        | 3.7209 | 360  | 0.8505          | 0.6874   | 0.5722   | 0.2388   |
+| 0.0526        | 3.9276 | 380  | 0.8269          | 0.6842   | 0.5778   | 0.2233   |
+| 0.0278        | 4.1344 | 400  | 0.9280          | 0.6813   | 0.5557   | 0.2414   |
+| 0.0187        | 4.3411 | 420  | 0.9390          | 0.6829   | 0.5588   | 0.2382   |
+| 0.0169        | 4.5478 | 440  | 0.9510          | 0.6834   | 0.5612   | 0.2485   |
+| 0.0158        | 4.7545 | 460  | 0.9325          | 0.6819   | 0.5612   | 0.2427   |
+| 0.0161        | 4.9612 | 480  | 0.9311          | 0.6822   | 0.5634   | 0.2440   |
 ### Framework versions
+- PEFT 0.10.0
 - Transformers 4.40.2
 - Pytorch 2.2.1+cu121
 - Datasets 2.19.1
+- Tokenizers 0.19.1

adapter_config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-1.1-2b-it",
+  "bias": "lora_only",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 256,
+  "lora_dropout": 0.01,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 128,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "gate_proj",
+    "q_proj",
+    "v_proj",
+    "down_proj",
+    "up_proj",
+    "score",
+    "o_proj",
+    "k_proj"
+  ],
+  "task_type": "SEQ_CLS",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1cdd6c5dd3983ce0e322336bfe21c89e94579dfb5ac094a05a5c4ef0a43d33d9
+size 630860528

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0d88d070736fcf2cc8a2ba8aa754a00ced2262d0ed4a7816ab632d728fac8986
 size 5112

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea96437ebc9da0288a42bc399bddcb8a110fe9cddc3026a9816daa08f38219c4
 size 5112