hcene/finetuned-mDEBERTa-v3-mnli-xnli

Browse files

Files changed (5) hide show

README.md +124 -0
adapter_config.json +27 -0
adapter_model.safetensors +3 -0
logs/events.out.tfevents.1709148966.d79675d90a22.405.0 +3 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,124 @@

+---
+license: mit
+library_name: peft
+tags:
+- generated_from_trainer
+metrics:
+- accuracy
+- precision
+- recall
+- f1
+base_model: MoritzLaurer/mDeBERTa-v3-base-mnli-xnli
+model-index:
+- name: results
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# results
+This model is a fine-tuned version of [MoritzLaurer/mDeBERTa-v3-base-mnli-xnli](https://huggingface.co/MoritzLaurer/mDeBERTa-v3-base-mnli-xnli). The fine-tuning dataset was not recorded by the Trainer and is not specified here.
+It achieves the following results on the evaluation set (taken from the last logged evaluation, step 530 at epoch 9.14):
+- Loss: 0.6765
+- Accuracy: 0.7634
+- Precision: 0.7675
+- Recall: 0.7644
+- F1: 0.7627
+- Ratio: 0.3297
+## Model description
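+This repository contains a PEFT LoRA adapter for sequence classification (`task_type: SEQ_CLS`) on top of the base model `MoritzLaurer/mDeBERTa-v3-base-mnli-xnli`. According to `adapter_config.json`, the adapter uses rank `r=16`, `lora_alpha=8`, `lora_dropout=0.05`, no bias training, and targets the `classifier` module. More information needed on the task and label set.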
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0005
+- train_batch_size: 20
+- eval_batch_size: 20
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_ratio: 0.1
+- lr_scheduler_warmup_steps: 4
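+- num_epochs: 20 (configured value; the last evaluation logged in the training results table is at epoch 9.14, step 530)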
+- label_smoothing_factor: 0.1
+### Training results
+| Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision | Recall | F1     | Ratio  |
+|:-------------:|:-----:|:----:|:---------------:|:--------:|:---------:|:------:|:------:|:------:|
+| 1.7741        | 0.17  | 10   | 1.0961          | 0.7061   | 0.7103    | 0.7073 | 0.7078 | 0.3262 |
+| 1.2149        | 0.34  | 20   | 0.8783          | 0.7025   | 0.7066    | 0.7038 | 0.7044 | 0.3262 |
+| 0.959         | 0.52  | 30   | 0.8413          | 0.6774   | 0.6981    | 0.6784 | 0.6854 | 0.2939 |
+| 0.9582        | 0.69  | 40   | 0.7705          | 0.7312   | 0.7417    | 0.7321 | 0.7314 | 0.3190 |
+| 0.8706        | 0.86  | 50   | 0.6728          | 0.7419   | 0.7545    | 0.7437 | 0.7385 | 0.3190 |
+| 0.8804        | 1.03  | 60   | 0.6933          | 0.7133   | 0.7402    | 0.7157 | 0.6919 | 0.3190 |
+| 0.8999        | 1.21  | 70   | 0.7167          | 0.7133   | 0.7208    | 0.7144 | 0.7158 | 0.3190 |
+| 0.8914        | 1.38  | 80   | 0.6910          | 0.7384   | 0.7549    | 0.7390 | 0.7325 | 0.3226 |
+| 0.8578        | 1.55  | 90   | 0.6862          | 0.7348   | 0.7533    | 0.7369 | 0.7243 | 0.3262 |
+| 0.8755        | 1.72  | 100  | 0.6889          | 0.7240   | 0.7449    | 0.7263 | 0.7095 | 0.3262 |
+| 0.8551        | 1.9   | 110  | 0.7090          | 0.7133   | 0.7408    | 0.7158 | 0.6899 | 0.3262 |
+| 0.8736        | 2.07  | 120  | 0.7019          | 0.7097   | 0.7236    | 0.7120 | 0.6975 | 0.3262 |
+| 0.8647        | 2.24  | 130  | 0.7078          | 0.7240   | 0.7354    | 0.7261 | 0.7174 | 0.3262 |
+| 0.8755        | 2.41  | 140  | 0.7023          | 0.7527   | 0.7716    | 0.7533 | 0.7448 | 0.3262 |
+| 0.858         | 2.59  | 150  | 0.6745          | 0.7384   | 0.7450    | 0.7393 | 0.7372 | 0.3262 |
+| 0.8912        | 2.76  | 160  | 0.6842          | 0.7491   | 0.7635    | 0.7511 | 0.7424 | 0.3297 |
+| 0.8294        | 2.93  | 170  | 0.6623          | 0.7599   | 0.7624    | 0.7609 | 0.7602 | 0.3297 |
+| 0.8481        | 3.1   | 180  | 0.6652          | 0.7599   | 0.7715    | 0.7617 | 0.7551 | 0.3333 |
+| 0.8488        | 3.28  | 190  | 0.6782          | 0.7312   | 0.7609    | 0.7335 | 0.7131 | 0.3297 |
+| 0.8418        | 3.45  | 200  | 0.6884          | 0.7706   | 0.7738    | 0.7719 | 0.7720 | 0.3262 |
+| 0.8774        | 3.62  | 210  | 0.7066          | 0.7419   | 0.7523    | 0.7438 | 0.7381 | 0.3262 |
+| 0.8496        | 3.79  | 220  | 0.6687          | 0.7133   | 0.7214    | 0.7154 | 0.7060 | 0.3333 |
+| 0.825         | 3.97  | 230  | 0.6618          | 0.7634   | 0.7833    | 0.7639 | 0.7547 | 0.3297 |
+| 0.8933        | 4.14  | 240  | 0.6946          | 0.7419   | 0.7692    | 0.7424 | 0.7278 | 0.3262 |
+| 0.8579        | 4.31  | 250  | 0.6795          | 0.7491   | 0.7786    | 0.7495 | 0.7353 | 0.3262 |
+| 0.8023        | 4.48  | 260  | 0.6595          | 0.7563   | 0.7727    | 0.7569 | 0.7501 | 0.3262 |
+| 0.8736        | 4.66  | 270  | 0.6703          | 0.7491   | 0.7558    | 0.7508 | 0.7482 | 0.3262 |
+| 0.8291        | 4.83  | 280  | 0.7102          | 0.6989   | 0.7630    | 0.7019 | 0.6499 | 0.3262 |
+| 0.8923        | 5.0   | 290  | 0.7004          | 0.7097   | 0.7571    | 0.7124 | 0.6756 | 0.3262 |
+| 0.8571        | 5.17  | 300  | 0.6739          | 0.7634   | 0.7717    | 0.7642 | 0.7621 | 0.3262 |
+| 0.8521        | 5.34  | 310  | 0.6666          | 0.7563   | 0.7710    | 0.7569 | 0.7511 | 0.3262 |
+| 0.8369        | 5.52  | 320  | 0.6815          | 0.7455   | 0.7487    | 0.7467 | 0.7472 | 0.3262 |
+| 0.7897        | 5.69  | 330  | 0.6731          | 0.7097   | 0.7343    | 0.7122 | 0.6871 | 0.3262 |
+| 0.8801        | 5.86  | 340  | 0.6773          | 0.7419   | 0.7631    | 0.7441 | 0.7304 | 0.3297 |
+| 0.891         | 6.03  | 350  | 0.7107          | 0.7491   | 0.7556    | 0.7509 | 0.7473 | 0.3297 |
+| 0.8444        | 6.21  | 360  | 0.6805          | 0.7634   | 0.7879    | 0.7639 | 0.7543 | 0.3262 |
+| 0.8375        | 6.38  | 370  | 0.6562          | 0.7599   | 0.7725    | 0.7605 | 0.7560 | 0.3262 |
+| 0.8141        | 6.55  | 380  | 0.6578          | 0.7276   | 0.7409    | 0.7296 | 0.7217 | 0.3262 |
+| 0.8792        | 6.72  | 390  | 0.6790          | 0.7204   | 0.7355    | 0.7226 | 0.7121 | 0.3262 |
+| 0.8868        | 6.9   | 400  | 0.7063          | 0.7384   | 0.7411    | 0.7397 | 0.7404 | 0.3262 |
+| 0.8767        | 7.07  | 410  | 0.7074          | 0.7240   | 0.7440    | 0.7262 | 0.7126 | 0.3262 |
+| 0.8545        | 7.24  | 420  | 0.6725          | 0.7276   | 0.7520    | 0.7300 | 0.7108 | 0.3297 |
+| 0.8589        | 7.41  | 430  | 0.6712          | 0.7276   | 0.7473    | 0.7299 | 0.7139 | 0.3297 |
+| 0.8522        | 7.59  | 440  | 0.6853          | 0.7634   | 0.7655    | 0.7649 | 0.7644 | 0.3297 |
+| 0.777         | 7.76  | 450  | 0.6623          | 0.7634   | 0.7714    | 0.7642 | 0.7604 | 0.3297 |
+| 0.8903        | 7.93  | 460  | 0.6629          | 0.7599   | 0.7629    | 0.7609 | 0.7598 | 0.3297 |
+| 0.8168        | 8.1   | 470  | 0.6714          | 0.7599   | 0.7650    | 0.7608 | 0.7584 | 0.3297 |
+| 0.7979        | 8.28  | 480  | 0.6469          | 0.7491   | 0.7505    | 0.7505 | 0.7504 | 0.3297 |
+| 0.8674        | 8.45  | 490  | 0.6553          | 0.7455   | 0.7603    | 0.7475 | 0.7382 | 0.3297 |
+| 0.8475        | 8.62  | 500  | 0.6788          | 0.7563   | 0.7576    | 0.7576 | 0.7576 | 0.3297 |
+| 0.8723        | 8.79  | 510  | 0.6862          | 0.7599   | 0.7613    | 0.7612 | 0.7611 | 0.3297 |
+| 0.8684        | 8.97  | 520  | 0.6938          | 0.7563   | 0.7604    | 0.7579 | 0.7560 | 0.3297 |
+| 0.8278        | 9.14  | 530  | 0.6765          | 0.7634   | 0.7675    | 0.7644 | 0.7627 | 0.3297 |
+### Framework versions
+- PEFT 0.9.0
+- Transformers 4.39.0.dev0
+- Pytorch 2.1.0+cu121
+- Datasets 2.17.1
+- Tokenizers 0.15.2

adapter_config.json ADDED Viewed

	@@ -0,0 +1,27 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "MoritzLaurer/mDeBERTa-v3-base-mnli-xnli",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 8,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "classifier"
+  ],
+  "task_type": "SEQ_CLS",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e0a2a9a902e804d199fac33a014f1577378c6fdf4459645ce1dd39686a680ccc
+size 768987780

logs/events.out.tfevents.1709148966.d79675d90a22.405.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:16f961d528c63c9e993c11849867b14e1826a21523257333d09e35f0997387da
+size 45011

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cfd342eff91d0c0167627d75783bdc6dc3986996b5a3ffef26d04e3c36532650
+size 4856