raffenmb committed
Commit 38b9b40
1 Parent(s): 08d8b1e

End of training

README.md CHANGED
@@ -1,6 +1,6 @@
 ---
-license: apache-2.0
-base_model: facebook/bart-large
+license: mit
+base_model: facebook/bart-large-cnn
 tags:
 - generated_from_trainer
 model-index:
@@ -13,9 +13,9 @@ should probably proofread and complete it, then remove this comment. -->
 
 # bart-samsum-finetuned
 
-This model is a fine-tuned version of [facebook/bart-large](https://huggingface.co/facebook/bart-large) on an unknown dataset.
+This model is a fine-tuned version of [facebook/bart-large-cnn](https://huggingface.co/facebook/bart-large-cnn) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.7697
+- Loss: 0.1331
 
 ## Model description
 
@@ -46,9 +46,9 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 0.3225 | 1.0 | 74 | 3.6672 |
-| 0.3482 | 2.0 | 148 | 0.4796 |
-| 0.2575 | 3.0 | 222 | 0.7697 |
+| 0.121 | 1.0 | 74 | 0.1348 |
+| 0.0903 | 2.0 | 148 | 0.1331 |
+| 0.0795 | 3.0 | 222 | 0.1331 |
 
 
 ### Framework versions
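With the updated card, the checkpoint can be used as an ordinary Hugging Face summarization model. A minimal sketch, assuming the repository id is raffenmb/bart-samsum-finetuned (inferred from the committer and model name on this page; substitute the actual repo id or a local path if it differs) and using a made-up SAMSum-style dialogue:

```python
from transformers import pipeline

# Assumed repo id; replace with the real checkpoint location if different.
summarizer = pipeline("summarization", model="raffenmb/bart-samsum-finetuned")

dialogue = (
    "Amanda: I baked cookies. Do you want some?\n"
    "Jerry: Sure!\n"
    "Amanda: I'll bring you some tomorrow :-)"
)

# Per-call length settings override the stored generation defaults.
print(summarizer(dialogue, max_length=60, min_length=10)[0]["summary_text"])
```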
config.json CHANGED
@@ -1,15 +1,15 @@
 {
-  "_name_or_path": "facebook/bart-large",
-  "activation_dropout": 0.1,
+  "_name_or_path": "facebook/bart-large-cnn",
+  "_num_labels": 3,
+  "activation_dropout": 0.0,
   "activation_function": "gelu",
-  "add_bias_logits": false,
   "add_final_layer_norm": false,
   "architectures": [
     "BartForConditionalGeneration"
   ],
-  "attention_dropout": 0.1,
+  "attention_dropout": 0.0,
   "bos_token_id": 0,
-  "classif_dropout": 0.1,
+  "classif_dropout": 0.0,
   "classifier_dropout": 0.0,
   "d_model": 1024,
   "decoder_attention_heads": 16,
@@ -24,6 +24,7 @@
   "encoder_layerdrop": 0.0,
   "encoder_layers": 12,
   "eos_token_id": 2,
+  "force_bos_token_to_be_generated": true,
   "forced_bos_token_id": 0,
   "forced_eos_token_id": 2,
   "gradient_checkpointing": false,
@@ -39,36 +40,31 @@
     "LABEL_1": 1,
     "LABEL_2": 2
   },
+  "length_penalty": 2.0,
+  "max_length": 142,
   "max_position_embeddings": 1024,
+  "min_length": 56,
   "model_type": "bart",
   "no_repeat_ngram_size": 3,
   "normalize_before": false,
   "num_beams": 4,
   "num_hidden_layers": 12,
+  "output_past": true,
   "pad_token_id": 1,
+  "prefix": " ",
   "scale_embedding": false,
   "task_specific_params": {
     "summarization": {
-      "length_penalty": 1.0,
-      "max_length": 128,
-      "min_length": 12,
-      "num_beams": 4
-    },
-    "summarization_cnn": {
+      "early_stopping": true,
       "length_penalty": 2.0,
       "max_length": 142,
       "min_length": 56,
+      "no_repeat_ngram_size": 3,
       "num_beams": 4
-    },
-    "summarization_xsum": {
-      "length_penalty": 1.0,
-      "max_length": 62,
-      "min_length": 11,
-      "num_beams": 6
     }
   },
   "torch_dtype": "float32",
   "transformers_version": "4.38.2",
   "use_cache": true,
-  "vocab_size": 50265
+  "vocab_size": 50264
 }
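The config now carries bart-large-cnn's single CNN-style summarization preset and promotes length_penalty, max_length, and min_length to top-level defaults. A minimal sketch of inspecting those values with transformers (using the public facebook/bart-large-cnn id; the fine-tuned repo's config should show the same fields per the diff above):

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained("facebook/bart-large-cnn")

# Task preset retained in the new config, per the diff above.
print(config.task_specific_params["summarization"])

# Top-level generation defaults promoted out of the preset.
print(config.length_penalty, config.max_length, config.min_length)  # expected: 2.0 142 56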
generation_config.json CHANGED
@@ -6,6 +6,9 @@
   "eos_token_id": 2,
   "forced_bos_token_id": 0,
   "forced_eos_token_id": 2,
+  "length_penalty": 2.0,
+  "max_length": 142,
+  "min_length": 56,
   "no_repeat_ngram_size": 3,
   "num_beams": 4,
   "pad_token_id": 1,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee81650c7474cae5f50d9ab1eb3cecba95cc8e192b3fc0ab4c09e8d16c4a6bff
-size 1625426996
+oid sha256:cdeb6f70a2d8d3f86308e6156491df6db70d34ba434b19ab0cb1dcc76eda09cd
+size 1625422896
runs/Apr15_21-02-35_a57c2dcb2ddd/events.out.tfevents.1713214957.a57c2dcb2ddd.21169.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e500f9b3ae8384e06af33bf1b544ff38b759b998ab14b564d410a50c49d8e5d6
+size 11096
runs/Apr15_21-03-28_a57c2dcb2ddd/events.out.tfevents.1713215012.a57c2dcb2ddd.21169.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7787ef88bad21e1a448f1f4a0b14f5c7938592aeff9932393cd09d09b9a868e
+size 22356
tokenizer.json CHANGED
@@ -1,7 +1,21 @@
 {
   "version": "1.0",
-  "truncation": null,
-  "padding": null,
+  "truncation": {
+    "direction": "Right",
+    "max_length": 1024,
+    "strategy": "LongestFirst",
+    "stride": 0
+  },
+  "padding": {
+    "strategy": {
+      "Fixed": 1024
+    },
+    "direction": "Right",
+    "pad_to_multiple_of": null,
+    "pad_id": 2,
+    "pad_type_id": 0,
+    "pad_token": "</s>"
+  },
   "added_tokens": [
     {
       "id": 0,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e5151dc59904ec7e1813b8b438e04204d3458311a1c40c997b77956984b03251
+oid sha256:1233a1214722ebed3e47c4bfa28f2151cda4030913ca5fa181ba0915d3cb3771
 size 4920