sss1465236/Bart-large*final

Browse files

Files changed (9) hide show

README.md +14 -10
config.json +14 -18
generation_config.json +3 -0
model.safetensors +2 -2
runs/Dec29_17-23-30_c8b93ab3f819/events.out.tfevents.1703870620.c8b93ab3f819.2550.1 +3 -0
special_tokens_map.json +42 -6
tokenizer_config.json +0 -1
training_args.bin +1 -1
vocab.json +0 -0

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
-license: apache-2.0
-base_model: facebook/bart-large
 tags:
 - generated_from_trainer
 metrics:
@@ -15,13 +15,13 @@ should probably proofread and complete it, then remove this comment. -->
 # Big-Bart-BBC
-This model is a fine-tuned version of [facebook/bart-large](https://huggingface.co/facebook/bart-large) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.4329
-- Rouge1: 0.2331
-- Rouge2: 0.1114
-- Rougel: 0.1995
-- Rougelsum: 0.1989
 ## Model description
@@ -46,13 +46,17 @@ The following hyperparameters were used during training:
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 1
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum |
 |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|:------:|:---------:|
-| 3.1436        | 1.0   | 1652 | 2.4329          | 0.2331 | 0.1114 | 0.1995 | 0.1989    |
 ### Framework versions

 ---
+license: mit
+base_model: facebook/bart-large-cnn
 tags:
 - generated_from_trainer
 metrics:
 # Big-Bart-BBC
+This model is a fine-tuned version of [facebook/bart-large-cnn](https://huggingface.co/facebook/bart-large-cnn) on an unspecified dataset.
 It achieves the following results on the evaluation set:
+- Loss: 4.1339
+- Rouge1: 0.2638
+- Rouge2: 0.1052
+- Rougel: 0.2019
+- Rougelsum: 0.202
 ## Model description
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- num_epochs: 5
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum |
 |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|:------:|:---------:|
+| 3.001         | 1.0   | 1652 | 2.8616          | 0.2179 | 0.0571 | 0.1565 | 0.1564    |
+| 1.7636        | 2.0   | 3304 | 2.7371          | 0.2423 | 0.0772 | 0.1766 | 0.1767    |
+| 0.9422        | 3.0   | 4956 | 3.1619          | 0.2463 | 0.0842 | 0.1832 | 0.1832    |
+| 0.4259        | 4.0   | 6608 | 3.5730          | 0.2645 | 0.1009 | 0.2001 | 0.2002    |
+| 0.1637        | 5.0   | 8260 | 4.1339          | 0.2638 | 0.1052 | 0.2019 | 0.202     |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,15 +1,15 @@
 {
-  "_name_or_path": "facebook/bart-large",
-  "activation_dropout": 0.1,
   "activation_function": "gelu",
-  "add_bias_logits": false,
   "add_final_layer_norm": false,
   "architectures": [
     "BartForConditionalGeneration"
   ],
-  "attention_dropout": 0.1,
   "bos_token_id": 0,
-  "classif_dropout": 0.1,
   "classifier_dropout": 0.0,
   "d_model": 1024,
   "decoder_attention_heads": 16,
@@ -24,6 +24,7 @@
   "encoder_layerdrop": 0.0,
   "encoder_layers": 12,
   "eos_token_id": 2,
   "forced_bos_token_id": 0,
   "forced_eos_token_id": 2,
   "gradient_checkpointing": false,
@@ -39,36 +40,31 @@
     "LABEL_1": 1,
     "LABEL_2": 2
   },
   "max_position_embeddings": 1024,
   "model_type": "bart",
   "no_repeat_ngram_size": 3,
   "normalize_before": false,
   "num_beams": 4,
   "num_hidden_layers": 12,
   "pad_token_id": 1,
   "scale_embedding": false,
   "task_specific_params": {
     "summarization": {
-      "length_penalty": 1.0,
-      "max_length": 128,
-      "min_length": 12,
-      "num_beams": 4
-    },
-    "summarization_cnn": {
       "length_penalty": 2.0,
       "max_length": 142,
       "min_length": 56,
       "num_beams": 4
-    },
-    "summarization_xsum": {
-      "length_penalty": 1.0,
-      "max_length": 62,
-      "min_length": 11,
-      "num_beams": 6
     }
   },
   "torch_dtype": "float32",
   "transformers_version": "4.36.2",
   "use_cache": true,
-  "vocab_size": 50265
 }

 {
+  "_name_or_path": "facebook/bart-large-cnn",
+  "_num_labels": 3,
+  "activation_dropout": 0.0,
   "activation_function": "gelu",
   "add_final_layer_norm": false,
   "architectures": [
     "BartForConditionalGeneration"
   ],
+  "attention_dropout": 0.0,
   "bos_token_id": 0,
+  "classif_dropout": 0.0,
   "classifier_dropout": 0.0,
   "d_model": 1024,
   "decoder_attention_heads": 16,
   "encoder_layerdrop": 0.0,
   "encoder_layers": 12,
   "eos_token_id": 2,
+  "force_bos_token_to_be_generated": true,
   "forced_bos_token_id": 0,
   "forced_eos_token_id": 2,
   "gradient_checkpointing": false,
     "LABEL_1": 1,
     "LABEL_2": 2
   },
+  "length_penalty": 2.0,
+  "max_length": 142,
   "max_position_embeddings": 1024,
+  "min_length": 56,
   "model_type": "bart",
   "no_repeat_ngram_size": 3,
   "normalize_before": false,
   "num_beams": 4,
   "num_hidden_layers": 12,
+  "output_past": true,
   "pad_token_id": 1,
+  "prefix": " ",
   "scale_embedding": false,
   "task_specific_params": {
     "summarization": {
+      "early_stopping": true,
       "length_penalty": 2.0,
       "max_length": 142,
       "min_length": 56,
+      "no_repeat_ngram_size": 3,
       "num_beams": 4
     }
   },
   "torch_dtype": "float32",
   "transformers_version": "4.36.2",
   "use_cache": true,
+  "vocab_size": 50264
 }

generation_config.json CHANGED Viewed

@@ -5,6 +5,9 @@
   "eos_token_id": 2,
   "forced_bos_token_id": 0,
   "forced_eos_token_id": 2,
   "no_repeat_ngram_size": 3,
   "num_beams": 4,
   "pad_token_id": 1,

   "eos_token_id": 2,
   "forced_bos_token_id": 0,
   "forced_eos_token_id": 2,
+  "length_penalty": 2.0,
+  "max_length": 142,
+  "min_length": 56,
   "no_repeat_ngram_size": 3,
   "num_beams": 4,
   "pad_token_id": 1,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fe8c4b7c54b4f50b5618f0145783f0d964cc338db8078ff83edd6164ce2d69c4
-size 1625426996

 version https://git-lfs.github.com/spec/v1
+oid sha256:e929786cea8189de4fa198b1b7989ff4f40e62b8b53856dcef066611206f69eb
+size 1625422896

runs/Dec29_17-23-30_c8b93ab3f819/events.out.tfevents.1703870620.c8b93ab3f819.2550.1 ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3789cc22c2554f133c57480ba87adc5fa8eeeccdd4199367ec50ef210c1ca38f
+size 9027

special_tokens_map.json CHANGED Viewed

@@ -1,7 +1,25 @@
 {
-  "bos_token": "<s>",
-  "cls_token": "<s>",
-  "eos_token": "</s>",
   "mask_token": {
     "content": "<mask>",
     "lstrip": true,
@@ -9,7 +27,25 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "<pad>",
-  "sep_token": "</s>",
-  "unk_token": "<unk>"
 }

 {
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
   "mask_token": {
     "content": "<mask>",
     "lstrip": true,
     "rstrip": false,
     "single_word": false
   },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
 }

tokenizer_config.json CHANGED Viewed

@@ -52,6 +52,5 @@
   "pad_token": "<pad>",
   "sep_token": "</s>",
   "tokenizer_class": "BartTokenizer",
-  "trim_offsets": true,
   "unk_token": "<unk>"
 }

   "pad_token": "<pad>",
   "sep_token": "</s>",
   "tokenizer_class": "BartTokenizer",
   "unk_token": "<unk>"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b4d2a02bb99c3bdb95dad7c821c8ff0ccfe3e01d5625e72cf5074f5c9dbe69eb
 size 4856

 version https://git-lfs.github.com/spec/v1
+oid sha256:54d4fcc00ce2234fa8495b81ee479bacf95bbf7540494f87c27fd91fcbeeda20
 size 4856

vocab.json CHANGED Viewed

The diff for this file is too large to render. See raw diff