raffenmb committed
Commit 38b9b40
1 Parent(s): 08d8b1e

End of training

README.md CHANGED
@@ -1,6 +1,6 @@
 ---
-license: apache-2.0
-base_model: facebook/bart-large
+license: mit
+base_model: facebook/bart-large-cnn
 tags:
 - generated_from_trainer
 model-index:
@@ -13,9 +13,9 @@ should probably proofread and complete it, then remove this comment. -->
 
 # bart-samsum-finetuned
 
-This model is a fine-tuned version of [facebook/bart-large](https://huggingface.co/facebook/bart-large) on an unknown dataset.
+This model is a fine-tuned version of [facebook/bart-large-cnn](https://huggingface.co/facebook/bart-large-cnn) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.7697
+- Loss: 0.1331
 
 ## Model description
 
@@ -46,9 +46,9 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 0.3225 | 1.0 | 74 | 3.6672 |
-| 0.3482 | 2.0 | 148 | 0.4796 |
-| 0.2575 | 3.0 | 222 | 0.7697 |
+| 0.121 | 1.0 | 74 | 0.1348 |
+| 0.0903 | 2.0 | 148 | 0.1331 |
+| 0.0795 | 3.0 | 222 | 0.1331 |
 
 
 ### Framework versions
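With the updated card, the checkpoint can be used as an ordinary Hugging Face summarization model. A minimal sketch, assuming the repository id is raffenmb/bart-samsum-finetuned (inferred from the committer and model name on this page; substitute the actual repo id or a local path if it differs) and using a made-up SAMSum-style dialogue:

```python
from transformers import pipeline

# Assumed repo id; replace with the real checkpoint location if different.
summarizer = pipeline("summarization", model="raffenmb/bart-samsum-finetuned")

dialogue = (
    "Amanda: I baked cookies. Do you want some?\n"
    "Jerry: Sure!\n"
    "Amanda: I'll bring you some tomorrow :-)"
)

# Per-call length settings override the stored generation defaults.
print(summarizer(dialogue, max_length=60, min_length=10)[0]["summary_text"])
```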
config.json CHANGED
@@ -1,15 +1,15 @@
 {
-  "_name_or_path": "facebook/bart-large",
-  "activation_dropout": 0.1,
+  "_name_or_path": "facebook/bart-large-cnn",
+  "_num_labels": 3,
+  "activation_dropout": 0.0,
   "activation_function": "gelu",
-  "add_bias_logits": false,
   "add_final_layer_norm": false,
   "architectures": [
     "BartForConditionalGeneration"
   ],
-  "attention_dropout": 0.1,
+  "attention_dropout": 0.0,
   "bos_token_id": 0,
-  "classif_dropout": 0.1,
+  "classif_dropout": 0.0,
   "classifier_dropout": 0.0,
   "d_model": 1024,
   "decoder_attention_heads": 16,
@@ -24,6 +24,7 @@
   "encoder_layerdrop": 0.0,
   "encoder_layers": 12,
   "eos_token_id": 2,
+  "force_bos_token_to_be_generated": true,
   "forced_bos_token_id": 0,
   "forced_eos_token_id": 2,
   "gradient_checkpointing": false,
@@ -39,36 +40,31 @@
     "LABEL_1": 1,
     "LABEL_2": 2
   },
+  "length_penalty": 2.0,
+  "max_length": 142,
   "max_position_embeddings": 1024,
+  "min_length": 56,
   "model_type": "bart",
   "no_repeat_ngram_size": 3,
   "normalize_before": false,
   "num_beams": 4,
   "num_hidden_layers": 12,
+  "output_past": true,
   "pad_token_id": 1,
+  "prefix": " ",
   "scale_embedding": false,
   "task_specific_params": {
     "summarization": {
-      "length_penalty": 1.0,
-      "max_length": 128,
-      "min_length": 12,
-      "num_beams": 4
-    },
-    "summarization_cnn": {
+      "early_stopping": true,
       "length_penalty": 2.0,
       "max_length": 142,
       "min_length": 56,
+      "no_repeat_ngram_size": 3,
       "num_beams": 4
-    },
-    "summarization_xsum": {
-      "length_penalty": 1.0,
-      "max_length": 62,
-      "min_length": 11,
-      "num_beams": 6
     }
   },
   "torch_dtype": "float32",
   "transformers_version": "4.38.2",
   "use_cache": true,
-  "vocab_size": 50265
+  "vocab_size": 50264
 }
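The config now carries bart-large-cnn's single CNN-style summarization preset and promotes length_penalty, max_length, and min_length to top-level defaults. A minimal sketch of inspecting those values with transformers (using the public facebook/bart-large-cnn id; the fine-tuned repo's config should show the same fields per the diff above):

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained("facebook/bart-large-cnn")

# Task preset retained in the new config, per the diff above.
print(config.task_specific_params["summarization"])

# Top-level generation defaults promoted out of the preset.
print(config.length_penalty, config.max_length, config.min_length)  # expected: 2.0 142 56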
generation_config.json CHANGED
@@ -6,6 +6,9 @@
   "eos_token_id": 2,
   "forced_bos_token_id": 0,
   "forced_eos_token_id": 2,
+  "length_penalty": 2.0,
+  "max_length": 142,
+  "min_length": 56,
   "no_repeat_ngram_size": 3,
   "num_beams": 4,
   "pad_token_id": 1,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee81650c7474cae5f50d9ab1eb3cecba95cc8e192b3fc0ab4c09e8d16c4a6bff
-size 1625426996
+oid sha256:cdeb6f70a2d8d3f86308e6156491df6db70d34ba434b19ab0cb1dcc76eda09cd
+size 1625422896
runs/Apr15_21-02-35_a57c2dcb2ddd/events.out.tfevents.1713214957.a57c2dcb2ddd.21169.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e500f9b3ae8384e06af33bf1b544ff38b759b998ab14b564d410a50c49d8e5d6
+size 11096
runs/Apr15_21-03-28_a57c2dcb2ddd/events.out.tfevents.1713215012.a57c2dcb2ddd.21169.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7787ef88bad21e1a448f1f4a0b14f5c7938592aeff9932393cd09d09b9a868e
+size 22356
tokenizer.json CHANGED
@@ -1,7 +1,21 @@
 {
   "version": "1.0",
-  "truncation": null,
-  "padding": null,
+  "truncation": {
+    "direction": "Right",
+    "max_length": 1024,
+    "strategy": "LongestFirst",
+    "stride": 0
+  },
+  "padding": {
+    "strategy": {
+      "Fixed": 1024
+    },
+    "direction": "Right",
+    "pad_to_multiple_of": null,
+    "pad_id": 2,
+    "pad_type_id": 0,
+    "pad_token": "</s>"
+  },
   "added_tokens": [
     {
       "id": 0,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e5151dc59904ec7e1813b8b438e04204d3458311a1c40c997b77956984b03251
+oid sha256:1233a1214722ebed3e47c4bfa28f2151cda4030913ca5fa181ba0915d3cb3771
 size 4920