SumanNazir committed on
Commit
bd36bf4
1 Parent(s): b4d1f49

Training in progress, step 500

Browse files
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
  library_name: transformers
3
  license: mit
4
- base_model: SumanNazir/highlight_summary_model_trained_on_reduced_data
5
  tags:
6
  - generated_from_trainer
7
  model-index:
@@ -14,19 +14,9 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # highlight_summary_model_trained_on_reduced_data
16
 
17
- This model is a fine-tuned version of [SumanNazir/highlight_summary_model_trained_on_reduced_data](https://huggingface.co/SumanNazir/highlight_summary_model_trained_on_reduced_data) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - eval_loss: 4.4296
20
- - eval_rouge1: 0.4082
21
- - eval_rouge2: 0.1596
22
- - eval_rougeL: 0.3068
23
- - eval_rougeLsum: 0.3073
24
- - eval_generated_length: 36.5303
25
- - eval_runtime: 99.8804
26
- - eval_samples_per_second: 1.322
27
- - eval_steps_per_second: 0.661
28
- - epoch: 1.0
29
- - step: 263
30
 
31
  ## Model description
32
 
@@ -46,17 +36,25 @@ More information needed
46
 
47
  The following hyperparameters were used during training:
48
  - learning_rate: 2e-05
49
- - train_batch_size: 2
50
- - eval_batch_size: 2
51
  - seed: 42
52
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
53
  - lr_scheduler_type: linear
54
  - num_epochs: 2
55
  - mixed_precision_training: Native AMP
56
 
 
 
 
 
 
 
 
 
57
  ### Framework versions
58
 
59
- - Transformers 4.45.1
60
  - Pytorch 2.4.1+cu121
61
  - Datasets 3.0.1
62
- - Tokenizers 0.20.0
 
1
  ---
2
  library_name: transformers
3
  license: mit
4
+ base_model: facebook/bart-large-xsum
5
  tags:
6
  - generated_from_trainer
7
  model-index:
 
14
 
15
  # highlight_summary_model_trained_on_reduced_data
16
 
17
+ This model is a fine-tuned version of [facebook/bart-large-xsum](https://huggingface.co/facebook/bart-large-xsum) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 1.7400
 
 
 
 
 
 
 
 
 
 
20
 
21
  ## Model description
22
 
 
36
 
37
  The following hyperparameters were used during training:
38
  - learning_rate: 2e-05
39
+ - train_batch_size: 8
40
+ - eval_batch_size: 8
41
  - seed: 42
42
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
43
  - lr_scheduler_type: linear
44
  - num_epochs: 2
45
  - mixed_precision_training: Native AMP
46
 
47
+ ### Training results
48
+
49
+ | Training Loss | Epoch | Step | Validation Loss |
50
+ |:-------------:|:-----:|:----:|:---------------:|
51
+ | No log | 1.0 | 274 | 1.7445 |
52
+ | 1.6984 | 2.0 | 548 | 1.7400 |
53
+
54
+
55
  ### Framework versions
56
 
57
+ - Transformers 4.44.2
58
  - Pytorch 2.4.1+cu121
59
  - Datasets 3.0.1
60
+ - Tokenizers 0.19.1
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "SumanNazir/highlight_summary_model_trained_on_reduced_data",
3
  "_num_labels": 3,
4
  "activation_dropout": 0.0,
5
  "activation_function": "gelu",
@@ -19,7 +19,7 @@
19
  "decoder_layers": 12,
20
  "decoder_start_token_id": 2,
21
  "dropout": 0.1,
22
- "early_stopping": null,
23
  "encoder_attention_heads": 16,
24
  "encoder_ffn_dim": 4096,
25
  "encoder_layerdrop": 0.0,
@@ -42,14 +42,14 @@
42
  "LABEL_1": 1,
43
  "LABEL_2": 2
44
  },
45
- "max_length": null,
46
  "max_position_embeddings": 1024,
47
- "min_length": null,
48
  "model_type": "bart",
49
- "no_repeat_ngram_size": null,
50
  "normalize_before": false,
51
  "normalize_embedding": true,
52
- "num_beams": null,
53
  "num_hidden_layers": 12,
54
  "output_past": true,
55
  "pad_token_id": 1,
@@ -61,7 +61,7 @@
61
  "student_encoder_layers": null,
62
  "task_specific_params": {},
63
  "torch_dtype": "float32",
64
- "transformers_version": "4.45.1",
65
  "use_cache": true,
66
  "vocab_size": 50264
67
  }
 
1
  {
2
+ "_name_or_path": "facebook/bart-large-xsum",
3
  "_num_labels": 3,
4
  "activation_dropout": 0.0,
5
  "activation_function": "gelu",
 
19
  "decoder_layers": 12,
20
  "decoder_start_token_id": 2,
21
  "dropout": 0.1,
22
+ "early_stopping": true,
23
  "encoder_attention_heads": 16,
24
  "encoder_ffn_dim": 4096,
25
  "encoder_layerdrop": 0.0,
 
42
  "LABEL_1": 1,
43
  "LABEL_2": 2
44
  },
45
+ "max_length": 62,
46
  "max_position_embeddings": 1024,
47
+ "min_length": 11,
48
  "model_type": "bart",
49
+ "no_repeat_ngram_size": 3,
50
  "normalize_before": false,
51
  "normalize_embedding": true,
52
+ "num_beams": 6,
53
  "num_hidden_layers": 12,
54
  "output_past": true,
55
  "pad_token_id": 1,
 
61
  "student_encoder_layers": null,
62
  "task_specific_params": {},
63
  "torch_dtype": "float32",
64
+ "transformers_version": "4.44.2",
65
  "use_cache": true,
66
  "vocab_size": 50264
67
  }
generation_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "bos_token_id": 0,
3
  "decoder_start_token_id": 2,
4
  "early_stopping": true,
@@ -9,5 +10,5 @@
9
  "no_repeat_ngram_size": 3,
10
  "num_beams": 6,
11
  "pad_token_id": 1,
12
- "transformers_version": "4.45.1"
13
  }
 
1
  {
2
+ "_from_model_config": true,
3
  "bos_token_id": 0,
4
  "decoder_start_token_id": 2,
5
  "early_stopping": true,
 
10
  "no_repeat_ngram_size": 3,
11
  "num_beams": 6,
12
  "pad_token_id": 1,
13
+ "transformers_version": "4.44.2"
14
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef883e2503e6ea2e90b66392be46dbbc26fc523050c806bed6c6207277e15a87
3
  size 1625422896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce55fd82004d417b5cef14b05849fc9dfaa6d8286f5b40bb4b4d6458f51a6ebc
3
  size 1625422896
runs/Sep30_19-58-10_13959885f477/events.out.tfevents.1727726291.13959885f477.218.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e261134492a90d04a7ccb2aea0997c93d4d03fa456e53eb5c1d318225e370655
3
+ size 6300
special_tokens_map.json CHANGED
@@ -1,25 +1,7 @@
1
  {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": true,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "cls_token": {
10
- "content": "<s>",
11
- "lstrip": false,
12
- "normalized": true,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "eos_token": {
17
- "content": "</s>",
18
- "lstrip": false,
19
- "normalized": true,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
  "mask_token": {
24
  "content": "<mask>",
25
  "lstrip": true,
@@ -27,25 +9,7 @@
27
  "rstrip": false,
28
  "single_word": false
29
  },
30
- "pad_token": {
31
- "content": "<pad>",
32
- "lstrip": false,
33
- "normalized": true,
34
- "rstrip": false,
35
- "single_word": false
36
- },
37
- "sep_token": {
38
- "content": "</s>",
39
- "lstrip": false,
40
- "normalized": true,
41
- "rstrip": false,
42
- "single_word": false
43
- },
44
- "unk_token": {
45
- "content": "<unk>",
46
- "lstrip": false,
47
- "normalized": true,
48
- "rstrip": false,
49
- "single_word": false
50
- }
51
  }
 
1
  {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "mask_token": {
6
  "content": "<mask>",
7
  "lstrip": true,
 
9
  "rstrip": false,
10
  "single_word": false
11
  },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -48,14 +48,10 @@
48
  "eos_token": "</s>",
49
  "errors": "replace",
50
  "mask_token": "<mask>",
51
- "max_length": 128,
52
  "model_max_length": 1024,
53
  "pad_token": "<pad>",
54
  "sep_token": "</s>",
55
- "stride": 0,
56
  "tokenizer_class": "BartTokenizer",
57
  "trim_offsets": true,
58
- "truncation_side": "right",
59
- "truncation_strategy": "longest_first",
60
  "unk_token": "<unk>"
61
  }
 
48
  "eos_token": "</s>",
49
  "errors": "replace",
50
  "mask_token": "<mask>",
 
51
  "model_max_length": 1024,
52
  "pad_token": "<pad>",
53
  "sep_token": "</s>",
 
54
  "tokenizer_class": "BartTokenizer",
55
  "trim_offsets": true,
 
 
56
  "unk_token": "<unk>"
57
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:040a829b06db162606556c2f6239102a308d4530eaa19d03f1bf19f978ba81dd
3
- size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:566c9ecb6f250c606f2f481a33a57aaa068c75a76e5bf7bec15d879ae45b7437
3
+ size 5368