kyryl-georgian committed on
Commit
5e13998
1 Parent(s): 52bdbb7

End of training

Browse files
README.md CHANGED
@@ -1,8 +1,9 @@
1
  ---
2
  license: apache-2.0
3
- base_model: google/flan-t5-small
4
  tags:
5
  - generated_from_trainer
 
6
  model-index:
7
  - name: flan-base-sql
8
  results: []
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [google/flan-t5-small](https://huggingface.co/google/flan-t5-small) on an unspecified dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.1649
19
 
20
  ## Model description
21
 
@@ -38,24 +39,18 @@ The following hyperparameters were used during training:
38
  - train_batch_size: 16
39
  - eval_batch_size: 16
40
  - seed: 42
41
- - distributed_type: multi-GPU
42
- - num_devices: 8
43
- - total_train_batch_size: 128
44
- - total_eval_batch_size: 128
45
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
46
  - lr_scheduler_type: linear
47
- - num_epochs: 10.0
48
 
49
  ### Training results
50
 
51
- | Training Loss | Epoch | Step | Validation Loss |
52
- |:-------------:|:-----:|:----:|:---------------:|
53
- | 0.4024 | 6.33 | 500 | 0.1728 |
54
 
55
 
56
  ### Framework versions
57
 
58
- - Transformers 4.36.0.dev0
59
- - Pytorch 2.1.0+cu118
60
- - Datasets 2.14.6
61
- - Tokenizers 0.14.1
 
 
1
  ---
2
  license: apache-2.0
3
+ library_name: peft
4
  tags:
5
  - generated_from_trainer
6
+ base_model: google/flan-t5-small
7
  model-index:
8
  - name: flan-base-sql
9
  results: []
 
16
 
17
  This model is a fine-tuned version of [google/flan-t5-small](https://huggingface.co/google/flan-t5-small) on an unspecified dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 2.0196
20
 
21
  ## Model description
22
 
 
39
  - train_batch_size: 16
40
  - eval_batch_size: 16
41
  - seed: 42
 
 
 
 
42
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
43
  - lr_scheduler_type: linear
44
+ - num_epochs: 0.0001
45
 
46
  ### Training results
47
 
 
 
 
48
 
49
 
50
  ### Framework versions
51
 
52
+ - PEFT 0.7.1
53
+ - Transformers 4.38.0
54
+ - Pytorch 2.1.2+cu121
55
+ - Datasets 2.17.0
56
+ - Tokenizers 0.15.2
adapter_config.json CHANGED
@@ -19,9 +19,8 @@
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
- "q",
23
- "v"
24
  ],
25
- "task_type": "SEQ_2_SEQ_LM",
26
- "use_rslora": false
27
  }
 
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
+ "v",
23
+ "q"
24
  ],
25
+ "task_type": "SEQ_2_SEQ_LM"
 
26
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41e5a6c074adf70a3081c0bd661b8d62426a9507595d7d7a2ec73bd4767e2067
3
  size 2765880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0a437b988026fcdec3ff95b8370ab7dc9f9498d23bb52b7d853d82bd45c24a1
3
  size 2765880
all_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_loss": 0.16487877070903778,
4
- "eval_runtime": 2.8546,
5
- "eval_samples_per_second": 2752.375,
6
- "eval_steps_per_second": 21.719,
7
- "train_loss": 0.3472654608231557,
8
- "train_runtime": 106.5441,
9
- "train_samples_per_second": 938.578,
10
- "train_steps_per_second": 7.415
11
  }
 
1
  {
2
+ "epoch": 0.02,
3
+ "eval_loss": 2.0195727348327637,
4
+ "eval_runtime": 13.0603,
5
+ "eval_samples_per_second": 601.592,
6
+ "eval_steps_per_second": 37.671,
7
+ "train_loss": 2.8207314014434814,
8
+ "train_runtime": 0.8917,
9
+ "train_samples_per_second": 0.112,
10
+ "train_steps_per_second": 1.121
11
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_loss": 0.16487877070903778,
4
- "eval_runtime": 2.8546,
5
- "eval_samples_per_second": 2752.375,
6
- "eval_steps_per_second": 21.719
7
  }
 
1
  {
2
+ "epoch": 0.02,
3
+ "eval_loss": 2.0195727348327637,
4
+ "eval_runtime": 13.0603,
5
+ "eval_samples_per_second": 601.592,
6
+ "eval_steps_per_second": 37.671
7
  }
special_tokens_map.json CHANGED
@@ -101,7 +101,25 @@
101
  "<extra_id_98>",
102
  "<extra_id_99>"
103
  ],
104
- "eos_token": "</s>",
105
- "pad_token": "<pad>",
106
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  }
 
101
  "<extra_id_98>",
102
  "<extra_id_99>"
103
  ],
104
+ "eos_token": {
105
+ "content": "</s>",
106
+ "lstrip": false,
107
+ "normalized": false,
108
+ "rstrip": false,
109
+ "single_word": false
110
+ },
111
+ "pad_token": {
112
+ "content": "<pad>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "unk_token": {
119
+ "content": "<unk>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ }
125
  }
tokenizer.json CHANGED
@@ -2,13 +2,13 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 47,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
- "Fixed": 47
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
@@ -964,7 +964,8 @@
964
  "pre_tokenizer": {
965
  "type": "Metaspace",
966
  "replacement": "▁",
967
- "add_prefix_space": true
 
968
  },
969
  "post_processor": {
970
  "type": "TemplateProcessing",
@@ -1023,7 +1024,8 @@
1023
  "decoder": {
1024
  "type": "Metaspace",
1025
  "replacement": "▁",
1026
- "add_prefix_space": true
 
1027
  },
1028
  "model": {
1029
  "type": "Unigram",
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 46,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
+ "Fixed": 46
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
 
964
  "pre_tokenizer": {
965
  "type": "Metaspace",
966
  "replacement": "▁",
967
+ "add_prefix_space": true,
968
+ "prepend_scheme": "always"
969
  },
970
  "post_processor": {
971
  "type": "TemplateProcessing",
 
1024
  "decoder": {
1025
  "type": "Metaspace",
1026
  "replacement": "▁",
1027
+ "add_prefix_space": true,
1028
+ "prepend_scheme": "always"
1029
  },
1030
  "model": {
1031
  "type": "Unigram",
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 10.0,
3
- "train_loss": 0.3472654608231557,
4
- "train_runtime": 106.5441,
5
- "train_samples_per_second": 938.578,
6
- "train_steps_per_second": 7.415
7
  }
 
1
  {
2
+ "epoch": 0.02,
3
+ "train_loss": 2.8207314014434814,
4
+ "train_runtime": 0.8917,
5
+ "train_samples_per_second": 0.112,
6
+ "train_steps_per_second": 1.121
7
  }
trainer_state.json CHANGED
@@ -1,42 +1,30 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.0,
5
  "eval_steps": 500,
6
- "global_step": 790,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 6.33,
13
- "learning_rate": 0.0003670886075949367,
14
- "loss": 0.4024,
15
- "step": 500
16
- },
17
- {
18
- "epoch": 6.33,
19
- "eval_loss": 0.17280669510364532,
20
- "eval_runtime": 2.8781,
21
- "eval_samples_per_second": 2729.968,
22
- "eval_steps_per_second": 21.542,
23
- "step": 500
24
- },
25
- {
26
- "epoch": 10.0,
27
- "step": 790,
28
- "total_flos": 3267517032169472.0,
29
- "train_loss": 0.3472654608231557,
30
- "train_runtime": 106.5441,
31
- "train_samples_per_second": 938.578,
32
- "train_steps_per_second": 7.415
33
  }
34
  ],
35
  "logging_steps": 500,
36
- "max_steps": 790,
37
- "num_train_epochs": 10,
 
38
  "save_steps": 500,
39
- "total_flos": 3267517032169472.0,
 
40
  "trial_name": null,
41
  "trial_params": null
42
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.015873015873015872,
5
  "eval_steps": 500,
6
+ "global_step": 1,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.02,
13
+ "step": 1,
14
+ "total_flos": 517012193280.0,
15
+ "train_loss": 2.8207314014434814,
16
+ "train_runtime": 0.8917,
17
+ "train_samples_per_second": 0.112,
18
+ "train_steps_per_second": 1.121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
  ],
21
  "logging_steps": 500,
22
+ "max_steps": 1,
23
+ "num_input_tokens_seen": 0,
24
+ "num_train_epochs": 1,
25
  "save_steps": 500,
26
+ "total_flos": 517012193280.0,
27
+ "train_batch_size": 16,
28
  "trial_name": null,
29
  "trial_params": null
30
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73516ab6f53c226d6a40128d334ce1313b9d34fdf37c818f3cb4ba28312154df
3
- size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fc556c3de983d027da1ff74eb7ff292ea3fd4da4549339f39cafc1347592486
3
+ size 5112