kyryl-georgian committed
Commit 52bdbb7 · 1 Parent(s): 6da15f5

End of training
README.md CHANGED
@@ -1,9 +1,8 @@
 ---
 license: apache-2.0
-library_name: peft
+base_model: google/flan-t5-small
 tags:
 - generated_from_trainer
-base_model: google/flan-t5-small
 model-index:
 - name: flan-base-sql
   results: []
@@ -16,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [google/flan-t5-small](https://huggingface.co/google/flan-t5-small) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1072
+- Loss: 0.1649
 
 ## Model description
 
@@ -51,23 +50,12 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 0.4003        | 0.9   | 500  | 0.1629          |
-| 0.2314        | 1.81  | 1000 | 0.1386          |
-| 0.2065        | 2.71  | 1500 | 0.1289          |
-| 0.187         | 3.62  | 2000 | 0.1233          |
-| 0.1791        | 4.52  | 2500 | 0.1169          |
-| 0.1713        | 5.42  | 3000 | 0.1153          |
-| 0.1661        | 6.33  | 3500 | 0.1122          |
-| 0.1604        | 7.23  | 4000 | 0.1085          |
-| 0.1574        | 8.14  | 4500 | 0.1099          |
-| 0.1541        | 9.04  | 5000 | 0.1064          |
-| 0.1521        | 9.95  | 5500 | 0.1071          |
+| 0.4024        | 6.33  | 500  | 0.1728          |
 
 
 ### Framework versions
 
-- PEFT 0.7.1
-- Transformers 4.38.0
-- Pytorch 2.1.2+cu121
-- Datasets 2.17.0
-- Tokenizers 0.15.2
+- Transformers 4.36.0.dev0
+- Pytorch 2.1.0+cu118
+- Datasets 2.14.6
+- Tokenizers 0.14.1
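The card above describes a LoRA adapter trained on top of google/flan-t5-small (the adapter files appear further down in this commit). A minimal sketch of loading it for inference with PEFT; the Hub repo id and the prompt format below are assumptions, since neither is stated in the card:

```python
# Sketch only: the repo id is inferred from the commit author and model name,
# and the prompt wording is a guess at the text-to-SQL task format.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
model = PeftModel.from_pretrained(base, "kyryl-georgian/flan-base-sql")  # assumed repo id

prompt = "Translate to SQL: how many rows are in the users table?"  # assumed task format
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```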
 
adapter_config.json CHANGED
@@ -19,8 +19,9 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v",
-    "q"
+    "q",
+    "v"
   ],
-  "task_type": "SEQ_2_SEQ_LM"
+  "task_type": "SEQ_2_SEQ_LM",
+  "use_rslora": false
 }
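For context, a sketch of a PEFT LoraConfig that would serialize to roughly the config above. The rank, alpha and dropout values are assumptions (they fall outside the hunk shown); target_modules, task_type and use_rslora mirror the file:

```python
from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForSeq2SeqLM

base = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
lora_config = LoraConfig(
    r=8,                        # assumed; not visible in this hunk
    lora_alpha=16,              # assumed; not visible in this hunk
    lora_dropout=0.05,          # assumed; not visible in this hunk
    target_modules=["q", "v"],  # T5 attention query/value projections, as in the config
    task_type=TaskType.SEQ_2_SEQ_LM,
    use_rslora=False,
)
model = get_peft_model(base, lora_config)
model.print_trainable_parameters()  # only the LoRA matrices are trainable
```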
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4af10a22bd170f6a681891351deeaa4edddd58bc62b4d0732320b6f5fcd408fd
+oid sha256:41e5a6c074adf70a3081c0bd661b8d62426a9507595d7d7a2ec73bd4767e2067
 size 2765880
all_results.json CHANGED
@@ -1,11 +1,11 @@
 {
     "epoch": 10.0,
-    "eval_loss": 0.10718318819999695,
-    "eval_runtime": 3.0141,
-    "eval_samples_per_second": 2607.118,
-    "eval_steps_per_second": 20.57,
-    "train_loss": 0.19658087959772425,
-    "train_runtime": 755.9435,
-    "train_samples_per_second": 935.506,
-    "train_steps_per_second": 7.315
+    "eval_loss": 0.16487877070903778,
+    "eval_runtime": 2.8546,
+    "eval_samples_per_second": 2752.375,
+    "eval_steps_per_second": 21.719,
+    "train_loss": 0.3472654608231557,
+    "train_runtime": 106.5441,
+    "train_samples_per_second": 938.578,
+    "train_steps_per_second": 7.415
 }
emissions.csv ADDED
@@ -0,0 +1,4 @@
+timestamp,experiment_id,project_name,duration,emissions,energy_consumed,country_name,country_iso_code,region,on_cloud,cloud_provider,cloud_region
+2024-02-24T00:05:40,16ea20c8-bcb1-453e-bce2-f6cb0599f084,codecarbon,0.3465697765350342,2.382074432305905e-05,6.453136242463374e-05,United States,USA,virginia,N,,
+2024-02-24T00:08:29,9fde16a4-8a69-4bb9-a34b-f48f907546a5,codecarbon,0.3421628475189209,2.3325881593192804e-05,6.319075922020199e-05,United States,USA,virginia,N,,
+2024-02-24T00:12:44,d9ffb887-54e6-41cc-a071-684630728cdc,codecarbon,106.2760329246521,0.012281562312257426,0.03327125038430488,United States,USA,virginia,N,,
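The file above follows codecarbon's standard CSV schema. One plausible source is the Transformers Trainer's CodeCarbonCallback, which is attached automatically when the codecarbon package is installed; a standalone sketch of the underlying API, with the wrapped training call left as a placeholder:

```python
from codecarbon import EmissionsTracker

tracker = EmissionsTracker(project_name="codecarbon", output_file="emissions.csv")
tracker.start()
try:
    run_training()  # placeholder for the fine-tuning run being measured
finally:
    emissions_kg = tracker.stop()  # appends a row like the ones above; returns kg CO2-eq
    print(f"Estimated emissions: {emissions_kg:.6f} kg CO2-eq")
```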
eval_results.json CHANGED
@@ -1,7 +1,7 @@
 {
     "epoch": 10.0,
-    "eval_loss": 0.10718318819999695,
-    "eval_runtime": 3.0141,
-    "eval_samples_per_second": 2607.118,
-    "eval_steps_per_second": 20.57
+    "eval_loss": 0.16487877070903778,
+    "eval_runtime": 2.8546,
+    "eval_samples_per_second": 2752.375,
+    "eval_steps_per_second": 21.719
 }
runs/Feb24_00-04-25_bd236bb80193/events.out.tfevents.1708733095.bd236bb80193.37.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14bd2e3ee3c5b4adde9589d9edd0fe5032b6ac6246666af865022bb732304326
+size 5342
runs/Feb24_00-05-16_bd236bb80193/events.out.tfevents.1708733139.bd236bb80193.413.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:056ffc6ba8d477c8d7a49b8aed88ea3551315eb633fbd72eae1b4520cb6024c1
+size 5691
runs/Feb24_00-05-16_bd236bb80193/events.out.tfevents.1708733153.bd236bb80193.413.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a923660a4f2cfd27f425ca843977587f67ee4cfd04b0c3883553ba83d9fb6df
+size 354
runs/Feb24_00-08-06_bd236bb80193/events.out.tfevents.1708733309.bd236bb80193.988.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:933752e60e1039ea736004287e5574a9b6b48e73fbfb2b69c188daa264b5bcd3
+size 5691
runs/Feb24_00-08-06_bd236bb80193/events.out.tfevents.1708733322.bd236bb80193.988.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb40150c9eef6df850249faccc4638b6810165c4ff7c89209825de66112280fa
+size 354
runs/Feb24_00-09-32_bd236bb80193/events.out.tfevents.1708733457.bd236bb80193.1435.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9e14fc39e5a496ee49bb915026dd342822afd401f51a75283ac2914f25a938f
+size 6123
runs/Feb24_00-09-32_bd236bb80193/events.out.tfevents.1708733567.bd236bb80193.1435.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6f842e830a7e5b485c076d40c0993281b6b462bb67af613bd03170c51e94bfd
+size 359
special_tokens_map.json CHANGED
@@ -101,25 +101,7 @@
     "<extra_id_98>",
     "<extra_id_99>"
   ],
-  "eos_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "<pad>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
 }
spiece.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
+size 791656
tokenizer.json CHANGED
@@ -964,8 +964,7 @@
   "pre_tokenizer": {
     "type": "Metaspace",
     "replacement": "▁",
-    "add_prefix_space": true,
-    "prepend_scheme": "always"
+    "add_prefix_space": true
   },
   "post_processor": {
     "type": "TemplateProcessing",
@@ -1024,8 +1023,7 @@
   "decoder": {
     "type": "Metaspace",
     "replacement": "▁",
-    "add_prefix_space": true,
-    "prepend_scheme": "always"
+    "add_prefix_space": true
   },
   "model": {
     "type": "Unigram",
train_results.json CHANGED
@@ -1,7 +1,7 @@
 {
     "epoch": 10.0,
-    "train_loss": 0.19658087959772425,
-    "train_runtime": 755.9435,
-    "train_samples_per_second": 935.506,
-    "train_steps_per_second": 7.315
+    "train_loss": 0.3472654608231557,
+    "train_runtime": 106.5441,
+    "train_samples_per_second": 938.578,
+    "train_steps_per_second": 7.415
 }
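train_results.json, eval_results.json, all_results.json and the trainer_state.json below are the standard artifacts written by the Hugging Face Trainer helper methods. A sketch of the usual pattern, assuming `trainer` is an already-constructed Seq2SeqTrainer (the training script itself is not part of this commit):

```python
# Fragment, not runnable on its own: `trainer` is assumed to be a configured
# Seq2SeqTrainer; shown only to indicate where the JSON files above come from.
train_result = trainer.train()
trainer.save_model()

trainer.log_metrics("train", train_result.metrics)
trainer.save_metrics("train", train_result.metrics)  # writes train_results.json, merged into all_results.json
trainer.save_state()                                 # writes trainer_state.json

eval_metrics = trainer.evaluate()
trainer.log_metrics("eval", eval_metrics)
trainer.save_metrics("eval", eval_metrics)           # writes eval_results.json
```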
trainer_state.json CHANGED
@@ -3,193 +3,40 @@
   "best_model_checkpoint": null,
   "epoch": 10.0,
   "eval_steps": 500,
-  "global_step": 5530,
+  "global_step": 790,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
-    {
-      "epoch": 0.9,
-      "grad_norm": 0.1664367914199829,
-      "learning_rate": 0.0009095840867992767,
-      "loss": 0.4003,
-      "step": 500
-    },
-    {
-      "epoch": 0.9,
-      "eval_loss": 0.162927508354187,
-      "eval_runtime": 3.0318,
-      "eval_samples_per_second": 2591.854,
-      "eval_steps_per_second": 20.45,
-      "step": 500
-    },
-    {
-      "epoch": 1.81,
-      "grad_norm": 0.1386815905570984,
-      "learning_rate": 0.0008191681735985533,
-      "loss": 0.2314,
-      "step": 1000
-    },
-    {
-      "epoch": 1.81,
-      "eval_loss": 0.1386137157678604,
-      "eval_runtime": 3.0434,
-      "eval_samples_per_second": 2582.007,
-      "eval_steps_per_second": 20.372,
-      "step": 1000
-    },
-    {
-      "epoch": 2.71,
-      "grad_norm": 0.1781063824892044,
-      "learning_rate": 0.0007287522603978301,
-      "loss": 0.2065,
-      "step": 1500
-    },
-    {
-      "epoch": 2.71,
-      "eval_loss": 0.1289130598306656,
-      "eval_runtime": 3.0677,
-      "eval_samples_per_second": 2561.542,
-      "eval_steps_per_second": 20.211,
-      "step": 1500
-    },
-    {
-      "epoch": 3.62,
-      "grad_norm": 0.15570929646492004,
-      "learning_rate": 0.0006383363471971068,
-      "loss": 0.187,
-      "step": 2000
-    },
-    {
-      "epoch": 3.62,
-      "eval_loss": 0.12326223403215408,
-      "eval_runtime": 3.0605,
-      "eval_samples_per_second": 2567.579,
-      "eval_steps_per_second": 20.258,
-      "step": 2000
-    },
-    {
-      "epoch": 4.52,
-      "grad_norm": 0.16776247322559357,
-      "learning_rate": 0.0005479204339963833,
-      "loss": 0.1791,
-      "step": 2500
-    },
-    {
-      "epoch": 4.52,
-      "eval_loss": 0.1168670803308487,
-      "eval_runtime": 3.0473,
-      "eval_samples_per_second": 2578.705,
-      "eval_steps_per_second": 20.346,
-      "step": 2500
-    },
-    {
-      "epoch": 5.42,
-      "grad_norm": 0.1355486512184143,
-      "learning_rate": 0.0004575045207956601,
-      "loss": 0.1713,
-      "step": 3000
-    },
-    {
-      "epoch": 5.42,
-      "eval_loss": 0.11528698354959488,
-      "eval_runtime": 3.0013,
-      "eval_samples_per_second": 2618.163,
-      "eval_steps_per_second": 20.657,
-      "step": 3000
-    },
     {
       "epoch": 6.33,
-      "grad_norm": 0.16372531652450562,
       "learning_rate": 0.0003670886075949367,
-      "loss": 0.1661,
-      "step": 3500
+      "loss": 0.4024,
+      "step": 500
     },
     {
       "epoch": 6.33,
-      "eval_loss": 0.11218289285898209,
-      "eval_runtime": 2.9586,
-      "eval_samples_per_second": 2655.959,
-      "eval_steps_per_second": 20.956,
-      "step": 3500
-    },
-    {
-      "epoch": 7.23,
-      "grad_norm": 0.1596778929233551,
-      "learning_rate": 0.0002766726943942134,
-      "loss": 0.1604,
-      "step": 4000
-    },
-    {
-      "epoch": 7.23,
-      "eval_loss": 0.1085081547498703,
-      "eval_runtime": 2.9539,
-      "eval_samples_per_second": 2660.243,
-      "eval_steps_per_second": 20.989,
-      "step": 4000
-    },
-    {
-      "epoch": 8.14,
-      "grad_norm": 0.15582768619060516,
-      "learning_rate": 0.00018625678119349006,
-      "loss": 0.1574,
-      "step": 4500
-    },
-    {
-      "epoch": 8.14,
-      "eval_loss": 0.1098729744553566,
-      "eval_runtime": 2.9739,
-      "eval_samples_per_second": 2642.311,
-      "eval_steps_per_second": 20.848,
-      "step": 4500
-    },
-    {
-      "epoch": 9.04,
-      "grad_norm": 0.15063905715942383,
-      "learning_rate": 9.584086799276672e-05,
-      "loss": 0.1541,
-      "step": 5000
-    },
-    {
-      "epoch": 9.04,
-      "eval_loss": 0.10638037323951721,
-      "eval_runtime": 3.0665,
-      "eval_samples_per_second": 2562.534,
-      "eval_steps_per_second": 20.219,
-      "step": 5000
-    },
-    {
-      "epoch": 9.95,
-      "grad_norm": 0.14130930602550507,
-      "learning_rate": 5.4249547920433995e-06,
-      "loss": 0.1521,
-      "step": 5500
-    },
-    {
-      "epoch": 9.95,
-      "eval_loss": 0.1071261540055275,
-      "eval_runtime": 3.024,
-      "eval_samples_per_second": 2598.51,
-      "eval_steps_per_second": 20.502,
-      "step": 5500
+      "eval_loss": 0.17280669510364532,
+      "eval_runtime": 2.8781,
+      "eval_samples_per_second": 2729.968,
+      "eval_steps_per_second": 21.542,
+      "step": 500
     },
     {
       "epoch": 10.0,
-      "step": 5530,
-      "total_flos": 2.2872619342626816e+16,
-      "train_loss": 0.19658087959772425,
-      "train_runtime": 755.9435,
-      "train_samples_per_second": 935.506,
-      "train_steps_per_second": 7.315
+      "step": 790,
+      "total_flos": 3267517032169472.0,
+      "train_loss": 0.3472654608231557,
+      "train_runtime": 106.5441,
+      "train_samples_per_second": 938.578,
+      "train_steps_per_second": 7.415
     }
   ],
   "logging_steps": 500,
-  "max_steps": 5530,
-  "num_input_tokens_seen": 0,
+  "max_steps": 790,
   "num_train_epochs": 10,
   "save_steps": 500,
-  "total_flos": 2.2872619342626816e+16,
-  "train_batch_size": 16,
+  "total_flos": 3267517032169472.0,
   "trial_name": null,
   "trial_params": null
 }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1791b93576383f366ada713fe62fa1a5066567f1c635c3f329bc8f36e8673a58
-size 5048
+oid sha256:73516ab6f53c226d6a40128d334ce1313b9d34fdf37c818f3cb4ba28312154df
+size 4856