Biscottezi
committed on
Commit
•
0dbe221
1
Parent(s):
1f7c896
End of training
Browse files
- README.md +79 -0
- config.json +32 -0
- generation_config.json +6 -0
- model.safetensors +3 -0
- runs/Oct02_18-03-35_5232ce4ff845/events.out.tfevents.1727892221.5232ce4ff845.505.0 +3 -0
- runs/Oct02_18-06-50_5232ce4ff845/events.out.tfevents.1727892415.5232ce4ff845.28340.0 +3 -0
- runs/Oct02_18-09-45_5232ce4ff845/events.out.tfevents.1727892585.5232ce4ff845.29210.0 +3 -0
- runs/Oct02_18-37-38_f2b0bfe08033/events.out.tfevents.1727894260.f2b0bfe08033.344.0 +3 -0
- runs/Oct02_18-40-15_f2b0bfe08033/events.out.tfevents.1727894418.f2b0bfe08033.344.1 +3 -0
- runs/Oct02_18-40-46_f2b0bfe08033/events.out.tfevents.1727894451.f2b0bfe08033.344.2 +3 -0
- runs/Oct04_06-24-24_f17b6c91b460/events.out.tfevents.1728023067.f17b6c91b460.527.0 +3 -0
- runs/Oct04_06-32-52_f17b6c91b460/events.out.tfevents.1728023583.f17b6c91b460.527.1 +3 -0
- runs/Oct04_06-59-53_a8ec5cfd56c3/events.out.tfevents.1728025195.a8ec5cfd56c3.1716.0 +3 -0
- runs/Oct04_07-09-09_fa3de561d834/events.out.tfevents.1728025751.fa3de561d834.1024.0 +3 -0
- runs/Oct04_08-44-56_1f3fbff7071c/events.out.tfevents.1728031498.1f3fbff7071c.618.0 +3 -0
- runs/Oct04_09-01-59_1f3fbff7071c/events.out.tfevents.1728032597.1f3fbff7071c.618.1 +3 -0
- runs/Oct04_09-04-56_1f3fbff7071c/events.out.tfevents.1728032706.1f3fbff7071c.618.2 +3 -0
- runs/Oct04_09-08-33_607d790ee641/events.out.tfevents.1728032914.607d790ee641.234.0 +3 -0
- runs/Oct11_11-17-47_b33ef8978df7/events.out.tfevents.1728645474.b33ef8978df7.228.0 +3 -0
- runs/Oct12_02-22-19_052bcb00e409/events.out.tfevents.1728699742.052bcb00e409.200.0 +3 -0
- special_tokens_map.json +23 -0
- spiece.model +3 -0
- tokenizer_config.json +39 -0
- training_args.bin +3 -0
README.md
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: transformers
|
3 |
+
license: apache-2.0
|
4 |
+
base_model: google/mt5-small
|
5 |
+
tags:
|
6 |
+
- generated_from_trainer
|
7 |
+
datasets:
|
8 |
+
- vitext2sql
|
9 |
+
metrics:
|
10 |
+
- bleu
|
11 |
+
model-index:
|
12 |
+
- name: TrainedModels
|
13 |
+
results:
|
14 |
+
- task:
|
15 |
+
name: Sequence-to-sequence Language Modeling
|
16 |
+
type: text2text-generation
|
17 |
+
dataset:
|
18 |
+
name: vitext2sql
|
19 |
+
type: vitext2sql
|
20 |
+
config: vitext2sql_source
|
21 |
+
split: test
|
22 |
+
args: vitext2sql_source
|
23 |
+
metrics:
|
24 |
+
- name: Bleu
|
25 |
+
type: bleu
|
26 |
+
value: 5.1741
|
27 |
+
---
|
28 |
+
|
29 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
30 |
+
should probably proofread and complete it, then remove this comment. -->
|
31 |
+
|
32 |
+
# TrainedModels
|
33 |
+
|
34 |
+
This model is a fine-tuned version of [google/mt5-small](https://huggingface.co/google/mt5-small) on the vitext2sql dataset.
|
35 |
+
It achieves the following results on the evaluation set:
|
36 |
+
- Loss: 0.7564
|
37 |
+
- Bleu: 5.1741
|
38 |
+
- Gen Len: 18.1829
|
39 |
+
|
40 |
+
## Model description
|
41 |
+
|
42 |
+
More information needed
|
43 |
+
|
44 |
+
## Intended uses & limitations
|
45 |
+
|
46 |
+
More information needed
|
47 |
+
|
48 |
+
## Training and evaluation data
|
49 |
+
|
50 |
+
More information needed
|
51 |
+
|
52 |
+
## Training procedure
|
53 |
+
|
54 |
+
### Training hyperparameters
|
55 |
+
|
56 |
+
The following hyperparameters were used during training:
|
57 |
+
- learning_rate: 5e-05
|
58 |
+
- train_batch_size: 1
|
59 |
+
- eval_batch_size: 1
|
60 |
+
- seed: 42
|
61 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
62 |
+
- lr_scheduler_type: linear
|
63 |
+
- num_epochs: 3
|
64 |
+
|
65 |
+
### Training results
|
66 |
+
|
67 |
+
| Training Loss | Epoch | Step | Validation Loss | Bleu | Gen Len |
|
68 |
+
|:-------------:|:-----:|:-----:|:---------------:|:------:|:-------:|
|
69 |
+
| 0.9517 | 1.0 | 6831 | 0.8409 | 3.1104 | 16.164 |
|
70 |
+
| 0.7029 | 2.0 | 13662 | 0.7696 | 4.9487 | 18.153 |
|
71 |
+
| 0.6078 | 3.0 | 20493 | 0.7564 | 5.1741 | 18.1829 |
|
72 |
+
|
73 |
+
|
74 |
+
### Framework versions
|
75 |
+
|
76 |
+
- Transformers 4.44.2
|
77 |
+
- Pytorch 2.4.1+cu121
|
78 |
+
- Datasets 3.0.1
|
79 |
+
- Tokenizers 0.19.1
|
config.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "google/mt5-small",
|
3 |
+
"architectures": [
|
4 |
+
"MT5ForConditionalGeneration"
|
5 |
+
],
|
6 |
+
"classifier_dropout": 0.0,
|
7 |
+
"d_ff": 1024,
|
8 |
+
"d_kv": 64,
|
9 |
+
"d_model": 512,
|
10 |
+
"decoder_start_token_id": 0,
|
11 |
+
"dense_act_fn": "gelu_new",
|
12 |
+
"dropout_rate": 0.1,
|
13 |
+
"eos_token_id": 1,
|
14 |
+
"feed_forward_proj": "gated-gelu",
|
15 |
+
"initializer_factor": 1.0,
|
16 |
+
"is_encoder_decoder": true,
|
17 |
+
"is_gated_act": true,
|
18 |
+
"layer_norm_epsilon": 1e-06,
|
19 |
+
"model_type": "mt5",
|
20 |
+
"num_decoder_layers": 8,
|
21 |
+
"num_heads": 6,
|
22 |
+
"num_layers": 8,
|
23 |
+
"pad_token_id": 0,
|
24 |
+
"relative_attention_max_distance": 128,
|
25 |
+
"relative_attention_num_buckets": 32,
|
26 |
+
"tie_word_embeddings": false,
|
27 |
+
"tokenizer_class": "T5Tokenizer",
|
28 |
+
"torch_dtype": "float32",
|
29 |
+
"transformers_version": "4.44.2",
|
30 |
+
"use_cache": true,
|
31 |
+
"vocab_size": 250112
|
32 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"decoder_start_token_id": 0,
|
3 |
+
"eos_token_id": 1,
|
4 |
+
"pad_token_id": 0,
|
5 |
+
"transformers_version": "4.44.2"
|
6 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f063180e1f0bbd1839414a55a5b38b96249a1378cd3b1d623a98a0dd14653be4
|
3 |
+
size 1200729512
|
runs/Oct02_18-03-35_5232ce4ff845/events.out.tfevents.1727892221.5232ce4ff845.505.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca214345b79bbd957c2e30e120951c8e906de9ccaea378932b0d41c906a51a85
|
3 |
+
size 88
|
runs/Oct02_18-06-50_5232ce4ff845/events.out.tfevents.1727892415.5232ce4ff845.28340.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:337148d97b95960926a020646810f814133ed0473d2a943c43a59240a07d02da
|
3 |
+
size 88
|
runs/Oct02_18-09-45_5232ce4ff845/events.out.tfevents.1727892585.5232ce4ff845.29210.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7604cc742296d21e58e47378e31cf0e5b199b197c815c2c98a38d8c4b4a5e455
|
3 |
+
size 5332
|
runs/Oct02_18-37-38_f2b0bfe08033/events.out.tfevents.1727894260.f2b0bfe08033.344.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b87c1b1a4a20385767d6da5d2f5100fc949317c23fbab6eb3bed1e0693ba7b73
|
3 |
+
size 5330
|
runs/Oct02_18-40-15_f2b0bfe08033/events.out.tfevents.1727894418.f2b0bfe08033.344.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3b81e79bf1302dceeb15e62dfbc3808326ab7d900a38a6930cfbb7671ddc024
|
3 |
+
size 5330
|
runs/Oct02_18-40-46_f2b0bfe08033/events.out.tfevents.1727894451.f2b0bfe08033.344.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e027207e94b783f971c825210e1f3d36eebc83c64cfe6c80911364dc10ce51d4
|
3 |
+
size 5542
|
runs/Oct04_06-24-24_f17b6c91b460/events.out.tfevents.1728023067.f17b6c91b460.527.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8427a217dd09f7faeecc7738483f849f60be982c0d0a0e40bb0a4f66868232e7
|
3 |
+
size 5542
|
runs/Oct04_06-32-52_f17b6c91b460/events.out.tfevents.1728023583.f17b6c91b460.527.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2ad4018fc90a9b519a711c2bd8dc62ec2a7ac530ec8adead45a4939e87d32bf
|
3 |
+
size 5331
|
runs/Oct04_06-59-53_a8ec5cfd56c3/events.out.tfevents.1728025195.a8ec5cfd56c3.1716.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f904334e1577c60dec03b7d54e502f4a0e1ae7b711bb137af38f36cd1d9747d2
|
3 |
+
size 5331
|
runs/Oct04_07-09-09_fa3de561d834/events.out.tfevents.1728025751.fa3de561d834.1024.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e28a4ec4d38594b432280b3bae8df801be73b44e8e9fec36f766b10671942b4c
|
3 |
+
size 88
|
runs/Oct04_08-44-56_1f3fbff7071c/events.out.tfevents.1728031498.1f3fbff7071c.618.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce3bf31da1debaec38c5815693e96613d9610f87f3a3ba30ad1191e142221f3f
|
3 |
+
size 5331
|
runs/Oct04_09-01-59_1f3fbff7071c/events.out.tfevents.1728032597.1f3fbff7071c.618.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ddd3dd109775ab0ec66d392ce8d40583c3998b9d3543ada21b3d0c4e655f58ed
|
3 |
+
size 5331
|
runs/Oct04_09-04-56_1f3fbff7071c/events.out.tfevents.1728032706.1f3fbff7071c.618.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:adc0b8916c6ec815828e00b84f9708348af06db6d3da2946a8abff7bc570d62e
|
3 |
+
size 88
|
runs/Oct04_09-08-33_607d790ee641/events.out.tfevents.1728032914.607d790ee641.234.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9773b6742addab102fc6e3dd8d159c2d6d5cd78a9a99b1cf9abe37f97102b498
|
3 |
+
size 88
|
runs/Oct11_11-17-47_b33ef8978df7/events.out.tfevents.1728645474.b33ef8978df7.228.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6dbcddffee0e81311751b06f632593af7271130f82442ee9174d3fc4e3857b3
|
3 |
+
size 88
|
runs/Oct12_02-22-19_052bcb00e409/events.out.tfevents.1728699742.052bcb00e409.200.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67dcf99a0db8e2abfcd87be90e1d19dee16fbd1b28a5ef002d663c2d4b72ebf5
|
3 |
+
size 15255
|
special_tokens_map.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"eos_token": {
|
3 |
+
"content": "</s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"pad_token": {
|
10 |
+
"content": "<pad>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"unk_token": {
|
17 |
+
"content": "<unk>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
}
|
23 |
+
}
|
spiece.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
|
3 |
+
size 4309802
|
tokenizer_config.json
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": true,
|
3 |
+
"added_tokens_decoder": {
|
4 |
+
"0": {
|
5 |
+
"content": "<pad>",
|
6 |
+
"lstrip": false,
|
7 |
+
"normalized": false,
|
8 |
+
"rstrip": false,
|
9 |
+
"single_word": false,
|
10 |
+
"special": true
|
11 |
+
},
|
12 |
+
"1": {
|
13 |
+
"content": "</s>",
|
14 |
+
"lstrip": false,
|
15 |
+
"normalized": false,
|
16 |
+
"rstrip": false,
|
17 |
+
"single_word": false,
|
18 |
+
"special": true
|
19 |
+
},
|
20 |
+
"2": {
|
21 |
+
"content": "<unk>",
|
22 |
+
"lstrip": false,
|
23 |
+
"normalized": false,
|
24 |
+
"rstrip": false,
|
25 |
+
"single_word": false,
|
26 |
+
"special": true
|
27 |
+
}
|
28 |
+
},
|
29 |
+
"additional_special_tokens": [],
|
30 |
+
"clean_up_tokenization_spaces": true,
|
31 |
+
"eos_token": "</s>",
|
32 |
+
"extra_ids": 0,
|
33 |
+
"legacy": true,
|
34 |
+
"model_max_length": 1000000000000000019884624838656,
|
35 |
+
"pad_token": "<pad>",
|
36 |
+
"sp_model_kwargs": {},
|
37 |
+
"tokenizer_class": "T5Tokenizer",
|
38 |
+
"unk_token": "<unk>"
|
39 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43438f978afdda92477b9069d96fdb105535e6a147e1e73d204a850fb7e9c2d5
|
3 |
+
size 5432
|