node-py committed on
Commit
52ca4de
1 Parent(s): 842910f

Training in progress epoch 0

Files changed (5)
  1. README.md +4 -53
  2. special_tokens_map.json +4 -3
  3. tf_model.h5 +1 -1
  4. tokenizer.json +0 -0
  5. tokenizer_config.json +38 -11
README.md CHANGED
@@ -15,9 +15,9 @@ probably proofread and complete it, then remove this comment. -->
 
  This model is a fine-tuned version of [distilgpt2](https://huggingface.co/distilgpt2) on an unknown dataset.
  It achieves the following results on the evaluation set:
- - Train Loss: 1.4707
- - Validation Loss: 1.5878
- - Epoch: 49
+ - Train Loss: 7.9915
+ - Validation Loss: 6.1584
+ - Epoch: 0
 
  ## Model description
 
@@ -43,56 +43,7 @@ The following hyperparameters were used during training:
 
  | Train Loss | Validation Loss | Epoch |
  |:----------:|:---------------:|:-----:|
- | 2.5315 | 2.2372 | 0 |
- | 2.2709 | 2.1303 | 1 |
- | 2.1837 | 2.0685 | 2 |
- | 2.1268 | 2.0216 | 3 |
- | 2.0821 | 1.9830 | 4 |
- | 2.0436 | 1.9497 | 5 |
- | 2.0105 | 1.9194 | 6 |
- | 1.9810 | 1.8955 | 7 |
- | 1.9552 | 1.8767 | 8 |
- | 1.9311 | 1.8544 | 9 |
- | 1.9080 | 1.8386 | 10 |
- | 1.8864 | 1.8183 | 11 |
- | 1.8676 | 1.7983 | 12 |
- | 1.8487 | 1.7856 | 13 |
- | 1.8304 | 1.7766 | 14 |
- | 1.8150 | 1.7672 | 15 |
- | 1.7992 | 1.7472 | 16 |
- | 1.7841 | 1.7402 | 17 |
- | 1.7687 | 1.7266 | 18 |
- | 1.7554 | 1.7215 | 19 |
- | 1.7422 | 1.7091 | 20 |
- | 1.7279 | 1.7099 | 21 |
- | 1.7163 | 1.6969 | 22 |
- | 1.7051 | 1.6856 | 23 |
- | 1.6925 | 1.6795 | 24 |
- | 1.6819 | 1.6712 | 25 |
- | 1.6709 | 1.6665 | 26 |
- | 1.6593 | 1.6606 | 27 |
- | 1.6504 | 1.6572 | 28 |
- | 1.6402 | 1.6542 | 29 |
- | 1.6308 | 1.6493 | 30 |
- | 1.6205 | 1.6393 | 31 |
- | 1.6104 | 1.6329 | 32 |
- | 1.5999 | 1.6361 | 33 |
- | 1.5915 | 1.6329 | 34 |
- | 1.5832 | 1.6229 | 35 |
- | 1.5746 | 1.6142 | 36 |
- | 1.5653 | 1.6131 | 37 |
- | 1.5581 | 1.6169 | 38 |
- | 1.5495 | 1.6107 | 39 |
- | 1.5410 | 1.6084 | 40 |
- | 1.5328 | 1.6017 | 41 |
- | 1.5242 | 1.5968 | 42 |
- | 1.5165 | 1.5964 | 43 |
- | 1.5085 | 1.5911 | 44 |
- | 1.5010 | 1.5917 | 45 |
- | 1.4938 | 1.5934 | 46 |
- | 1.4860 | 1.5896 | 47 |
- | 1.4790 | 1.5850 | 48 |
- | 1.4707 | 1.5878 | 49 |
+ | 7.9915 | 6.1584 | 0 |
 
 
  ### Framework versions
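
The per-epoch commit message and the regenerated results table look like the pattern produced by the Keras `PushToHubCallback` in `transformers`, which saves the model, tokenizer, and an updated model card at the end of every epoch and pushes them as a "Training in progress epoch N" commit. Below is a minimal sketch of such a loop; the repo id, data, and hyperparameters are placeholders, not the setup actually used for this model (the card itself says the dataset is unknown).

```python
# Sketch of a TF/Keras fine-tuning loop that produces per-epoch commits like
# this one. Repo id, data, and hyperparameters are placeholders.
import numpy as np
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForCausalLM
from transformers.keras_callbacks import PushToHubCallback

tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
model = TFAutoModelForCausalLM.from_pretrained("distilgpt2")
model.compile(optimizer="adam")  # no loss argument: the model's built-in LM loss is used

# Stand-in data: the dataset behind this commit is unknown, so random token ids
# are used here purely to make the sketch executable.
ids = np.random.randint(0, tokenizer.vocab_size, size=(8, 32)).astype(np.int32)
dataset = tf.data.Dataset.from_tensor_slices({"input_ids": ids, "labels": ids}).batch(4)

# Saves the model, tokenizer, and a regenerated README (with the results table)
# and pushes a "Training in progress epoch N" commit after each epoch.
push_callback = PushToHubCallback(
    output_dir="clm_model_save",
    tokenizer=tokenizer,
    hub_model_id="node-py/your-model-name",  # placeholder repo id
)

model.fit(dataset, validation_data=dataset, epochs=1, callbacks=[push_callback])
```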
special_tokens_map.json CHANGED
@@ -1,6 +1,7 @@
  {
- "bos_token": "<|endoftext|>",
- "eos_token": "<|endoftext|>",
+ "cls_token": "[CLS]",
+ "mask_token": "[MASK]",
  "pad_token": "[PAD]",
- "unk_token": "<|endoftext|>"
+ "sep_token": "[SEP]",
+ "unk_token": "[UNK]"
  }
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9e1a9b834a0baf37fd58186a4c06092deaed095b71f1173db960202ba99198df
+ oid sha256:8a20591b2085ece3610748f32c8387eb25018b18b393976b335e288389a83869
  size 327745472
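
For `tf_model.h5` only the Git LFS pointer is shown: the weights live in LFS storage, the `oid` is the sha256 of the new file, and the byte size happens to be unchanged. Once the file has been fetched locally (via `git lfs pull` or `huggingface_hub`), the pointer can be checked against the actual bytes; a small sketch, with the local path assumed:

```python
import hashlib

# Compare a locally downloaded tf_model.h5 against the sha256 oid in the
# LFS pointer above. "tf_model.h5" is assumed to be in the working directory.
EXPECTED = "8a20591b2085ece3610748f32c8387eb25018b18b393976b335e288389a83869"

h = hashlib.sha256()
with open("tf_model.h5", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

print("match" if h.hexdigest() == EXPECTED else "mismatch")
```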
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,16 +1,39 @@
  {
- "add_prefix_space": false,
  "added_tokens_decoder": {
-   "50256": {
-     "content": "<|endoftext|>",
+   "0": {
+     "content": "[PAD]",
      "lstrip": false,
-     "normalized": true,
+     "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
-   "50257": {
-     "content": "[PAD]",
+   "100": {
+     "content": "[UNK]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false,
+     "special": true
+   },
+   "101": {
+     "content": "[CLS]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false,
+     "special": true
+   },
+   "102": {
+     "content": "[SEP]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false,
+     "special": true
+   },
+   "103": {
+     "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
@@ -18,11 +41,15 @@
      "special": true
    }
  },
- "bos_token": "<|endoftext|>",
  "clean_up_tokenization_spaces": true,
- "eos_token": "<|endoftext|>",
- "model_max_length": 1024,
+ "cls_token": "[CLS]",
+ "do_lower_case": false,
+ "mask_token": "[MASK]",
+ "model_max_length": 512,
  "pad_token": "[PAD]",
- "tokenizer_class": "GPT2Tokenizer",
- "unk_token": "<|endoftext|>"
+ "sep_token": "[SEP]",
+ "strip_accents": null,
+ "tokenize_chinese_chars": true,
+ "tokenizer_class": "BertTokenizer",
+ "unk_token": "[UNK]"
  }
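
Taken together with `special_tokens_map.json`, the tokenizer files now describe a BERT-style WordPiece tokenizer ([PAD]=0, [UNK]=100, [CLS]=101, [SEP]=102, [MASK]=103, `model_max_length` 512) instead of the byte-level GPT-2 tokenizer that distilgpt2 ships with. A quick way to confirm what this commit pushed is to load the tokenizer at this revision and inspect it; the repo id below is a placeholder:

```python
from transformers import AutoTokenizer

# Placeholder repo id; point it at the actual model repo. The revision is the
# commit hash shown at the top of this page.
tok = AutoTokenizer.from_pretrained("node-py/your-model-name", revision="52ca4de")

print(type(tok).__name__)      # a BERT tokenizer class, per tokenizer_class above
print(tok.special_tokens_map)  # should mirror special_tokens_map.json
print(tok.convert_tokens_to_ids(["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"]))
# expected, per added_tokens_decoder: [0, 100, 101, 102, 103]
print(tok.model_max_length)    # 512, per tokenizer_config.json
```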