Training in progress epoch 0

- README.md +4 -53
- special_tokens_map.json +4 -3
- tf_model.h5 +1 -1
- tokenizer.json +0 -0
- tokenizer_config.json +38 -11

README.md CHANGED
```diff
@@ -15,9 +15,9 @@ probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [distilgpt2](https://huggingface.co/distilgpt2) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Train Loss:
-- Validation Loss:
-- Epoch:
+- Train Loss: 7.9915
+- Validation Loss: 6.1584
+- Epoch: 0
 
 ## Model description
 
@@ -43,56 +43,7 @@ The following hyperparameters were used during training:
 
 | Train Loss | Validation Loss | Epoch |
 |:----------:|:---------------:|:-----:|
-| … | … | 0 |
-| 2.2709 | 2.1303 | 1 |
-| 2.1837 | 2.0685 | 2 |
-| 2.1268 | 2.0216 | 3 |
-| 2.0821 | 1.9830 | 4 |
-| 2.0436 | 1.9497 | 5 |
-| 2.0105 | 1.9194 | 6 |
-| 1.9810 | 1.8955 | 7 |
-| 1.9552 | 1.8767 | 8 |
-| 1.9311 | 1.8544 | 9 |
-| 1.9080 | 1.8386 | 10 |
-| 1.8864 | 1.8183 | 11 |
-| 1.8676 | 1.7983 | 12 |
-| 1.8487 | 1.7856 | 13 |
-| 1.8304 | 1.7766 | 14 |
-| 1.8150 | 1.7672 | 15 |
-| 1.7992 | 1.7472 | 16 |
-| 1.7841 | 1.7402 | 17 |
-| 1.7687 | 1.7266 | 18 |
-| 1.7554 | 1.7215 | 19 |
-| 1.7422 | 1.7091 | 20 |
-| 1.7279 | 1.7099 | 21 |
-| 1.7163 | 1.6969 | 22 |
-| 1.7051 | 1.6856 | 23 |
-| 1.6925 | 1.6795 | 24 |
-| 1.6819 | 1.6712 | 25 |
-| 1.6709 | 1.6665 | 26 |
-| 1.6593 | 1.6606 | 27 |
-| 1.6504 | 1.6572 | 28 |
-| 1.6402 | 1.6542 | 29 |
-| 1.6308 | 1.6493 | 30 |
-| 1.6205 | 1.6393 | 31 |
-| 1.6104 | 1.6329 | 32 |
-| 1.5999 | 1.6361 | 33 |
-| 1.5915 | 1.6329 | 34 |
-| 1.5832 | 1.6229 | 35 |
-| 1.5746 | 1.6142 | 36 |
-| 1.5653 | 1.6131 | 37 |
-| 1.5581 | 1.6169 | 38 |
-| 1.5495 | 1.6107 | 39 |
-| 1.5410 | 1.6084 | 40 |
-| 1.5328 | 1.6017 | 41 |
-| 1.5242 | 1.5968 | 42 |
-| 1.5165 | 1.5964 | 43 |
-| 1.5085 | 1.5911 | 44 |
-| 1.5010 | 1.5917 | 45 |
-| 1.4938 | 1.5934 | 46 |
-| 1.4860 | 1.5896 | 47 |
-| 1.4790 | 1.5850 | 48 |
-| 1.4707 | 1.5878 | 49 |
+| 7.9915 | 6.1584 | 0 |
 
 
 ### Framework versions
```
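Assuming the reported losses are mean cross-entropy in nats (the usual Keras setup for causal LM fine-tuning), the epoch-0 numbers correspond to perplexities of roughly e^7.9915 ≈ 2956 (train) and e^6.1584 ≈ 473 (validation). For context, a minimal sketch of loading a TensorFlow checkpoint like this one once it is pushed; the repo id `user/distilgpt2-finetuned` is a placeholder, not the actual repository name:

```python
# Minimal sketch: load a TF causal-LM checkpoint like this one.
# "user/distilgpt2-finetuned" is a placeholder repo id, not the real one.
from transformers import AutoTokenizer, TFAutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("user/distilgpt2-finetuned")
model = TFAutoModelForCausalLM.from_pretrained("user/distilgpt2-finetuned")

inputs = tokenizer("Once upon a time", return_tensors="tf")
out = model.generate(**inputs, max_new_tokens=20,
                     pad_token_id=tokenizer.pad_token_id)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```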
special_tokens_map.json CHANGED

```diff
@@ -1,6 +1,7 @@
 {
-  "bos_token": "<|endoftext|>",
-  "eos_token": "<|endoftext|>",
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
   "pad_token": "[PAD]",
-  "unk_token": "<|endoftext|>"
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
 }
```
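The new map is the standard BERT-style special-token set, which matches the switch to `BertTokenizer` in tokenizer_config.json below. As a sketch, this is the kind of file `save_pretrained` writes for any tokenizer carrying these tokens; `bert-base-cased` is only a stand-in for the tokenizer actually trained here:

```python
# Sketch: special_tokens_map.json is one of the files written by
# save_pretrained(); bert-base-cased merely stands in for the real tokenizer.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
print(tokenizer.special_tokens_map)
# {'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]',
#  'cls_token': '[CLS]', 'mask_token': '[MASK]'}
tokenizer.save_pretrained("my-tokenizer")  # writes special_tokens_map.json
```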
tf_model.h5 CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:8a20591b2085ece3610748f32c8387eb25018b18b393976b335e288389a83869
 size 327745472
```
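Only this Git LFS pointer (oid and size) is versioned in git; the ~328 MB weights file itself lives in LFS storage. A small sketch, assuming a local download of `tf_model.h5`, for checking the file against the pointer's sha256:

```python
# Sketch: verify an LFS-tracked file against the sha256 oid in its pointer.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "8a20591b2085ece3610748f32c8387eb25018b18b393976b335e288389a83869"
print(sha256_of("tf_model.h5") == expected)
```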
tokenizer.json CHANGED

The diff for this file is too large to render. See the raw diff.
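tokenizer.json is the full fast-tokenizer serialization (vocabulary, normalizer, pre-tokenizer, and so on), which is why its diff is too large to render. A sketch of inspecting it directly with the tokenizers library, assuming a local copy of the file:

```python
# Sketch: inspect the serialized fast tokenizer (assumes a local copy).
from tokenizers import Tokenizer

tok = Tokenizer.from_file("tokenizer.json")
print(tok.get_vocab_size())
print(tok.encode("hello world").tokens)
```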
tokenizer_config.json CHANGED

```diff
@@ -1,16 +1,39 @@
 {
-  "add_prefix_space": false,
   "added_tokens_decoder": {
-    "50256": {
-      "content": "<|endoftext|>",
+    "0": {
+      "content": "[PAD]",
       "lstrip": false,
-      "normalized": true,
+      "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "50257": {
-      "content": "[PAD]",
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -18,11 +41,15 @@
       "special": true
     }
   },
-  "bos_token": "<|endoftext|>",
   "clean_up_tokenization_spaces": true,
-  "eos_token": "<|endoftext|>",
-  "model_max_length": 1024,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
   "pad_token": "[PAD]",
-  "tokenizer_class": "GPT2Tokenizer",
-  "unk_token": "<|endoftext|>"
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
 }
```
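With `tokenizer_class` now set to `BertTokenizer` and the classic BERT ids 0/100/101/102/103 for `[PAD]`/`[UNK]`/`[CLS]`/`[SEP]`/`[MASK]`, `AutoTokenizer` will resolve this repo to a BERT-style tokenizer even though the model itself is a distilgpt2 variant. A sketch of what loading it should show; the repo id is again a placeholder:

```python
# Sketch: AutoTokenizer picks its class from tokenizer_config.json.
# "user/distilgpt2-finetuned" is a placeholder repo id.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("user/distilgpt2-finetuned")
print(type(tokenizer).__name__)    # BertTokenizerFast (fast twin of BertTokenizer)
print(tokenizer.model_max_length)  # 512
print(tokenizer.convert_tokens_to_ids(
    ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"]))  # [0, 100, 101, 102, 103]
```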