Training in progress epoch 0

Files changed:
- README.md                 +13  -74
- added_tokens.json           +3   -0
- config.json                +39  -25
- generation_config.json      +3   -2
- merges.txt                  +0   -0
- special_tokens_map.json     +3   -4
- tf_model.h5                 +2   -2
- tokenizer.json              +0   -0
- tokenizer_config.json      +11  -38
- vocab.json                  +0   -0
README.md CHANGED
@@ -1,5 +1,7 @@
 ---
-
+library_name: transformers
+license: apache-2.0
+base_model: distilgpt2
 tags:
 - generated_from_keras_callback
 model-index:
@@ -12,10 +14,11 @@ probably proofread and complete it, then remove this comment. -->
 
 # node-py/my_awesome_eli5_clm-model
 
-This model is a fine-tuned version of [
+This model is a fine-tuned version of [distilgpt2](https://huggingface.co/distilgpt2) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Train Loss:
--
+- Train Loss: 2.2705
+- Validation Loss: 2.1315
+- Epoch: 0
 
 ## Model description
 
@@ -39,78 +42,14 @@ The following hyperparameters were used during training:
 
 ### Training results
 
-| Train Loss | Epoch |
-
-
-| 5.8251 | 1 |
-| 5.3850 | 2 |
-| 5.0469 | 3 |
-| 4.8048 | 4 |
-| 4.6144 | 5 |
-| 4.4743 | 6 |
-| 4.3366 | 7 |
-| 4.2178 | 8 |
-| 4.1022 | 9 |
-| 3.9908 | 10 |
-| 3.8856 | 11 |
-| 3.7700 | 12 |
-| 3.6673 | 13 |
-| 3.5560 | 14 |
-| 3.4401 | 15 |
-| 3.3328 | 16 |
-| 3.2248 | 17 |
-| 3.1290 | 18 |
-| 3.0121 | 19 |
-| 2.8978 | 20 |
-| 2.7830 | 21 |
-| 2.6913 | 22 |
-| 2.5822 | 23 |
-| 2.4772 | 24 |
-| 2.3761 | 25 |
-| 2.2792 | 26 |
-| 2.1664 | 27 |
-| 2.0731 | 28 |
-| 1.9734 | 29 |
-| 1.8900 | 30 |
-| 1.7927 | 31 |
-| 1.7036 | 32 |
-| 1.6202 | 33 |
-| 1.5329 | 34 |
-| 1.4535 | 35 |
-| 1.3778 | 36 |
-| 1.3093 | 37 |
-| 1.2413 | 38 |
-| 1.1709 | 39 |
-| 1.1114 | 40 |
-| 1.0563 | 41 |
-| 0.9950 | 42 |
-| 0.9344 | 43 |
-| 0.8830 | 44 |
-| 0.8380 | 45 |
-| 0.7966 | 46 |
-| 0.7552 | 47 |
-| 0.7162 | 48 |
-| 0.6754 | 49 |
-| 0.6420 | 50 |
-| 0.6081 | 51 |
-| 0.5825 | 52 |
-| 0.5506 | 53 |
-| 0.5213 | 54 |
-| 0.4942 | 55 |
-| 0.4716 | 56 |
-| 0.4485 | 57 |
-| 0.4256 | 58 |
-| 0.4087 | 59 |
-| 0.3921 | 60 |
-| 0.3736 | 61 |
-| 0.3574 | 62 |
-| 0.3412 | 63 |
-| 0.3272 | 64 |
+| Train Loss | Validation Loss | Epoch |
+|:----------:|:---------------:|:-----:|
+| 2.2705     | 2.1315          | 0     |
 
 
 ### Framework versions
 
-- Transformers 4.44.
-- TensorFlow 2.
-- Datasets
+- Transformers 4.44.2
+- TensorFlow 2.17.0
+- Datasets 3.0.0
 - Tokenizers 0.19.1
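The card above only reports training metrics, so a minimal usage sketch follows for orientation. It is not part of the commit; it assumes the repo id in the card heading (node-py/my_awesome_eli5_clm-model) is the published Hub id, loads the TensorFlow weights pushed in tf_model.h5 below, and uses an arbitrary prompt.

```python
# Minimal sketch (not from this commit): load the checkpoint the card describes
# and sample a continuation. Assumes the card's repo id is publicly available.
from transformers import pipeline

generator = pipeline(
    "text-generation",
    model="node-py/my_awesome_eli5_clm-model",
    framework="tf",  # this commit ships TensorFlow weights (tf_model.h5)
)

prompt = "Why is the sky blue?"  # arbitrary example prompt
print(generator(prompt, max_new_tokens=40)[0]["generated_text"])
```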
added_tokens.json ADDED
@@ -0,0 +1,3 @@
+{
+  "[PAD]": 50257
+}
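For context, a hedged reconstruction of how a `"[PAD]": 50257` entry typically arises: GPT-2's base vocabulary occupies ids 0 through 50256, so a padding token added on top of distilgpt2 gets the next free id. The snippet below is the standard Transformers recipe, not the author's actual training script; note that the committed config.json below still reports vocab_size 50257.

```python
# Sketch of the usual recipe that produces added_tokens.json (an assumption, not the
# author's script): add a [PAD] token to the distilgpt2 tokenizer and resize the
# model's embedding matrix so the new id has a row.
from transformers import AutoTokenizer, TFAutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
num_added = tokenizer.add_special_tokens({"pad_token": "[PAD]"})

model = TFAutoModelForCausalLM.from_pretrained("distilgpt2")
model.resize_token_embeddings(len(tokenizer))  # 50257 base ids + 1 new pad token

print(num_added, tokenizer.pad_token_id)  # 1 50257
```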
config.json CHANGED
@@ -1,31 +1,45 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "distilgpt2",
+  "_num_labels": 1,
+  "activation_function": "gelu_new",
   "architectures": [
-    "
+    "GPT2LMHeadModel"
   ],
-  "
-  "
-  "
-  "
-  "
-
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "id2label": {
+    "0": "LABEL_0"
+  },
   "initializer_range": 0.02,
-  "
-
-
-  "
-  "model_type": "
-  "
-  "
-  "
-  "
-  "
-  "
-  "
-  "
-  "
-  "
-  "
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 6,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "transformers_version": "4.44.2",
   "use_cache": true,
-  "vocab_size":
+  "vocab_size": 50257
 }
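The new config is a stock distilgpt2 GPT-2 configuration (6 layers, 768-dim embeddings, 1024-token context). A small sketch for reading it back, assuming the repo id from the card heading:

```python
# Sketch (assumption: the card's repo id is the published Hub id): load the committed
# config.json and print the distilgpt2-sized dimensions shown in the diff above.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("node-py/my_awesome_eli5_clm-model")
print(config.model_type)    # gpt2
print(config.n_layer)       # 6
print(config.n_embd)        # 768
print(config.n_positions)   # 1024
```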
generation_config.json CHANGED
@@ -1,5 +1,6 @@
 {
   "_from_model_config": true,
-  "
-  "
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.44.2"
 }
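These defaults only pin GPT-2's end-of-text token (id 50256) as both BOS and EOS. A sketch for reading them back, again assuming the card's repo id:

```python
# Sketch: read the committed generation defaults (repo id assumed from the card).
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("node-py/my_awesome_eli5_clm-model")
print(gen_config.bos_token_id, gen_config.eos_token_id)  # 50256 50256
```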
merges.txt ADDED
The diff for this file is too large to render. See raw diff.
special_tokens_map.json CHANGED
@@ -1,7 +1,6 @@
 {
-  "
-  "
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
   "pad_token": "[PAD]",
-  "
-  "unk_token": "[UNK]"
+  "unk_token": "<|endoftext|>"
 }
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:55dab45c5f83dd4240c06a6c8e684c9d764be7819ef5f33515adf8e2fc2843a3
+size 327745472
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff.
tokenizer_config.json CHANGED
@@ -1,39 +1,16 @@
 {
+  "add_prefix_space": false,
   "added_tokens_decoder": {
-    "
-      "content": "
+    "50256": {
+      "content": "<|endoftext|>",
       "lstrip": false,
-      "normalized":
+      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "
-      "content": "[
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "101": {
-      "content": "[CLS]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "102": {
-      "content": "[SEP]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "103": {
-      "content": "[MASK]",
+    "50257": {
+      "content": "[PAD]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -41,15 +18,11 @@
       "special": true
     }
   },
+  "bos_token": "<|endoftext|>",
   "clean_up_tokenization_spaces": true,
-  "
-  "
-  "mask_token": "[MASK]",
-  "model_max_length": 512,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1024,
   "pad_token": "[PAD]",
-  "
-  "
-  "tokenize_chinese_chars": true,
-  "tokenizer_class": "BertTokenizer",
-  "unk_token": "[UNK]"
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
 }
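The tokenizer thus switches from a BERT-style WordPiece setup to GPT-2's byte-level BPE, reusing <|endoftext|> for BOS, EOS, and UNK while keeping [PAD] as a separate padding token. A sketch that verifies the mapping after loading, with the repo id again assumed from the card heading:

```python
# Sketch: confirm the special-token mapping defined in tokenizer_config.json
# (assumption: the card's repo id is the published Hub id).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("node-py/my_awesome_eli5_clm-model")
print(tokenizer.eos_token)                          # <|endoftext|>
print(tokenizer.pad_token, tokenizer.pad_token_id)  # [PAD] 50257
print(tokenizer.model_max_length)                   # 1024
```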
vocab.json ADDED
The diff for this file is too large to render. See raw diff.