node-py committed on
Commit 640fa30
1 Parent(s): a34161e

Training in progress epoch 0

Files changed (4)
  1. README.md +6 -15
  2. config.json +19 -38
  3. generation_config.json +1 -2
  4. tf_model.h5 +2 -2
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
 license: apache-2.0
-base_model: distilgpt2
+base_model: bert-base-uncased
 tags:
 - generated_from_keras_callback
 model-index:
@@ -13,11 +13,11 @@ probably proofread and complete it, then remove this comment. -->
 
 # node-py/my_awesome_eli5_clm-model
 
-This model is a fine-tuned version of [distilgpt2](https://huggingface.co/distilgpt2) on an unknown dataset.
+This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Train Loss: 5.2212
-- Validation Loss: 5.1350
-- Epoch: 9
+- Train Loss: 7.4278
+- Validation Loss: 6.1153
+- Epoch: 0
 
 ## Model description
 
@@ -43,16 +43,7 @@ The following hyperparameters were used during training:
 
 | Train Loss | Validation Loss | Epoch |
 |:----------:|:---------------:|:-----:|
-| 7.9915 | 6.1584 | 0 |
-| 6.0733 | 5.8991 | 1 |
-| 5.9048 | 5.7918 | 2 |
-| 5.7975 | 5.6991 | 3 |
-| 5.7077 | 5.6058 | 4 |
-| 5.6085 | 5.5014 | 5 |
-| 5.5086 | 5.4011 | 6 |
-| 5.4100 | 5.3107 | 7 |
-| 5.3142 | 5.2220 | 8 |
-| 5.2212 | 5.1350 | 9 |
+| 7.4278 | 6.1153 | 0 |
 
 
 ### Framework versions
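The `generated_from_keras_callback` tag in the card above indicates the README and checkpoint are pushed automatically during Keras training. Below is a minimal sketch of that kind of loop; the dataset variables, learning-rate schedule, and epoch count are assumptions, not the author's actual script.

```python
# Sketch of a Keras causal-LM fine-tuning run that pushes a card like the
# one above after each epoch. Hyperparameters and dataset objects are
# illustrative assumptions.
from transformers import AutoTokenizer, TFAutoModelForCausalLM, create_optimizer
from transformers.keras_callbacks import PushToHubCallback

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = TFAutoModelForCausalLM.from_pretrained("bert-base-uncased")  # matches the new base_model

optimizer, _ = create_optimizer(init_lr=2e-5, num_warmup_steps=500, num_train_steps=5_000)
model.compile(optimizer=optimizer)  # the model computes its own LM loss internally

# PushToHubCallback uploads the checkpoint, config, and an auto-generated
# README (including the loss table) after every epoch; this commit
# corresponds to the epoch-0 push.
push_cb = PushToHubCallback(output_dir="my_awesome_eli5_clm-model", tokenizer=tokenizer)
# model.fit(tf_train_set, validation_data=tf_validation_set, epochs=10, callbacks=[push_cb])
```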
config.json CHANGED
@@ -1,45 +1,26 @@
 {
-  "_name_or_path": "distilgpt2",
-  "_num_labels": 1,
-  "activation_function": "gelu_new",
+  "_name_or_path": "bert-base-uncased",
   "architectures": [
-    "GPT2LMHeadModel"
+    "BertLMHeadModel"
   ],
-  "attn_pdrop": 0.1,
-  "bos_token_id": 50256,
-  "embd_pdrop": 0.1,
-  "eos_token_id": 50256,
-  "id2label": {
-    "0": "LABEL_0"
-  },
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
   "initializer_range": 0.02,
-  "label2id": {
-    "LABEL_0": 0
-  },
-  "layer_norm_epsilon": 1e-05,
-  "model_type": "gpt2",
-  "n_ctx": 1024,
-  "n_embd": 768,
-  "n_head": 12,
-  "n_inner": null,
-  "n_layer": 6,
-  "n_positions": 1024,
-  "reorder_and_upcast_attn": false,
-  "resid_pdrop": 0.1,
-  "scale_attn_by_inverse_layer_idx": false,
-  "scale_attn_weights": true,
-  "summary_activation": null,
-  "summary_first_dropout": 0.1,
-  "summary_proj_to_labels": true,
-  "summary_type": "cls_index",
-  "summary_use_proj": true,
-  "task_specific_params": {
-    "text-generation": {
-      "do_sample": true,
-      "max_length": 50
-    }
-  },
+  "intermediate_size": 3072,
+  "is_decoder": true,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
   "transformers_version": "4.44.0",
+  "type_vocab_size": 2,
   "use_cache": true,
-  "vocab_size": 50257
+  "vocab_size": 30522
 }
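The notable flag in the new config is `"is_decoder": true`, which is what lets BERT, an encoder architecture, be loaded with an LM head for causal language modeling. The snippet below is illustrative only, showing how a config like this one maps to a TF model class; it is not part of the commit.

```python
# Build the same kind of config and instantiate the TF causal-LM class.
from transformers import AutoConfig, TFAutoModelForCausalLM

config = AutoConfig.from_pretrained("bert-base-uncased", is_decoder=True)
assert config.model_type == "bert" and config.vocab_size == 30522

# from_config builds the architecture with randomly initialized weights;
# from_pretrained would instead load the fine-tuned tf_model.h5 from the Hub.
model = TFAutoModelForCausalLM.from_config(config)
```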
generation_config.json CHANGED
@@ -1,6 +1,5 @@
 {
   "_from_model_config": true,
-  "bos_token_id": 50256,
-  "eos_token_id": 50256,
+  "pad_token_id": 0,
   "transformers_version": "4.44.0"
 }
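GPT-2's shared BOS/EOS id (50256) is dropped and only `pad_token_id: 0` remains, matching the BERT tokenizer's `[PAD]` token. As a quick check (not part of the commit, and assuming the repo is public), the committed file can be read back with the `GenerationConfig` API:

```python
# Load the generation settings this commit pushed to the Hub.
from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_pretrained("node-py/my_awesome_eli5_clm-model")
print(gen_cfg.pad_token_id)  # 0; no bos/eos token ids are set
```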
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:355baad3df548530b23cae5cc635cf03e531bc87edc100f88b19dd324863ef7b
-size 327745472
+oid sha256:d12e8534ea4b65b4e1ab43d991fdad385ca0338fbd005b1c5160af0a650d4c4f
+size 533687616
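The checkpoint itself is tracked with Git LFS, so only the pointer changes here: the new SHA-256 and the roughly 534 MB size reflect the larger bert-base-uncased weights replacing the ~328 MB distilgpt2 checkpoint. If you want to verify a download against this pointer (an optional check, assuming the repo is public and `huggingface_hub` is installed):

```python
# Download the checkpoint at this commit and compare its SHA-256
# against the LFS pointer above.
import hashlib
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="node-py/my_awesome_eli5_clm-model",
    filename="tf_model.h5",
    revision="640fa30",
)
digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)
print(digest.hexdigest())  # expected: d12e8534ea4b65b4e1ab43d991fdad385ca0338fbd005b1c5160af0a650d4c4f
```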