Commit 69508a7 by louis030195
Parent: 3a640cb

End of training

.gitignore ADDED
@@ -0,0 +1 @@
+ checkpoint-*/
all_results.json CHANGED
@@ -1,14 +1,14 @@
  {
- "epoch": 499.67,
- "eval_loss": 5.67578125,
- "eval_runtime": 0.0865,
+ "epoch": 50.0,
+ "eval_loss": 3.314453125,
+ "eval_runtime": 2.4382,
  "eval_samples": 9,
- "eval_samples_per_second": 104.106,
- "eval_steps_per_second": 23.135,
- "perplexity": 291.71615278815983,
- "train_loss": 0.8276048583984374,
- "train_runtime": 639.1734,
+ "eval_samples_per_second": 3.691,
+ "eval_steps_per_second": 0.82,
+ "perplexity": 27.507346790200362,
+ "train_loss": 2.098372395833333,
+ "train_runtime": 956.5249,
  "train_samples": 23,
- "train_samples_per_second": 17.992,
- "train_steps_per_second": 0.782
+ "train_samples_per_second": 1.202,
+ "train_steps_per_second": 0.157
  }
config.json CHANGED
@@ -8,7 +8,7 @@
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
- "eos_token_id": 198,
+ "eos_token_id": 50256,
  "id2label": {
    "0": "LABEL_0"
  },
@@ -40,7 +40,7 @@
  }
  },
  "torch_dtype": "float16",
- "transformers_version": "4.13.0",
- "use_cache": false,
+ "transformers_version": "4.16.0.dev0",
+ "use_cache": true,
  "vocab_size": 50257
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
  {
- "epoch": 499.67,
- "eval_loss": 5.67578125,
- "eval_runtime": 0.0865,
+ "epoch": 50.0,
+ "eval_loss": 3.314453125,
+ "eval_runtime": 2.4382,
  "eval_samples": 9,
- "eval_samples_per_second": 104.106,
- "eval_steps_per_second": 23.135,
- "perplexity": 291.71615278815983
+ "eval_samples_per_second": 3.691,
+ "eval_steps_per_second": 0.82,
+ "perplexity": 27.507346790200362
  }
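For reference, the perplexity reported in these result files is exp(eval_loss), as computed by the Hugging Face language-modeling example scripts, so the drop from ~291.7 to ~27.5 follows directly from the eval loss falling from 5.68 to 3.31. A quick sketch of the arithmetic (illustrative only, not a file in this commit):

```python
import math

# Perplexity in the HF language-modeling examples is exp(eval_loss).
eval_loss_old, eval_loss_new = 5.67578125, 3.314453125
print(math.exp(eval_loss_old))  # ~291.716, matches the old "perplexity"
print(math.exp(eval_loss_new))  # ~27.507, matches the new "perplexity"
```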
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:96c7af5912efddf9ae72108390b7309f5d4ad68284e8a323ec5afc556aa87595
- size 170133371
+ oid sha256:344184e25d9e8e50e4b826bf02c7dabb381a18803cb8fdb6179f808fbd4ffcac
+ size 170133438
runs/Jan27_09-27-06_06c694df2bc7/1643276178.1144845/events.out.tfevents.1643276178.06c694df2bc7.575.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cd7e1ba97a3a5e2658a57ab94a08db0be875d882bb4da52b90dd3011d56b69ff
+ size 4787
runs/Jan27_09-27-06_06c694df2bc7/events.out.tfevents.1643276178.06c694df2bc7.575.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a7977b6667779ac4f6cdaebedbc59dd92256551fd458a60cdbb835499a2febc1
+ size 3672
runs/Jan27_09-43-10_06c694df2bc7/1643276611.652044/events.out.tfevents.1643276611.06c694df2bc7.801.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:db6718261aedf038a075cb1f0986e635880e8048c1b23be1e78a4405e6cf3c2f
+ size 4787
runs/Jan27_09-43-10_06c694df2bc7/events.out.tfevents.1643276611.06c694df2bc7.801.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fe66840afc59e6cd3241b4f8e807ca40381bcaa0448553f5070b148581e593e5
+ size 4026
runs/Jan27_09-43-10_06c694df2bc7/events.out.tfevents.1643277571.06c694df2bc7.801.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3aa64d4865a15c18233f25d5f6d0e99e941479473a1d00155b532424e4b1c421
+ size 311
special_tokens_map.json CHANGED
@@ -1 +1 @@
- {"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>"}
+ {"bos_token": "<|endoftext|>", "eos_token": "198", "unk_token": "<|endoftext|>"}
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1 +1 @@
- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "distilgpt2", "tokenizer_class": "GPT2Tokenizer"}
+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "198", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "distilgpt2", "tokenizer_class": "GPT2Tokenizer"}
train_results.json CHANGED
@@ -1,8 +1,8 @@
  {
- "epoch": 499.67,
- "train_loss": 0.8276048583984374,
- "train_runtime": 639.1734,
+ "epoch": 50.0,
+ "train_loss": 2.098372395833333,
+ "train_runtime": 956.5249,
  "train_samples": 23,
- "train_samples_per_second": 17.992,
- "train_steps_per_second": 0.782
+ "train_samples_per_second": 1.202,
+ "train_steps_per_second": 0.157
  }
trainer_state.json CHANGED
@@ -1,47 +1,25 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 499.6666666666667,
- "global_step": 500,
+ "epoch": 50.0,
+ "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
  {
- "epoch": 199.67,
- "eval_loss": 4.546875,
- "eval_runtime": 0.0857,
- "eval_samples_per_second": 105.046,
- "eval_steps_per_second": 23.344,
- "step": 200
- },
- {
- "epoch": 399.67,
- "eval_loss": 5.43359375,
- "eval_runtime": 0.0856,
- "eval_samples_per_second": 105.16,
- "eval_steps_per_second": 23.369,
- "step": 400
- },
- {
- "epoch": 499.67,
- "learning_rate": 5e-05,
- "loss": 0.8276,
- "step": 500
- },
- {
- "epoch": 499.67,
- "step": 500,
- "total_flos": 3003083546165248.0,
- "train_loss": 0.8276048583984374,
- "train_runtime": 639.1734,
- "train_samples_per_second": 17.992,
- "train_steps_per_second": 0.782
+ "epoch": 50.0,
+ "step": 150,
+ "total_flos": 300491279958016.0,
+ "train_loss": 2.098372395833333,
+ "train_runtime": 956.5249,
+ "train_samples_per_second": 1.202,
+ "train_steps_per_second": 0.157
  }
  ],
- "max_steps": 500,
- "num_train_epochs": 500,
- "total_flos": 3003083546165248.0,
+ "max_steps": 150,
+ "num_train_epochs": 50,
+ "total_flos": 300491279958016.0,
  "trial_name": null,
  "trial_params": null
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d20f7e55570ae3c982208f96205c5ae8fcc5abf26d55c695a4a61956d3109dad
- size 3887
+ oid sha256:7e43700f0bfe93c297af47c96114b00d27d85b8b7707d998bdab63ea39f976fc
+ size 4143