Yannis98 committed
Commit: a96a96b
Parent: 35b6739
This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .gitattributes +1 -0
  2. README.md +55 -0
  3. all_results.json +26 -0
  4. checkpoint-1000/config.json +33 -0
  5. checkpoint-1000/model.safetensors +3 -0
  6. checkpoint-1000/optimizer.pt +3 -0
  7. checkpoint-1000/rng_state.pth +3 -0
  8. checkpoint-1000/scheduler.pt +3 -0
  9. checkpoint-1000/special_tokens_map.json +15 -0
  10. checkpoint-1000/tokenizer.json +0 -0
  11. checkpoint-1000/tokenizer_config.json +57 -0
  12. checkpoint-1000/trainer_state.json +47 -0
  13. checkpoint-1000/training_args.bin +3 -0
  14. checkpoint-2000/config.json +33 -0
  15. checkpoint-2000/model.safetensors +3 -0
  16. checkpoint-2000/optimizer.pt +3 -0
  17. checkpoint-2000/rng_state.pth +3 -0
  18. checkpoint-2000/scheduler.pt +3 -0
  19. checkpoint-2000/special_tokens_map.json +15 -0
  20. checkpoint-2000/tokenizer.json +0 -0
  21. checkpoint-2000/tokenizer_config.json +57 -0
  22. checkpoint-2000/trainer_state.json +61 -0
  23. checkpoint-2000/training_args.bin +3 -0
  24. checkpoint-3000/config.json +33 -0
  25. checkpoint-3000/model.safetensors +3 -0
  26. checkpoint-3000/optimizer.pt +3 -0
  27. checkpoint-3000/rng_state.pth +3 -0
  28. checkpoint-3000/scheduler.pt +3 -0
  29. checkpoint-3000/special_tokens_map.json +15 -0
  30. checkpoint-3000/tokenizer.json +0 -0
  31. checkpoint-3000/tokenizer_config.json +57 -0
  32. checkpoint-3000/trainer_state.json +75 -0
  33. checkpoint-3000/training_args.bin +3 -0
  34. checkpoint-4000/config.json +33 -0
  35. checkpoint-4000/model.safetensors +3 -0
  36. checkpoint-4000/optimizer.pt +3 -0
  37. checkpoint-4000/rng_state.pth +3 -0
  38. checkpoint-4000/scheduler.pt +3 -0
  39. checkpoint-4000/special_tokens_map.json +15 -0
  40. checkpoint-4000/tokenizer.json +0 -0
  41. checkpoint-4000/tokenizer_config.json +57 -0
  42. checkpoint-4000/trainer_state.json +89 -0
  43. checkpoint-4000/training_args.bin +3 -0
  44. checkpoint-4124/config.json +33 -0
  45. checkpoint-4124/model.safetensors +3 -0
  46. checkpoint-4124/optimizer.pt +3 -0
  47. checkpoint-4124/rng_state.pth +3 -0
  48. checkpoint-4124/scheduler.pt +3 -0
  49. checkpoint-4124/special_tokens_map.json +15 -0
  50. checkpoint-4124/tokenizer.json +0 -0
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
  Base_Albert/eval_nbest_predictions.json filter=lfs diff=lfs merge=lfs -text
+ eval_nbest_predictions.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,55 @@
+ ---
+ license: apache-2.0
+ base_model: albert/albert-base-v2
+ tags:
+ - generated_from_trainer
+ datasets:
+ - squad_v2
+ model-index:
+ - name: Base_Albert
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # Base_Albert
+
+ This model is a fine-tuned version of [albert/albert-base-v2](https://huggingface.co/albert/albert-base-v2) on the squad_v2 dataset.
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 3e-05
+ - train_batch_size: 64
+ - eval_batch_size: 8
+ - seed: 42
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 2.0
+
+ ### Training results
+
+
+
+ ### Framework versions
+
+ - Transformers 4.45.0.dev0
+ - Pytorch 2.2.2+cu121
+ - Datasets 2.19.2
+ - Tokenizers 0.19.1
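
Note: the hyperparameters in this card map one-to-one onto `transformers.TrainingArguments`. A minimal sketch of an equivalent setup follows; the model/dataset wiring of the actual run is not part of this diff and is assumed to follow the standard question-answering fine-tuning recipe, and `output_dir` is a placeholder. The Adam betas/epsilon listed above are the `TrainingArguments` defaults.

```python
# Sketch: TrainingArguments matching the card's hyperparameters.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="Base_Albert",        # placeholder output directory
    learning_rate=3e-5,              # learning_rate: 3e-05
    per_device_train_batch_size=64,  # train_batch_size: 64
    per_device_eval_batch_size=8,    # eval_batch_size: 8
    seed=42,                         # seed: 42
    lr_scheduler_type="linear",      # lr_scheduler_type: linear
    warmup_ratio=0.1,                # lr_scheduler_warmup_ratio: 0.1
    num_train_epochs=2.0,            # num_epochs: 2.0
    save_steps=1000,                 # matches the checkpoint-1000/2000/... folders
)
```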
all_results.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "epoch": 2.0,
+   "eval_HasAns_exact": 75.37112010796221,
+   "eval_HasAns_f1": 81.67986330801969,
+   "eval_HasAns_total": 5928,
+   "eval_NoAns_exact": 82.70815811606391,
+   "eval_NoAns_f1": 82.70815811606391,
+   "eval_NoAns_total": 5945,
+   "eval_best_exact": 79.04489177124569,
+   "eval_best_exact_thresh": 0.0,
+   "eval_best_f1": 82.1947468786271,
+   "eval_best_f1_thresh": 0.0,
+   "eval_exact": 79.04489177124569,
+   "eval_f1": 82.19474687862723,
+   "eval_runtime": 79.7911,
+   "eval_samples": 12171,
+   "eval_samples_per_second": 152.536,
+   "eval_steps_per_second": 19.075,
+   "eval_total": 11873,
+   "total_flos": 4371201741330432.0,
+   "train_loss": 0.9245093981116634,
+   "train_runtime": 4409.6844,
+   "train_samples": 131958,
+   "train_samples_per_second": 59.849,
+   "train_steps_per_second": 0.935
+ }
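
Note: these are SQuAD v2 metrics (79.0 exact / 82.2 F1 overall), so roughly half of the evaluation questions are unanswerable and inference should allow an empty answer. A usage sketch, assuming the model is published under the committer's namespace as `Yannis98/Base_Albert` (the hub repo id is not stated in this diff):

```python
# Sketch: SQuAD v2-style inference. The hub id "Yannis98/Base_Albert" is an
# assumption based on the committer and model name shown in this commit.
from transformers import pipeline

qa = pipeline("question-answering", model="Yannis98/Base_Albert")
result = qa(
    question="What dataset was the model fine-tuned on?",
    context="Base_Albert is an ALBERT model fine-tuned on the squad_v2 dataset.",
    handle_impossible_answer=True,  # permit an empty answer for the NoAns case
)
print(result)  # {'score': ..., 'start': ..., 'end': ..., 'answer': ...}
```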
checkpoint-1000/config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "albert/albert-base-v2",
+   "architectures": [
+     "AlbertForQuestionAnswering"
+   ],
+   "attention_probs_dropout_prob": 0,
+   "bos_token_id": 2,
+   "classifier_dropout_prob": 0.1,
+   "down_scale_factor": 1,
+   "embedding_size": 128,
+   "eos_token_id": 3,
+   "gap_size": 0,
+   "hidden_act": "gelu_new",
+   "hidden_dropout_prob": 0,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "inner_group_num": 1,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "albert",
+   "net_structure_type": 0,
+   "num_attention_heads": 12,
+   "num_hidden_groups": 1,
+   "num_hidden_layers": 12,
+   "num_memory_blocks": 0,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.45.0.dev0",
+   "type_vocab_size": 2,
+   "vocab_size": 30000
+ }
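
Note: this is the stock albert-base-v2 config with a QA head: ALBERT shares a single transformer layer group (`num_hidden_groups: 1`) across all 12 layers and factorizes the embeddings (128 → 768), which is why each float32 checkpoint is only ~44 MB (44381360 bytes / 4 ≈ 11.1M parameters). A sketch of loading a checkpoint from a local clone of this repo (the local path is an assumption; any `checkpoint-*` directory works):

```python
# Sketch: loading one of the saved checkpoint directories locally.
from transformers import AlbertForQuestionAnswering

model = AlbertForQuestionAnswering.from_pretrained("checkpoint-1000")

# Cross-layer parameter sharing keeps the count small, consistent with the
# ~44 MB float32 model.safetensors pointer below.
print(sum(p.numel() for p in model.parameters()))
```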
checkpoint-1000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6b396361671289ea35df62be5851b96a2f201fb9359090142c33091fe703594e
+ size 44381360
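
Note: three-line blocks like this one are Git LFS pointer files, not the weights themselves: `oid` is the SHA-256 of the real file and `size` its byte length, so a downloaded file can be verified against its pointer. A small verification sketch (the local file path is an assumption):

```python
# Sketch: verifying a downloaded LFS object against its pointer file.
import hashlib

def sha256_of(path: str) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
            h.update(chunk)
    return h.hexdigest()

expected = "6b396361671289ea35df62be5851b96a2f201fb9359090142c33091fe703594e"
assert sha256_of("checkpoint-1000/model.safetensors") == expected
```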
checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bed26f962d92a3cd7b562d68e97637db084d21caf6307d7495eff9b015bbc00b
+ size 88777619
checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:960a69c76f4510e5d359f1f8ed0b1d8d5a6a5e2293a5f059697691e42aa1ff31
+ size 14244
checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:81c66296607eae32dd03cf616c1ee454bed866dc0c297c973f719f647f45b77a
+ size 1064
checkpoint-1000/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "bos_token": "[CLS]",
+   "cls_token": "[CLS]",
+   "eos_token": "[SEP]",
+   "mask_token": {
+     "content": "[MASK]",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<pad>",
+   "sep_token": "[SEP]",
+   "unk_token": "<unk>"
+ }
checkpoint-1000/tokenizer.json ADDED
The diff for this file is too large to render.
 
checkpoint-1000/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4": {
+       "content": "[MASK]",
+       "lstrip": true,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "[CLS]",
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "eos_token": "[SEP]",
+   "keep_accents": false,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "<pad>",
+   "remove_space": true,
+   "sep_token": "[SEP]",
+   "tokenizer_class": "AlbertTokenizer",
+   "unk_token": "<unk>"
+ }
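
Note: this is the stock `AlbertTokenizer` (SentencePiece, lowercasing, 512-token limit); for extractive QA the question and context are packed into a single sequence. A usage sketch against a local checkpoint (the path is an assumption, and the example texts are illustrative):

```python
# Sketch: loading the tokenizer saved alongside a checkpoint and encoding a
# question/context pair the way extractive QA models expect.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("checkpoint-1000")
enc = tokenizer(
    "Who proposed ALBERT?",                # question -> segment A
    "ALBERT was proposed by Lan et al.",   # context  -> segment B
    truncation="only_second",              # truncate the context, not the question
    max_length=512,                        # model_max_length from this config
    return_tensors="pt",
)
print(tokenizer.decode(enc["input_ids"][0]))  # [CLS] question [SEP] context [SEP]
```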
checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,47 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 0.48496605237633367,
+   "eval_steps": 500,
+   "global_step": 1000,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.24248302618816683,
+       "grad_norm": 22.99319839477539,
+       "learning_rate": 2.929668552950687e-05,
+       "loss": 1.9432,
+       "step": 500
+     },
+     {
+       "epoch": 0.48496605237633367,
+       "grad_norm": 29.776500701904297,
+       "learning_rate": 2.5254648342764755e-05,
+       "loss": 1.0335,
+       "step": 1000
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 4124,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 2,
+   "save_steps": 1000,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 1060022550528000.0,
+   "train_batch_size": 64,
+   "trial_name": null,
+   "trial_params": null
+ }
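
Note: trainer_state.json is what lets `Trainer` resume mid-run; together with optimizer.pt, scheduler.pt and rng_state.pth it restores the step count, learning-rate schedule and RNG state. A resume sketch, assuming the Trainer is rebuilt with the same model, arguments and data as the original run (the `train_dataset` wiring is not part of this diff):

```python
# Sketch: resuming training from a saved checkpoint. `model` and
# `training_args` are assumed to match the original run (see sketches above);
# `train_dataset` would be the preprocessed squad_v2 features (assumed).
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
)
trainer.train(resume_from_checkpoint="checkpoint-1000")  # restores step-1000 state
```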
checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:31f78188d53f32e51fbfc484f90e61efc40bb61dc887520af92922483998ad43
+ size 5240
checkpoint-2000/config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "albert/albert-base-v2",
+   "architectures": [
+     "AlbertForQuestionAnswering"
+   ],
+   "attention_probs_dropout_prob": 0,
+   "bos_token_id": 2,
+   "classifier_dropout_prob": 0.1,
+   "down_scale_factor": 1,
+   "embedding_size": 128,
+   "eos_token_id": 3,
+   "gap_size": 0,
+   "hidden_act": "gelu_new",
+   "hidden_dropout_prob": 0,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "inner_group_num": 1,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "albert",
+   "net_structure_type": 0,
+   "num_attention_heads": 12,
+   "num_hidden_groups": 1,
+   "num_hidden_layers": 12,
+   "num_memory_blocks": 0,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.45.0.dev0",
+   "type_vocab_size": 2,
+   "vocab_size": 30000
+ }
checkpoint-2000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d6bcc75b4f409bc7511d2c105aeb79e9ac9152669577265b8c89f9d6db52741a
+ size 44381360
checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a0b92639b4b0018204eb041b6eb71864a3da250b6dd42f82d7d785bf411c8fbf
+ size 88777619
checkpoint-2000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:960a69c76f4510e5d359f1f8ed0b1d8d5a6a5e2293a5f059697691e42aa1ff31
+ size 14244
checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0db1ca8ff12dbe618a66e2b8dca8235d001ac2257fb93e96b7781b32c6fd5305
+ size 1064
checkpoint-2000/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "bos_token": "[CLS]",
+   "cls_token": "[CLS]",
+   "eos_token": "[SEP]",
+   "mask_token": {
+     "content": "[MASK]",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<pad>",
+   "sep_token": "[SEP]",
+   "unk_token": "<unk>"
+ }
checkpoint-2000/tokenizer.json ADDED
The diff for this file is too large to render.
 
checkpoint-2000/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4": {
+       "content": "[MASK]",
+       "lstrip": true,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "[CLS]",
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "eos_token": "[SEP]",
+   "keep_accents": false,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "<pad>",
+   "remove_space": true,
+   "sep_token": "[SEP]",
+   "tokenizer_class": "AlbertTokenizer",
+   "unk_token": "<unk>"
+ }
checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,61 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 0.9699321047526673,
+   "eval_steps": 500,
+   "global_step": 2000,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.24248302618816683,
+       "grad_norm": 22.99319839477539,
+       "learning_rate": 2.929668552950687e-05,
+       "loss": 1.9432,
+       "step": 500
+     },
+     {
+       "epoch": 0.48496605237633367,
+       "grad_norm": 29.776500701904297,
+       "learning_rate": 2.5254648342764755e-05,
+       "loss": 1.0335,
+       "step": 1000
+     },
+     {
+       "epoch": 0.7274490785645005,
+       "grad_norm": 17.826507568359375,
+       "learning_rate": 2.1212611156022636e-05,
+       "loss": 0.9314,
+       "step": 1500
+     },
+     {
+       "epoch": 0.9699321047526673,
+       "grad_norm": 16.028316497802734,
+       "learning_rate": 1.7170573969280517e-05,
+       "loss": 0.875,
+       "step": 2000
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 4124,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 2,
+   "save_steps": 1000,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 2120045101056000.0,
+   "train_batch_size": 64,
+   "trial_name": null,
+   "trial_params": null
+ }
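
Note: the logged learning rates are consistent with the README's linear schedule with 10% warmup over the 4124 total steps (ceil(0.1 × 4124) = 413 warmup steps). A quick check, using the post-warmup linear-decay formula from transformers' linear scheduler:

```python
# Sketch: reproducing the logged learning rates from the schedule parameters.
import math

peak_lr, max_steps = 3e-5, 4124
warmup_steps = math.ceil(0.1 * max_steps)  # 413

def lr_at(step: int) -> float:
    # Linear decay from the peak after warmup.
    return peak_lr * (max_steps - step) / (max_steps - warmup_steps)

print(lr_at(500))   # 2.929668...e-05, matching the step-500 log entry
print(lr_at(2000))  # 1.717057...e-05, matching the step-2000 log entry
```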
checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:31f78188d53f32e51fbfc484f90e61efc40bb61dc887520af92922483998ad43
+ size 5240
checkpoint-3000/config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "albert/albert-base-v2",
+   "architectures": [
+     "AlbertForQuestionAnswering"
+   ],
+   "attention_probs_dropout_prob": 0,
+   "bos_token_id": 2,
+   "classifier_dropout_prob": 0.1,
+   "down_scale_factor": 1,
+   "embedding_size": 128,
+   "eos_token_id": 3,
+   "gap_size": 0,
+   "hidden_act": "gelu_new",
+   "hidden_dropout_prob": 0,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "inner_group_num": 1,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "albert",
+   "net_structure_type": 0,
+   "num_attention_heads": 12,
+   "num_hidden_groups": 1,
+   "num_hidden_layers": 12,
+   "num_memory_blocks": 0,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.45.0.dev0",
+   "type_vocab_size": 2,
+   "vocab_size": 30000
+ }
checkpoint-3000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:84f4b6c26206d9596468fd953d25757798d0baa3a572cc57cebf9ee2d9efae22
+ size 44381360
checkpoint-3000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2fcd6a270702b70468e31ce242284cc1e98251471318f2dd4b53908bd0ddbcca
+ size 88777619
checkpoint-3000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aba0bbd22f1de48c7afcee33d3a889b6e69d5787af2f52b8b3e85ebdaa6e9705
+ size 14244
checkpoint-3000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c45a1cd2cf6182f86dab32b38f81700fabc02c33f71f34969929416febf2538e
+ size 1064
checkpoint-3000/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "bos_token": "[CLS]",
+   "cls_token": "[CLS]",
+   "eos_token": "[SEP]",
+   "mask_token": {
+     "content": "[MASK]",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<pad>",
+   "sep_token": "[SEP]",
+   "unk_token": "<unk>"
+ }
checkpoint-3000/tokenizer.json ADDED
The diff for this file is too large to render.
 
checkpoint-3000/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4": {
+       "content": "[MASK]",
+       "lstrip": true,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "[CLS]",
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "eos_token": "[SEP]",
+   "keep_accents": false,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "<pad>",
+   "remove_space": true,
+   "sep_token": "[SEP]",
+   "tokenizer_class": "AlbertTokenizer",
+   "unk_token": "<unk>"
+ }
checkpoint-3000/trainer_state.json ADDED
@@ -0,0 +1,75 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 1.454898157129001,
+   "eval_steps": 500,
+   "global_step": 3000,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.24248302618816683,
+       "grad_norm": 22.99319839477539,
+       "learning_rate": 2.929668552950687e-05,
+       "loss": 1.9432,
+       "step": 500
+     },
+     {
+       "epoch": 0.48496605237633367,
+       "grad_norm": 29.776500701904297,
+       "learning_rate": 2.5254648342764755e-05,
+       "loss": 1.0335,
+       "step": 1000
+     },
+     {
+       "epoch": 0.7274490785645005,
+       "grad_norm": 17.826507568359375,
+       "learning_rate": 2.1212611156022636e-05,
+       "loss": 0.9314,
+       "step": 1500
+     },
+     {
+       "epoch": 0.9699321047526673,
+       "grad_norm": 16.028316497802734,
+       "learning_rate": 1.7170573969280517e-05,
+       "loss": 0.875,
+       "step": 2000
+     },
+     {
+       "epoch": 1.2124151309408342,
+       "grad_norm": 27.188560485839844,
+       "learning_rate": 1.31285367825384e-05,
+       "loss": 0.7128,
+       "step": 2500
+     },
+     {
+       "epoch": 1.454898157129001,
+       "grad_norm": 15.629920959472656,
+       "learning_rate": 9.086499595796281e-06,
+       "loss": 0.6671,
+       "step": 3000
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 4124,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 2,
+   "save_steps": 1000,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 3179902023060480.0,
+   "train_batch_size": 64,
+   "trial_name": null,
+   "trial_params": null
+ }
checkpoint-3000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:31f78188d53f32e51fbfc484f90e61efc40bb61dc887520af92922483998ad43
+ size 5240
checkpoint-4000/config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "albert/albert-base-v2",
+   "architectures": [
+     "AlbertForQuestionAnswering"
+   ],
+   "attention_probs_dropout_prob": 0,
+   "bos_token_id": 2,
+   "classifier_dropout_prob": 0.1,
+   "down_scale_factor": 1,
+   "embedding_size": 128,
+   "eos_token_id": 3,
+   "gap_size": 0,
+   "hidden_act": "gelu_new",
+   "hidden_dropout_prob": 0,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "inner_group_num": 1,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "albert",
+   "net_structure_type": 0,
+   "num_attention_heads": 12,
+   "num_hidden_groups": 1,
+   "num_hidden_layers": 12,
+   "num_memory_blocks": 0,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.45.0.dev0",
+   "type_vocab_size": 2,
+   "vocab_size": 30000
+ }
checkpoint-4000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fc3543f8064c55082b743de7567800b19a1f3e96d847a83a7ab2af846dc34fac
+ size 44381360
checkpoint-4000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ce87d63c7a09078195a45eda009c6042f28baa6698c32ce8f96eb9a91aa6339f
+ size 88777619
checkpoint-4000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aba0bbd22f1de48c7afcee33d3a889b6e69d5787af2f52b8b3e85ebdaa6e9705
+ size 14244
checkpoint-4000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7ba6269d6b3fd350746b0f383afc9648ce09cd2ff349a4514e17e2cae44591e6
+ size 1064
checkpoint-4000/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "bos_token": "[CLS]",
+   "cls_token": "[CLS]",
+   "eos_token": "[SEP]",
+   "mask_token": {
+     "content": "[MASK]",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<pad>",
+   "sep_token": "[SEP]",
+   "unk_token": "<unk>"
+ }
checkpoint-4000/tokenizer.json ADDED
The diff for this file is too large to render.
 
checkpoint-4000/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4": {
+       "content": "[MASK]",
+       "lstrip": true,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "[CLS]",
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "eos_token": "[SEP]",
+   "keep_accents": false,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "<pad>",
+   "remove_space": true,
+   "sep_token": "[SEP]",
+   "tokenizer_class": "AlbertTokenizer",
+   "unk_token": "<unk>"
+ }
checkpoint-4000/trainer_state.json ADDED
@@ -0,0 +1,89 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 1.9398642095053347,
+   "eval_steps": 500,
+   "global_step": 4000,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.24248302618816683,
+       "grad_norm": 22.99319839477539,
+       "learning_rate": 2.929668552950687e-05,
+       "loss": 1.9432,
+       "step": 500
+     },
+     {
+       "epoch": 0.48496605237633367,
+       "grad_norm": 29.776500701904297,
+       "learning_rate": 2.5254648342764755e-05,
+       "loss": 1.0335,
+       "step": 1000
+     },
+     {
+       "epoch": 0.7274490785645005,
+       "grad_norm": 17.826507568359375,
+       "learning_rate": 2.1212611156022636e-05,
+       "loss": 0.9314,
+       "step": 1500
+     },
+     {
+       "epoch": 0.9699321047526673,
+       "grad_norm": 16.028316497802734,
+       "learning_rate": 1.7170573969280517e-05,
+       "loss": 0.875,
+       "step": 2000
+     },
+     {
+       "epoch": 1.2124151309408342,
+       "grad_norm": 27.188560485839844,
+       "learning_rate": 1.31285367825384e-05,
+       "loss": 0.7128,
+       "step": 2500
+     },
+     {
+       "epoch": 1.454898157129001,
+       "grad_norm": 15.629920959472656,
+       "learning_rate": 9.086499595796281e-06,
+       "loss": 0.6671,
+       "step": 3000
+     },
+     {
+       "epoch": 1.6973811833171677,
+       "grad_norm": 15.22022819519043,
+       "learning_rate": 5.044462409054164e-06,
+       "loss": 0.6666,
+       "step": 3500
+     },
+     {
+       "epoch": 1.9398642095053347,
+       "grad_norm": 21.28203582763672,
+       "learning_rate": 1.0024252223120454e-06,
+       "loss": 0.641,
+       "step": 4000
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 4124,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 2,
+   "save_steps": 1000,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 4239924573588480.0,
+   "train_batch_size": 64,
+   "trial_name": null,
+   "trial_params": null
+ }
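
Note: the 4124 max_steps are fully determined by the data and batch size: all_results.json reports 131958 training features, and at batch size 64 that is ceil(131958/64) = 2062 steps per epoch, times 2 epochs. A one-line check:

```python
# Sketch: deriving max_steps from train_samples and train_batch_size.
import math

steps_per_epoch = math.ceil(131958 / 64)  # train_samples / train_batch_size
print(steps_per_epoch * 2)                # 4124 == max_steps in trainer_state.json
```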
checkpoint-4000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:31f78188d53f32e51fbfc484f90e61efc40bb61dc887520af92922483998ad43
+ size 5240
checkpoint-4124/config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "albert/albert-base-v2",
+   "architectures": [
+     "AlbertForQuestionAnswering"
+   ],
+   "attention_probs_dropout_prob": 0,
+   "bos_token_id": 2,
+   "classifier_dropout_prob": 0.1,
+   "down_scale_factor": 1,
+   "embedding_size": 128,
+   "eos_token_id": 3,
+   "gap_size": 0,
+   "hidden_act": "gelu_new",
+   "hidden_dropout_prob": 0,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "inner_group_num": 1,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "albert",
+   "net_structure_type": 0,
+   "num_attention_heads": 12,
+   "num_hidden_groups": 1,
+   "num_hidden_layers": 12,
+   "num_memory_blocks": 0,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.45.0.dev0",
+   "type_vocab_size": 2,
+   "vocab_size": 30000
+ }
checkpoint-4124/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:949037f5b37793e09ab2f2f7eeddd0592de75c2aa8f34f3ce8753b1c362c4aee
+ size 44381360
checkpoint-4124/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8adf2c81f41b900f880bf12c51aaf821848b0e5b3d83bbc524e51244ff0b0aed
+ size 88777619
checkpoint-4124/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aba0bbd22f1de48c7afcee33d3a889b6e69d5787af2f52b8b3e85ebdaa6e9705
+ size 14244
checkpoint-4124/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d9caeddb926499794fec79c386599b77509588bd568b6405745c0bf9f348096b
+ size 1064
checkpoint-4124/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "bos_token": "[CLS]",
+   "cls_token": "[CLS]",
+   "eos_token": "[SEP]",
+   "mask_token": {
+     "content": "[MASK]",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<pad>",
+   "sep_token": "[SEP]",
+   "unk_token": "<unk>"
+ }
checkpoint-4124/tokenizer.json ADDED
The diff for this file is too large to render.