dq158 committed on
Commit
0fd3c31
1 Parent(s): f9e6f09

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96485eb7c607920cd754505017469a3214f305e887335f74ec1294359ec8fba7
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6ac607c4b4c16928e7543894b1342e51724717ce3480383f8c838c31d4c04a4
3
  size 1256
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afb7005eeedf6895d3e5b147f0e299d7d5c3f1bbc19bbb111a50846f82e55bb9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae28f9b2f5015b95ad2f650d54d1979a68d40e31cc70d2fd5c54ddf3ae4a6519
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4950342cbbaedd0bab0603999a400a1f5307e42dd9ea5ac4333478fb245f0ffd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fac550c17bc81a5a61f579be7c3cd944957f99fac9afc8af96fb956eda27f781
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,129 +1,56 @@
1
  {
2
- "best_metric": 2.7745249271392822,
3
- "best_model_checkpoint": "dq158/pingusPongus/checkpoint-6378",
4
- "epoch": 2.0,
5
  "eval_steps": 500,
6
- "global_step": 12756,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.16,
13
- "learning_rate": 0.00039920947174116906,
14
- "loss": 2.8811,
15
- "step": 1000
16
- },
17
- {
18
- "epoch": 0.31,
19
- "learning_rate": 0.00039648480371823173,
20
- "loss": 2.8589,
21
- "step": 2000
22
- },
23
- {
24
- "epoch": 0.47,
25
- "learning_rate": 0.0003918428175464395,
26
- "loss": 2.8896,
27
- "step": 3000
28
  },
29
  {
30
  "epoch": 0.63,
31
- "learning_rate": 0.0003853288101859922,
32
- "loss": 2.8554,
33
- "step": 4000
34
- },
35
- {
36
- "epoch": 0.78,
37
- "learning_rate": 0.0003770063459650089,
38
- "loss": 2.9077,
39
- "step": 5000
40
  },
41
  {
42
- "epoch": 0.94,
43
- "learning_rate": 0.0003669566363125271,
44
- "loss": 2.8994,
45
- "step": 6000
46
  },
47
  {
48
  "epoch": 1.0,
49
  "eval_bleu": 1.0,
50
  "eval_brevity_penalty": 1.0,
51
  "eval_length_ratio": 1.0,
52
- "eval_loss": 2.7745249271392822,
53
- "eval_precisions": [
54
- 1.0,
55
- 1.0,
56
- 1.0,
57
- 1.0
58
- ],
59
- "eval_reference_length": 5805056,
60
- "eval_runtime": 7760.7176,
61
- "eval_samples_per_second": 1.461,
62
- "eval_steps_per_second": 0.091,
63
- "eval_translation_length": 5805056,
64
- "step": 6378
65
- },
66
- {
67
- "epoch": 1.1,
68
- "learning_rate": 0.00035527774728934524,
69
- "loss": 2.9036,
70
- "step": 7000
71
- },
72
- {
73
- "epoch": 1.25,
74
- "learning_rate": 0.00034208364264970225,
75
- "loss": 2.8998,
76
- "step": 8000
77
- },
78
- {
79
- "epoch": 1.41,
80
- "learning_rate": 0.00032750307177169117,
81
- "loss": 2.8735,
82
- "step": 9000
83
- },
84
- {
85
- "epoch": 1.57,
86
- "learning_rate": 0.00031167831330809374,
87
- "loss": 2.8724,
88
- "step": 10000
89
- },
90
- {
91
- "epoch": 1.72,
92
- "learning_rate": 0.00029476378681721313,
93
- "loss": 2.8833,
94
- "step": 11000
95
- },
96
- {
97
- "epoch": 1.88,
98
- "learning_rate": 0.00027692454592155135,
99
- "loss": 2.878,
100
- "step": 12000
101
- },
102
- {
103
- "epoch": 2.0,
104
- "eval_bleu": 1.0,
105
- "eval_brevity_penalty": 1.0,
106
- "eval_length_ratio": 1.0,
107
- "eval_loss": 2.7745249271392822,
108
  "eval_precisions": [
109
  1.0,
110
  1.0,
111
  1.0,
112
  1.0
113
  ],
114
- "eval_reference_length": 5805056,
115
- "eval_runtime": 7896.2252,
116
- "eval_samples_per_second": 1.436,
117
- "eval_steps_per_second": 0.09,
118
- "eval_translation_length": 5805056,
119
- "step": 12756
120
  }
121
  ],
122
- "logging_steps": 1000,
123
- "max_steps": 31890,
124
  "num_train_epochs": 5,
125
- "save_steps": 1000,
126
- "total_flos": 1.748349136542892e+18,
127
  "trial_name": null,
128
  "trial_params": null
129
  }
 
1
  {
2
+ "best_metric": 3.066662549972534,
3
+ "best_model_checkpoint": "dq158/pingusPongus/checkpoint-1581",
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 1581,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.32,
13
+ "learning_rate": 4.967667032675337e-05,
14
+ "loss": 3.2269,
15
+ "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
16
  },
17
  {
18
  "epoch": 0.63,
19
+ "learning_rate": 4.837746407304061e-05,
20
+ "loss": 3.2202,
21
+ "step": 1000
 
 
 
 
 
 
22
  },
23
  {
24
+ "epoch": 0.95,
25
+ "learning_rate": 4.613457734930978e-05,
26
+ "loss": 3.2179,
27
+ "step": 1500
28
  },
29
  {
30
  "epoch": 1.0,
31
  "eval_bleu": 1.0,
32
  "eval_brevity_penalty": 1.0,
33
  "eval_length_ratio": 1.0,
34
+ "eval_loss": 3.066662549972534,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  "eval_precisions": [
36
  1.0,
37
  1.0,
38
  1.0,
39
  1.0
40
  ],
41
+ "eval_reference_length": 1439232,
42
+ "eval_runtime": 868.8432,
43
+ "eval_samples_per_second": 3.235,
44
+ "eval_steps_per_second": 0.203,
45
+ "eval_translation_length": 1439232,
46
+ "step": 1581
47
  }
48
  ],
49
+ "logging_steps": 500,
50
+ "max_steps": 7905,
51
  "num_train_epochs": 5,
52
+ "save_steps": 500,
53
+ "total_flos": 2.1666322696686797e+17,
54
  "trial_name": null,
55
  "trial_params": null
56
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3dd2903933121e6a92a30122c80cc9994899cd1aaaedd34cd64d7035bcbbb24c
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20086b87ec5232c7b02847cb5e045ab682b690baae2571d7dc3765d177a13545
3
  size 4728