genejalston commited on
Commit
8f771b3
1 Parent(s): 94850e4

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d680bf864856f9bfa584eb3208b4e38f5e9cb6cd1f58cceb049b31b9b2acfd73
3
  size 1625426996
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70439eb5bf332bcd85707ea7476cfef47e0adebfd07fc1da5b88b03fbf4caadf
3
  size 1625426996
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f2cfa1bc4665664041205554f346f8cb1719a2a51264588c21beb85644ae889
3
- size 3250759951
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3b14d4fda53cf28bf2ae0b4b157bbbff1bcea19560574e1fc479b6280edaaf6
3
+ size 3250745679
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f73c7f3308641c8e2c15d99e5247e6be2d37417c7acb4a7ad64cb68b6d5fa4d2
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ecb6385e3487dcbb2407131ebc74d6528cbf0cd32b5abf0b470cfecc01787ff
3
+ size 13990
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b776a4dee12abf784324330625ce24646d6dfd2b7e10e3f7907e7112d9954c0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f13c29da6cf7e87c7b07f9b274343bdf230f8df79285a1204e1a03eb02196fd
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,125 +1,33 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0,
5
  "eval_steps": 500,
6
- "global_step": 7500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.2,
13
- "learning_rate": 1.869158878504673e-05,
14
- "loss": 0.4912,
15
  "step": 500
16
  },
17
  {
18
- "epoch": 0.4,
19
- "learning_rate": 1.7356475300400533e-05,
20
- "loss": 0.3659,
21
- "step": 1000
22
- },
23
- {
24
- "epoch": 0.6,
25
- "learning_rate": 1.602136181575434e-05,
26
- "loss": 0.3266,
27
- "step": 1500
28
- },
29
- {
30
- "epoch": 0.8,
31
- "learning_rate": 1.4686248331108147e-05,
32
- "loss": 0.2493,
33
- "step": 2000
34
- },
35
- {
36
- "epoch": 1.0,
37
- "learning_rate": 1.335113484646195e-05,
38
- "loss": 0.2199,
39
- "step": 2500
40
- },
41
- {
42
- "epoch": 1.0,
43
- "eval_loss": 1.8119621276855469,
44
- "eval_runtime": 912.5828,
45
- "eval_samples_per_second": 1.369,
46
- "eval_steps_per_second": 0.172,
47
- "step": 2500
48
- },
49
- {
50
- "epoch": 1.2,
51
- "learning_rate": 1.2016021361815755e-05,
52
- "loss": 0.1937,
53
- "step": 3000
54
- },
55
- {
56
- "epoch": 1.4,
57
- "learning_rate": 1.068090787716956e-05,
58
- "loss": 0.1332,
59
- "step": 3500
60
- },
61
- {
62
- "epoch": 1.6,
63
- "learning_rate": 9.345794392523365e-06,
64
- "loss": 0.1551,
65
- "step": 4000
66
- },
67
- {
68
- "epoch": 1.8,
69
- "learning_rate": 8.01068090787717e-06,
70
- "loss": 0.1196,
71
- "step": 4500
72
- },
73
- {
74
- "epoch": 2.0,
75
- "learning_rate": 6.675567423230975e-06,
76
- "loss": 0.1398,
77
- "step": 5000
78
- },
79
- {
80
- "epoch": 2.0,
81
- "eval_loss": 2.2787365913391113,
82
- "eval_runtime": 910.7781,
83
- "eval_samples_per_second": 1.371,
84
- "eval_steps_per_second": 0.172,
85
- "step": 5000
86
- },
87
- {
88
- "epoch": 2.2,
89
- "learning_rate": 5.34045393858478e-06,
90
- "loss": 0.1012,
91
- "step": 5500
92
- },
93
- {
94
- "epoch": 2.4,
95
- "learning_rate": 4.005340453938585e-06,
96
- "loss": 0.1021,
97
- "step": 6000
98
- },
99
- {
100
- "epoch": 2.6,
101
- "learning_rate": 2.67022696929239e-06,
102
- "loss": 0.0804,
103
- "step": 6500
104
- },
105
- {
106
- "epoch": 2.8,
107
- "learning_rate": 1.335113484646195e-06,
108
- "loss": 0.0613,
109
- "step": 7000
110
- },
111
- {
112
- "epoch": 3.0,
113
- "learning_rate": 0.0,
114
- "loss": 0.0713,
115
- "step": 7500
116
  }
117
  ],
118
  "logging_steps": 500,
119
- "max_steps": 7500,
120
  "num_train_epochs": 3,
121
  "save_steps": 500,
122
- "total_flos": 1.26966087401472e+16,
123
  "trial_name": null,
124
  "trial_params": null
125
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.1,
5
  "eval_steps": 500,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.1,
13
+ "learning_rate": 4.836557705136758e-05,
14
+ "loss": 0.3773,
15
  "step": 500
16
  },
17
  {
18
+ "epoch": 0.1,
19
+ "eval_loss": 0.08091682195663452,
20
+ "eval_runtime": 6249.7001,
21
+ "eval_samples_per_second": 0.2,
22
+ "eval_steps_per_second": 0.05,
23
+ "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  }
25
  ],
26
  "logging_steps": 500,
27
+ "max_steps": 15000,
28
  "num_train_epochs": 3,
29
  "save_steps": 500,
30
+ "total_flos": 2167104602112000.0,
31
  "trial_name": null,
32
  "trial_params": null
33
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef8fd0438f41dd769275a8c7c436204a3b8383bf6d9589567415d61a8711569b
3
  size 4536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:174bd78d776d79859d6c59d304c2b807ab06208a61976a9d5cfc4028cb675643
3
  size 4536