leixa committed
Commit 655ca32 · verified · 1 Parent(s): 4218faa

Training in progress, step 48, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bc2995f781ed74f3377cbedb73668f4c2604d96c9fb2489dd59eda4288302bd7
+oid sha256:540695a3a9f43d9bbd1ef789d552460b8c74b46bd4fe758079e6157729c7e2ec
 size 201892112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3df16017fc50b7c065b86c7bafca727334d4e926f8dbad3e150d972f1bd5cce0
+oid sha256:fa7428b52cb33a82912bb70cd558b2213508ff4ab07a0f854523e513b35a9f53
 size 102864548
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:10afebc62f6c8e891ad93ef14a7ba50b2c58f0717580ad02c0baad1acebe56b0
+oid sha256:a808482866d58c57fe0290015fc20e4f8b8939db7d59d8f55d0fcd2b8b4cf266
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ccd734c499cbd9269039444aa61a9c5a6fbae1cfe5ae2a2f523c9e7a6137392e
+oid sha256:56e49a8262b2a61d3153e5d379c20cfb08094371d950647117ad67ae4b87231f
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.25,
+  "epoch": 0.5,
   "eval_steps": 24,
-  "global_step": 24,
+  "global_step": 48,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -79,6 +79,70 @@
       "eval_samples_per_second": 48.78,
       "eval_steps_per_second": 6.323,
       "step": 24
+    },
+    {
+      "epoch": 0.28125,
+      "grad_norm": 0.2701338827610016,
+      "learning_rate": 4.954008042777125e-05,
+      "loss": 1.666,
+      "step": 27
+    },
+    {
+      "epoch": 0.3125,
+      "grad_norm": 0.35075831413269043,
+      "learning_rate": 4.93641851874178e-05,
+      "loss": 1.6302,
+      "step": 30
+    },
+    {
+      "epoch": 0.34375,
+      "grad_norm": 0.31512945890426636,
+      "learning_rate": 4.916028962942762e-05,
+      "loss": 1.6324,
+      "step": 33
+    },
+    {
+      "epoch": 0.375,
+      "grad_norm": 0.2673735022544861,
+      "learning_rate": 4.892862807891131e-05,
+      "loss": 1.5973,
+      "step": 36
+    },
+    {
+      "epoch": 0.40625,
+      "grad_norm": 0.27960431575775146,
+      "learning_rate": 4.866946677079314e-05,
+      "loss": 1.5657,
+      "step": 39
+    },
+    {
+      "epoch": 0.4375,
+      "grad_norm": 0.3442665934562683,
+      "learning_rate": 4.8383103543843036e-05,
+      "loss": 1.564,
+      "step": 42
+    },
+    {
+      "epoch": 0.46875,
+      "grad_norm": 0.3022296130657196,
+      "learning_rate": 4.806986749838807e-05,
+      "loss": 1.557,
+      "step": 45
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 0.30464908480644226,
+      "learning_rate": 4.773011861809693e-05,
+      "loss": 1.5419,
+      "step": 48
+    },
+    {
+      "epoch": 0.5,
+      "eval_loss": 1.552250862121582,
+      "eval_runtime": 3.3169,
+      "eval_samples_per_second": 48.84,
+      "eval_steps_per_second": 6.331,
+      "step": 48
     }
   ],
   "logging_steps": 3,
@@ -98,7 +162,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5439527023804416.0,
+  "total_flos": 1.06657392623616e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null