Femboyuwu2000 committed
Commit f922a81
1 Parent(s): 677ae12

Training in progress, step 6160, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0fa290a324b0413b280d2bace84c9e16ff480cfdf57669d0e4aa091cc96a7f3a
+ oid sha256:3e5f6360dfa9c2a789402315eb51b17d5c5437a5e36f3a7f4a8ff1180c978bd7
  size 13982248
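
The block above is a Git LFS pointer (as are the optimizer, RNG-state, and scheduler files below): the actual tensor data lives in LFS storage, and the repository records only a sha256 object id and the byte size. A minimal verification sketch, assuming the checkpoint file has already been downloaded locally (the path below is illustrative), that checks a download against the updated pointer:

import hashlib
import os

# Values copied from the updated pointer for adapter_model.safetensors above.
EXPECTED_OID = "3e5f6360dfa9c2a789402315eb51b17d5c5437a5e36f3a7f4a8ff1180c978bd7"
EXPECTED_SIZE = 13982248

# Hypothetical local path to the downloaded file.
path = "last-checkpoint/adapter_model.safetensors"

# Cheap check first: the pointer's recorded size must match the file on disk.
assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch"

# Then hash the file in chunks and compare against the pointer's oid.
sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)
assert sha.hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("adapter_model.safetensors matches its LFS pointer")
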
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:155d752646efe8e0f6b33728b9a2153eb61f59385cfa4bc37a58d1e634a84306
+ oid sha256:ef67c02a39ed9ea9db9f9f9072b9280130156e0f2c168f6d3433e11a691906a0
  size 7062522
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f1814d7b39176db5919fcc37c67d779f76326615526c5fa198e939a90b9adff4
+ oid sha256:059a6e0450ff9489b2174c9501bf25eb13b58d453dbd5dc4fed1c213de807539
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:541ffa1c9c0ef80e1cac33b6e46c86e1b24666070b4e0e70f8ae6ca13ce53e48
+ oid sha256:1291563f0ece66a71bc509a68887ccf6e08065ec53a4ab16c369d685623ef0fa
  size 1064
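
optimizer.pt, scheduler.pt, and rng_state.pth hold the optimizer, learning-rate-scheduler, and random-number-generator state that transformers' Trainer reloads when a run is resumed from this checkpoint. A minimal inspection sketch, assuming the checkpoint directory has been downloaded locally and PyTorch is installed (the paths are illustrative):

import torch

# Hypothetical local paths to the torch-serialized state files saved with the checkpoint.
optimizer_state = torch.load("last-checkpoint/optimizer.pt", map_location="cpu")
scheduler_state = torch.load("last-checkpoint/scheduler.pt", map_location="cpu")

# An optimizer state_dict normally has "state" and "param_groups" entries;
# the scheduler state is a small dict that tracks how many steps have been taken.
print(sorted(optimizer_state.keys()))
print(scheduler_state)
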
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.4864,
+ "epoch": 0.4928,
  "eval_steps": 500,
- "global_step": 6080,
+ "global_step": 6160,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -2135,6 +2135,34 @@
  "learning_rate": 2.447123376605561e-05,
  "loss": 3.4535,
  "step": 6080
+ },
+ {
+ "epoch": 0.49,
+ "grad_norm": 39.040924072265625,
+ "learning_rate": 2.4433512026757668e-05,
+ "loss": 3.5125,
+ "step": 6100
+ },
+ {
+ "epoch": 0.49,
+ "grad_norm": 27.91790008544922,
+ "learning_rate": 2.439569133438243e-05,
+ "loss": 3.5131,
+ "step": 6120
+ },
+ {
+ "epoch": 0.49,
+ "grad_norm": 42.502357482910156,
+ "learning_rate": 2.435777208565106e-05,
+ "loss": 3.5424,
+ "step": 6140
+ },
+ {
+ "epoch": 0.49,
+ "grad_norm": 26.83557891845703,
+ "learning_rate": 2.431975467831853e-05,
+ "loss": 3.4445,
+ "step": 6160
  }
  ],
  "logging_steps": 20,
@@ -2142,7 +2170,7 @@
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 20,
- "total_flos": 1.4386015976914944e+16,
+ "total_flos": 1.4563889634902016e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null