bitsoko committed on
Commit
755d142
·
verified ·
1 Parent(s): 5c6903f

Training in progress, step 50, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": "unsloth",
22
  "target_modules": [
23
- "down_proj",
24
- "o_proj",
25
- "v_proj",
26
- "up_proj",
27
- "k_proj",
28
  "q_proj",
29
- "gate_proj"
 
 
 
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": "unsloth",
22
  "target_modules": [
 
 
 
 
 
23
  "q_proj",
24
+ "k_proj",
25
+ "up_proj",
26
+ "gate_proj",
27
+ "v_proj",
28
+ "down_proj",
29
+ "o_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f79c51cd876c8b5b6f891bdff8fd168dd531ab20717f8389982574d19f0dbea0
3
  size 1912664024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46e19924cdd9d90593dba79dcc7bb890667c5dee7cb8242c8ece6b143bb46c9f
3
  size 1912664024
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bf021cd132f16efcbc21f9c136335d5522103e91e16b4f5562176a75367b8c4
3
  size 958697364
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5461d77cb41a184a2bff02c6a64c2df7c57bc39308a8adb227ef080fe473bb0d
3
  size 958697364
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4039ca40b296c95519ba3582af20e2ac85fa629c760519260b975e4ffc9aa9b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43eaf6e8b9b7e97d802563efbae1976d32c9bf9869f2ff306873ea7672065324
3
  size 1064
last-checkpoint/tokenizer_config.json CHANGED
@@ -122,7 +122,7 @@
122
  "legacy": false,
123
  "model_max_length": 4096,
124
  "pad_token": "<|placeholder6|>",
125
- "padding_side": "left",
126
  "sp_model_kwargs": {},
127
  "tokenizer_class": "LlamaTokenizer",
128
  "unk_token": "<unk>",
 
122
  "legacy": false,
123
  "model_max_length": 4096,
124
  "pad_token": "<|placeholder6|>",
125
+ "padding_side": "right",
126
  "sp_model_kwargs": {},
127
  "tokenizer_class": "LlamaTokenizer",
128
  "unk_token": "<unk>",
last-checkpoint/trainer_state.json CHANGED
@@ -10,32 +10,32 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.0012270691453463403,
13
- "grad_norm": NaN,
14
- "learning_rate": 0.0,
15
- "loss": 3.3245,
16
  "step": 20
17
  },
18
  {
19
  "epoch": 0.0012270691453463403,
20
- "eval_loss": 1.7450172901153564,
21
- "eval_runtime": 24.6271,
22
- "eval_samples_per_second": 4.061,
23
- "eval_steps_per_second": 0.528,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.0024541382906926807,
28
- "grad_norm": 0.0,
29
- "learning_rate": 0.0001999263532588683,
30
- "loss": 3.5708,
31
  "step": 40
32
  },
33
  {
34
  "epoch": 0.0024541382906926807,
35
- "eval_loss": 1.7450109720230103,
36
- "eval_runtime": 26.043,
37
- "eval_samples_per_second": 3.84,
38
- "eval_steps_per_second": 0.499,
39
  "step": 40
40
  }
41
  ],
@@ -44,7 +44,7 @@
44
  "num_input_tokens_seen": 0,
45
  "num_train_epochs": 1,
46
  "save_steps": 50,
47
- "total_flos": 3253280243097600.0,
48
  "train_batch_size": 2,
49
  "trial_name": null,
50
  "trial_params": null
 
10
  "log_history": [
11
  {
12
  "epoch": 0.0012270691453463403,
13
+ "grad_norm": 0.05003070831298828,
14
+ "learning_rate": 0.00019981588314717073,
15
+ "loss": 2.6972,
16
  "step": 20
17
  },
18
  {
19
  "epoch": 0.0012270691453463403,
20
+ "eval_loss": 2.2967841625213623,
21
+ "eval_runtime": 23.2641,
22
+ "eval_samples_per_second": 4.298,
23
+ "eval_steps_per_second": 0.559,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.0024541382906926807,
28
+ "grad_norm": 0.07180789858102798,
29
+ "learning_rate": 0.00019957039401006504,
30
+ "loss": 2.2022,
31
  "step": 40
32
  },
33
  {
34
  "epoch": 0.0024541382906926807,
35
+ "eval_loss": 2.068006992340088,
36
+ "eval_runtime": 23.5719,
37
+ "eval_samples_per_second": 4.242,
38
+ "eval_steps_per_second": 0.552,
39
  "step": 40
40
  }
41
  ],
 
44
  "num_input_tokens_seen": 0,
45
  "num_train_epochs": 1,
46
  "save_steps": 50,
47
+ "total_flos": 3276518679244800.0,
48
  "train_batch_size": 2,
49
  "trial_name": null,
50
  "trial_params": null