winnieyangwannan committed
Commit 74ac789 · verified · 1 Parent(s): 70b3933

Training in progress, step 70, checkpoint

checkpoint-70/adapter_config.json CHANGED
@@ -23,11 +23,11 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
     "v_proj",
-    "o_proj",
-    "gate_proj",
+    "k_proj",
     "q_proj",
+    "gate_proj",
+    "o_proj",
     "down_proj",
     "up_proj"
   ],
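The adapter_config.json change above is only a reordering of target_modules; PEFT stores this field as a set, so the serialized list order can differ between saves without changing which projection layers receive LoRA adapters. For reference, a minimal sketch of how such a config is typically built (assuming the peft LoraConfig API; r and lora_alpha below are illustrative values, not read from this checkpoint):

from peft import LoraConfig

# Illustrative hyperparameters: r and lora_alpha are assumptions,
# not values taken from this checkpoint's adapter_config.json.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",   # attention projections
        "gate_proj", "up_proj", "down_proj",      # MLP projections
    ],
    task_type="CAUSAL_LM",
)
# peft keeps target_modules as a set internally, so the order written to
# adapter_config.json is not guaranteed to be stable across runs.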
checkpoint-70/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:497c1a2809b16acf1e3de74649a6cd989d7bd32c4b1ca6d8c37789503276c86d
+oid sha256:47885d78c898d6ec34947040784edc7617a4de70342cc80bd6a7688601dbd6ed
 size 145287696
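adapter_model.safetensors (like optimizer.pt, scheduler.pt, and training_args.bin below) is tracked with Git LFS, so the diff only shows the pointer file: the sha256 oid changes while the size stays the same, as expected when a checkpoint of fixed shape is re-saved with new values. A sketch of how one could check a downloaded file against its pointer (standard hashlib streaming; the local path is hypothetical):

import hashlib

def sha256_of_file(path, chunk_size=1 << 20):
    # Stream the file in 1 MiB chunks and return the hex sha256 digest,
    # which is what the Git LFS pointer's oid records.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Hypothetical local path; compare against the + oid in the pointer above.
print(sha256_of_file("checkpoint-70/adapter_model.safetensors")
      == "47885d78c898d6ec34947040784edc7617a4de70342cc80bd6a7688601dbd6ed")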
checkpoint-70/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2386bd2314f4c75cce6957016cf2ab951659d522db66de5ec387b34a52be8cf
+oid sha256:09f2bfd7a1ce01304f4307de61e6f2afea404747362f1b8e7da0f4cb7de6323b
 size 290833618
checkpoint-70/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9316543e09853a805290cd6f5a5bb7f3480626115c4cbde98ef584fa48396437
+oid sha256:174965c0191f95cc5c3ff4ffb7dd99e0ad61f9f08edf482de6218a0cdc8be32f
 size 1064
checkpoint-70/trainer_state.json CHANGED
@@ -10,114 +10,114 @@
   "log_history": [
     {
       "epoch": 0.021739130434782608,
-      "grad_norm": 2.138256549835205,
-      "learning_rate": 4.5e-05,
-      "loss": 2.3657,
+      "grad_norm": 2.2049312591552734,
+      "learning_rate": 4.963768115942029e-05,
+      "loss": 2.3459,
       "step": 10
     },
     {
       "epoch": 0.021739130434782608,
-      "eval_loss": 1.5849242210388184,
-      "eval_runtime": 9.6002,
-      "eval_samples_per_second": 49.999,
-      "eval_steps_per_second": 1.562,
+      "eval_loss": 1.5330805778503418,
+      "eval_runtime": 9.6608,
+      "eval_samples_per_second": 49.685,
+      "eval_steps_per_second": 1.553,
       "step": 10
     },
     {
       "epoch": 0.043478260869565216,
-      "grad_norm": 1.1203280687332153,
-      "learning_rate": 4e-05,
-      "loss": 1.2125,
+      "grad_norm": 1.1927831172943115,
+      "learning_rate": 4.9275362318840584e-05,
+      "loss": 1.1669,
       "step": 20
     },
     {
       "epoch": 0.043478260869565216,
-      "eval_loss": 1.04584538936615,
-      "eval_runtime": 9.677,
-      "eval_samples_per_second": 49.602,
-      "eval_steps_per_second": 1.55,
+      "eval_loss": 1.0271039009094238,
+      "eval_runtime": 9.7215,
+      "eval_samples_per_second": 49.375,
+      "eval_steps_per_second": 1.543,
       "step": 20
     },
     {
       "epoch": 0.06521739130434782,
-      "grad_norm": 1.064527988433838,
-      "learning_rate": 3.5e-05,
-      "loss": 0.867,
+      "grad_norm": 1.0509072542190552,
+      "learning_rate": 4.891304347826087e-05,
+      "loss": 0.8327,
       "step": 30
     },
     {
       "epoch": 0.06521739130434782,
-      "eval_loss": 0.9389117360115051,
-      "eval_runtime": 9.7352,
-      "eval_samples_per_second": 49.305,
-      "eval_steps_per_second": 1.541,
+      "eval_loss": 0.9039635062217712,
+      "eval_runtime": 9.7818,
+      "eval_samples_per_second": 49.071,
+      "eval_steps_per_second": 1.533,
       "step": 30
     },
     {
       "epoch": 0.08695652173913043,
-      "grad_norm": 1.1056835651397705,
-      "learning_rate": 3e-05,
-      "loss": 0.7879,
+      "grad_norm": 0.8860757350921631,
+      "learning_rate": 4.855072463768116e-05,
+      "loss": 0.731,
       "step": 40
     },
     {
       "epoch": 0.08695652173913043,
-      "eval_loss": 0.8724283576011658,
-      "eval_runtime": 9.8116,
-      "eval_samples_per_second": 48.922,
+      "eval_loss": 0.8067704439163208,
+      "eval_runtime": 9.8118,
+      "eval_samples_per_second": 48.921,
       "eval_steps_per_second": 1.529,
       "step": 40
     },
     {
       "epoch": 0.10869565217391304,
-      "grad_norm": 0.8273067474365234,
-      "learning_rate": 2.5e-05,
-      "loss": 0.7057,
+      "grad_norm": 0.57489013671875,
+      "learning_rate": 4.818840579710145e-05,
+      "loss": 0.6501,
       "step": 50
     },
     {
       "epoch": 0.10869565217391304,
-      "eval_loss": 0.8029478788375854,
-      "eval_runtime": 9.8368,
-      "eval_samples_per_second": 48.797,
-      "eval_steps_per_second": 1.525,
+      "eval_loss": 0.7662845849990845,
+      "eval_runtime": 9.8455,
+      "eval_samples_per_second": 48.753,
+      "eval_steps_per_second": 1.524,
       "step": 50
     },
     {
       "epoch": 0.13043478260869565,
-      "grad_norm": 0.5044384598731995,
-      "learning_rate": 2e-05,
-      "loss": 0.6629,
+      "grad_norm": 0.5091506242752075,
+      "learning_rate": 4.782608695652174e-05,
+      "loss": 0.6288,
       "step": 60
     },
     {
       "epoch": 0.13043478260869565,
-      "eval_loss": 0.7792357802391052,
-      "eval_runtime": 9.8462,
-      "eval_samples_per_second": 48.75,
-      "eval_steps_per_second": 1.523,
+      "eval_loss": 0.7400590181350708,
+      "eval_runtime": 9.8714,
+      "eval_samples_per_second": 48.626,
+      "eval_steps_per_second": 1.52,
       "step": 60
     },
     {
       "epoch": 0.15217391304347827,
-      "grad_norm": 0.5917642116546631,
-      "learning_rate": 1.5e-05,
-      "loss": 0.623,
+      "grad_norm": 0.5590857863426208,
+      "learning_rate": 4.746376811594203e-05,
+      "loss": 0.5838,
       "step": 70
     },
     {
       "epoch": 0.15217391304347827,
-      "eval_loss": 0.7707605957984924,
-      "eval_runtime": 9.8356,
-      "eval_samples_per_second": 48.803,
-      "eval_steps_per_second": 1.525,
+      "eval_loss": 0.7244360446929932,
+      "eval_runtime": 9.8787,
+      "eval_samples_per_second": 48.589,
+      "eval_steps_per_second": 1.518,
       "step": 70
     }
   ],
   "logging_steps": 10,
-  "max_steps": 100,
+  "max_steps": 1380,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 1,
+  "num_train_epochs": 3,
   "save_steps": 10,
   "stateful_callbacks": {
     "TrainerControl": {
checkpoint-70/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17a1a7c75c85fbba54f1e97b2cee73c167834890dd0df62950989daaf8975814
+oid sha256:1cdf2cbd96a7d8012e1f3da0569783099ba26a3ecbec680ff36cde09793d0889
 size 5816