CharlesLi committed
Commit e57e851 · verified · 1 Parent(s): a36093c

Model save

Files changed (4):
1. README.md +11 -12
2. all_results.json +6 -11
3. train_results.json +6 -6
4. trainer_state.json +84 -84
README.md CHANGED
@@ -3,7 +3,6 @@ base_model: meta-llama/Llama-2-7b-chat-hf
  library_name: peft
  license: llama2
  tags:
- - alignment-handbook
  - trl
  - sft
  - generated_from_trainer
@@ -19,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->

  This model is a fine-tuned version of [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) on the None dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.7570
+ - Loss: 0.7426

  ## Model description

@@ -56,16 +55,16 @@ The following hyperparameters were used during training:

  | Training Loss | Epoch | Step | Validation Loss |
  |:-------------:|:------:|:----:|:---------------:|
- | 2.0231 | 0.7692 | 5 | 1.8074 |
- | 1.2461 | 1.5385 | 10 | 1.3001 |
- | 1.0347 | 2.3077 | 15 | 1.1189 |
- | 1.0003 | 3.0769 | 20 | 1.0753 |
- | 0.9236 | 4.0769 | 25 | 0.9328 |
- | 0.7969 | 4.8462 | 30 | 0.8437 |
- | 0.7475 | 5.6154 | 35 | 0.7918 |
- | 0.7378 | 6.3846 | 40 | 0.7680 |
- | 0.6801 | 7.1538 | 45 | 0.7585 |
- | 0.6816 | 7.9231 | 50 | 0.7570 |
+ | 2.1683 | 0.7692 | 5 | 2.2065 |
+ | 1.3617 | 1.5385 | 10 | 1.3566 |
+ | 1.038 | 2.3077 | 15 | 0.9891 |
+ | 0.8968 | 3.0769 | 20 | 0.8804 |
+ | 0.7897 | 3.8462 | 25 | 0.8087 |
+ | 0.6971 | 4.6154 | 30 | 0.7661 |
+ | 0.6746 | 5.3846 | 35 | 0.7485 |
+ | 0.6274 | 6.1538 | 40 | 0.7473 |
+ | 0.6142 | 6.9231 | 45 | 0.7422 |
+ | 0.589 | 7.6923 | 50 | 0.7426 |


  ### Framework versions
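The updated card still documents a PEFT adapter trained on top of meta-llama/Llama-2-7b-chat-hf. For reference, a minimal sketch of loading such an adapter with transformers and peft is shown below; the adapter repo id is a placeholder (this commit does not spell it out here), and access to the gated Llama 2 base weights is assumed.

```python
# Minimal sketch: attach the saved PEFT adapter to the Llama-2-7b-chat base model.
# "CharlesLi/<adapter-repo>" is a placeholder -- substitute the actual repository id.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_id = "meta-llama/Llama-2-7b-chat-hf"   # gated; requires approved access
adapter_id = "CharlesLi/<adapter-repo>"     # placeholder for this repository

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(
    base_id, torch_dtype=torch.float16, device_map="auto"
)
model = PeftModel.from_pretrained(base, adapter_id)  # loads the adapter weights

inputs = tokenizer("Hello, how are you?", return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```

Calling `model.merge_and_unload()` afterwards is optional and only needed if a standalone merged checkpoint is wanted.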
all_results.json CHANGED
@@ -1,14 +1,9 @@
  {
- "epoch": 7.923076923076923,
- "eval_loss": 0.7570110559463501,
- "eval_runtime": 1.3606,
- "eval_samples": 20,
- "eval_samples_per_second": 14.7,
- "eval_steps_per_second": 2.205,
- "total_flos": 1135546662912.0,
- "train_loss": 0.4567394733428955,
- "train_runtime": 115.4286,
+ "epoch": 7.6923076923076925,
+ "total_flos": 1123928997888.0,
+ "train_loss": 0.9408295249938965,
+ "train_runtime": 179.2278,
  "train_samples": 100,
- "train_samples_per_second": 6.931,
- "train_steps_per_second": 0.433
+ "train_samples_per_second": 4.464,
+ "train_steps_per_second": 0.279
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
  {
- "epoch": 7.923076923076923,
- "total_flos": 1135546662912.0,
- "train_loss": 0.4567394733428955,
- "train_runtime": 115.4286,
+ "epoch": 7.6923076923076925,
+ "total_flos": 1123928997888.0,
+ "train_loss": 0.9408295249938965,
+ "train_runtime": 179.2278,
  "train_samples": 100,
- "train_samples_per_second": 6.931,
- "train_steps_per_second": 0.433
+ "train_samples_per_second": 4.464,
+ "train_steps_per_second": 0.279
  }
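The new throughput fields are mutually consistent under the usual transformers Trainer accounting, in which the reported sample count is train_samples × num_train_epochs. A quick check is sketched below; num_train_epochs = 8 is an assumption inferred from the 50-step, ~7.69-epoch run, not a value stated in this commit.

```python
# Sanity-check sketch for train_results.json, under the assumptions noted above.
train_runtime = 179.2278
train_samples = 100
num_train_epochs = 8   # assumed training config (inferred, not stated here)
total_steps = 50       # "global_step" from trainer_state.json

samples_per_second = train_samples * num_train_epochs / train_runtime
steps_per_second = total_steps / train_runtime
print(round(samples_per_second, 3), round(steps_per_second, 3))  # ~4.464, ~0.279
```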
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 7.923076923076923,
+ "epoch": 7.6923076923076925,
  "eval_steps": 5,
  "global_step": 50,
  "is_hyper_param_search": false,
@@ -10,169 +10,169 @@
  "log_history": [
  {
  "epoch": 0.15384615384615385,
- "grad_norm": 0.9935800099826351,
- "learning_rate": 0.0001,
+ "grad_norm": 0.9936393869149185,
+ "learning_rate": 4e-05,
  "loss": 1.9258,
  "step": 1
  },
  {
  "epoch": 0.7692307692307693,
- "grad_norm": 1.2486035475371888,
- "learning_rate": 0.00018660254037844388,
- "loss": 2.0231,
+ "grad_norm": 1.2614418414941255,
+ "learning_rate": 0.0002,
+ "loss": 2.1683,
  "step": 5
  },
  {
  "epoch": 0.7692307692307693,
- "eval_loss": 1.8074404001235962,
- "eval_runtime": 2.6326,
- "eval_samples_per_second": 7.597,
+ "eval_loss": 2.206515073776245,
+ "eval_runtime": 2.6311,
+ "eval_samples_per_second": 7.601,
  "eval_steps_per_second": 1.14,
  "step": 5
  },
  {
  "epoch": 1.5384615384615383,
- "grad_norm": 0.6291642388150667,
- "learning_rate": 0.00011736481776669306,
- "loss": 1.2461,
+ "grad_norm": 0.665258996028448,
+ "learning_rate": 0.00019396926207859084,
+ "loss": 1.3617,
  "step": 10
  },
  {
  "epoch": 1.5384615384615383,
- "eval_loss": 1.3001079559326172,
- "eval_runtime": 1.3446,
- "eval_samples_per_second": 14.874,
- "eval_steps_per_second": 2.231,
+ "eval_loss": 1.3565729856491089,
+ "eval_runtime": 1.3409,
+ "eval_samples_per_second": 14.915,
+ "eval_steps_per_second": 2.237,
  "step": 10
  },
  {
  "epoch": 2.3076923076923075,
- "grad_norm": 0.44475405058884243,
- "learning_rate": 3.5721239031346066e-05,
- "loss": 1.0347,
+ "grad_norm": 0.6002049032359591,
+ "learning_rate": 0.0001766044443118978,
+ "loss": 1.038,
  "step": 15
  },
  {
  "epoch": 2.3076923076923075,
- "eval_loss": 1.1189016103744507,
- "eval_runtime": 1.34,
- "eval_samples_per_second": 14.926,
+ "eval_loss": 0.989143967628479,
+ "eval_runtime": 1.3397,
+ "eval_samples_per_second": 14.928,
  "eval_steps_per_second": 2.239,
  "step": 15
  },
  {
  "epoch": 3.076923076923077,
- "grad_norm": 0.5502394281465781,
- "learning_rate": 0.0,
- "loss": 1.0003,
+ "grad_norm": 0.4625631758370509,
+ "learning_rate": 0.00015000000000000001,
+ "loss": 0.8968,
  "step": 20
  },
  {
  "epoch": 3.076923076923077,
- "eval_loss": 1.0752954483032227,
- "eval_runtime": 1.3421,
- "eval_samples_per_second": 14.902,
- "eval_steps_per_second": 2.235,
+ "eval_loss": 0.8804405927658081,
+ "eval_runtime": 1.33,
+ "eval_samples_per_second": 15.038,
+ "eval_steps_per_second": 2.256,
  "step": 20
  },
  {
- "epoch": 4.076923076923077,
- "grad_norm": 0.9000202028751579,
+ "epoch": 3.8461538461538463,
+ "grad_norm": 0.8748995670117777,
  "learning_rate": 0.00011736481776669306,
- "loss": 0.9236,
+ "loss": 0.7897,
  "step": 25
  },
  {
- "epoch": 4.076923076923077,
- "eval_loss": 0.9328290820121765,
- "eval_runtime": 2.5854,
- "eval_samples_per_second": 7.736,
- "eval_steps_per_second": 1.16,
+ "epoch": 3.8461538461538463,
+ "eval_loss": 0.8086840510368347,
+ "eval_runtime": 1.3392,
+ "eval_samples_per_second": 14.934,
+ "eval_steps_per_second": 2.24,
  "step": 25
  },
  {
- "epoch": 4.846153846153846,
- "grad_norm": 2.9290091595676615,
+ "epoch": 4.615384615384615,
+ "grad_norm": 0.20438610348674396,
  "learning_rate": 8.263518223330697e-05,
- "loss": 0.7969,
+ "loss": 0.6971,
  "step": 30
  },
  {
- "epoch": 4.846153846153846,
- "eval_loss": 0.8436583280563354,
- "eval_runtime": 1.3222,
- "eval_samples_per_second": 15.126,
- "eval_steps_per_second": 2.269,
+ "epoch": 4.615384615384615,
+ "eval_loss": 0.7661463618278503,
+ "eval_runtime": 1.3535,
+ "eval_samples_per_second": 14.777,
+ "eval_steps_per_second": 2.217,
  "step": 30
  },
  {
- "epoch": 5.615384615384615,
- "grad_norm": 0.39504894482406727,
+ "epoch": 5.384615384615385,
+ "grad_norm": 0.24634077551831257,
  "learning_rate": 5.000000000000002e-05,
- "loss": 0.7475,
+ "loss": 0.6746,
  "step": 35
  },
  {
- "epoch": 5.615384615384615,
- "eval_loss": 0.7918258905410767,
- "eval_runtime": 1.3346,
- "eval_samples_per_second": 14.986,
- "eval_steps_per_second": 2.248,
+ "epoch": 5.384615384615385,
+ "eval_loss": 0.7485342025756836,
+ "eval_runtime": 1.3278,
+ "eval_samples_per_second": 15.062,
+ "eval_steps_per_second": 2.259,
  "step": 35
  },
  {
- "epoch": 6.384615384615385,
- "grad_norm": 0.31285894816224147,
+ "epoch": 6.153846153846154,
+ "grad_norm": 0.37266569177931286,
  "learning_rate": 2.339555568810221e-05,
- "loss": 0.7378,
+ "loss": 0.6274,
  "step": 40
  },
  {
- "epoch": 6.384615384615385,
- "eval_loss": 0.7679780125617981,
- "eval_runtime": 1.3224,
- "eval_samples_per_second": 15.124,
- "eval_steps_per_second": 2.269,
+ "epoch": 6.153846153846154,
+ "eval_loss": 0.7472798228263855,
+ "eval_runtime": 1.3315,
+ "eval_samples_per_second": 15.02,
+ "eval_steps_per_second": 2.253,
  "step": 40
  },
  {
- "epoch": 7.153846153846154,
- "grad_norm": 0.21948125726865245,
+ "epoch": 6.923076923076923,
+ "grad_norm": 0.27018938207785786,
  "learning_rate": 6.030737921409169e-06,
- "loss": 0.6801,
+ "loss": 0.6142,
  "step": 45
  },
  {
- "epoch": 7.153846153846154,
- "eval_loss": 0.7584531903266907,
- "eval_runtime": 1.3195,
- "eval_samples_per_second": 15.157,
- "eval_steps_per_second": 2.274,
+ "epoch": 6.923076923076923,
+ "eval_loss": 0.7422198057174683,
+ "eval_runtime": 1.3271,
+ "eval_samples_per_second": 15.07,
+ "eval_steps_per_second": 2.26,
  "step": 45
  },
  {
- "epoch": 7.923076923076923,
- "grad_norm": 0.307143919357652,
+ "epoch": 7.6923076923076925,
+ "grad_norm": 0.3257049326382863,
  "learning_rate": 0.0,
- "loss": 0.6816,
+ "loss": 0.589,
  "step": 50
  },
  {
- "epoch": 7.923076923076923,
- "eval_loss": 0.7570110559463501,
- "eval_runtime": 1.3241,
- "eval_samples_per_second": 15.104,
- "eval_steps_per_second": 2.266,
+ "epoch": 7.6923076923076925,
+ "eval_loss": 0.7426176071166992,
+ "eval_runtime": 1.3347,
+ "eval_samples_per_second": 14.985,
+ "eval_steps_per_second": 2.248,
  "step": 50
  },
  {
- "epoch": 7.923076923076923,
+ "epoch": 7.6923076923076925,
  "step": 50,
- "total_flos": 1135546662912.0,
- "train_loss": 0.4567394733428955,
- "train_runtime": 115.4286,
- "train_samples_per_second": 6.931,
- "train_steps_per_second": 0.433
+ "total_flos": 1123928997888.0,
+ "train_loss": 0.9408295249938965,
+ "train_runtime": 179.2278,
+ "train_samples_per_second": 4.464,
+ "train_steps_per_second": 0.279
  }
  ],
  "logging_steps": 5,
@@ -192,7 +192,7 @@
  "attributes": {}
  }
  },
- "total_flos": 1135546662912.0,
+ "total_flos": 1123928997888.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null