emendes3 committed on
Commit 35047d7
1 Parent(s): d2b0a65

End of training

README.md CHANGED
@@ -4,23 +4,16 @@ tags:
 - generated_from_trainer
 base_model: liuhaotian/llava-v1.5-13b
 model-index:
-- name: llava_13b_exact_location_name_synthetic
+- name: liuhaotian/llava-v1.5-13b_1.0
   results: []
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 
-# llava_13b_exact_location_name_synthetic
+# liuhaotian/llava-v1.5-13b_1.0
 
-This model is a fine-tuned version of [liuhaotian/llava-v1.5-13b_2.0](https://huggingface.co/liuhaotian/llava-v1.5-13b_2.0) on an unknown dataset.
-It achieves the following results on the evaluation set:
-- eval_loss: 0.2209
-- eval_runtime: 55.1656
-- eval_samples_per_second: 15.716
-- eval_steps_per_second: 0.508
-- epoch: 1.0
-- step: 28
+This model is a fine-tuned version of [liuhaotian/llava-v1.5-13b_1.0](https://huggingface.co/liuhaotian/llava-v1.5-13b_1.0) on an unknown dataset.
 
 ## Model description
 
@@ -50,7 +43,7 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.03
-- num_epochs: 2.0
+- num_epochs: 10.0
 
 ### Framework versions
 
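The hyperparameter block in the card (Adam with betas=(0.9,0.999) and epsilon=1e-08, a cosine schedule with warmup ratio 0.03, and num_epochs raised from 2.0 to 10.0 in this commit) corresponds to a standard `transformers.TrainingArguments` configuration. A minimal sketch under that assumption follows; the learning rate is taken from the trainer_state.json log further down, while `output_dir` and the batch size are placeholders not present in this commit.

```python
# Hedged sketch: the card's optimizer/scheduler settings expressed through the
# Hugging Face Trainer API. Placeholder values are marked as such; they are not
# taken from this commit.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="llava_13b_finetune",   # placeholder
    num_train_epochs=10.0,             # raised from 2.0 in this commit
    learning_rate=2e-4,                # matches the 0.0002 logged in trainer_state.json
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-08,
    per_device_train_batch_size=16,    # placeholder, not shown in the diff
    logging_steps=1,                   # matches "logging_steps": 1.0 in trainer_state.json
)
```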
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "v_proj",
     "k_proj",
-    "gate_proj",
-    "up_proj",
     "q_proj",
-    "down_proj",
     "o_proj",
-    "v_proj"
+    "gate_proj",
+    "up_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5411dae2e1c74192ad746e7850b567ea81e74ac5ab615f27792ff9fe3c0e5923
+oid sha256:67294656e681be2e83da3526407b218831e97f6f3e4e74c6beeb7c4425892eef
 size 1001466944
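adapter_model.safetensors is tracked with Git LFS, so the diff only swaps the pointer's sha256 oid; the payload stays the same size (1001466944 bytes). A quick way to check that a downloaded copy matches the new pointer, assuming the file sits in the current directory, is sketched below.

```python
# Verify a Git LFS payload against the sha256 oid recorded in its pointer file.
# The filename is assumed to be a local copy of the adapter weights.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

expected = "67294656e681be2e83da3526407b218831e97f6f3e4e74c6beeb7c4425892eef"
print(sha256_of("adapter_model.safetensors") == expected)
```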
trainer_state.json CHANGED
@@ -17,49 +17,49 @@
     {
       "epoch": 0.07,
       "learning_rate": 0.0002,
-      "loss": 1.2004,
+      "loss": 1.2012,
       "step": 2
     },
     {
       "epoch": 0.11,
       "learning_rate": 0.0002,
-      "loss": 1.1709,
+      "loss": 1.1717,
       "step": 3
     },
     {
       "epoch": 0.14,
       "learning_rate": 0.0002,
-      "loss": 1.0756,
+      "loss": 1.0755,
       "step": 4
     },
     {
       "epoch": 0.18,
       "learning_rate": 0.0002,
-      "loss": 1.039,
+      "loss": 1.0384,
       "step": 5
     },
     {
       "epoch": 0.21,
       "learning_rate": 0.0002,
-      "loss": 0.9718,
+      "loss": 0.9714,
       "step": 6
     },
     {
       "epoch": 0.25,
       "learning_rate": 0.0002,
-      "loss": 0.922,
+      "loss": 0.9222,
       "step": 7
     },
     {
       "epoch": 0.29,
       "learning_rate": 0.0002,
-      "loss": 0.9097,
+      "loss": 0.909,
       "step": 8
     },
     {
       "epoch": 0.32,
       "learning_rate": 0.0002,
-      "loss": 0.8924,
+      "loss": 0.8922,
       "step": 9
     },
     {
@@ -71,303 +71,303 @@
     {
       "epoch": 0.39,
       "learning_rate": 0.0002,
-      "loss": 0.8296,
+      "loss": 0.8293,
       "step": 11
     },
     {
       "epoch": 0.43,
       "learning_rate": 0.0002,
-      "loss": 0.7891,
+      "loss": 0.7898,
       "step": 12
     },
     {
       "epoch": 0.46,
       "learning_rate": 0.0002,
-      "loss": 0.7679,
+      "loss": 0.768,
       "step": 13
     },
     {
       "epoch": 0.5,
       "learning_rate": 0.0002,
-      "loss": 0.723,
+      "loss": 0.7237,
       "step": 14
     },
     {
       "epoch": 0.54,
       "learning_rate": 0.0002,
-      "loss": 0.7228,
+      "loss": 0.7225,
       "step": 15
     },
     {
       "epoch": 0.57,
       "learning_rate": 0.0002,
-      "loss": 0.6779,
+      "loss": 0.6761,
       "step": 16
     },
     {
       "epoch": 0.61,
       "learning_rate": 0.0002,
-      "loss": 0.5896,
+      "loss": 0.5894,
       "step": 17
     },
     {
       "epoch": 0.64,
       "learning_rate": 0.0002,
-      "loss": 0.6516,
+      "loss": 0.651,
       "step": 18
     },
     {
       "epoch": 0.68,
       "learning_rate": 0.0002,
-      "loss": 0.5621,
+      "loss": 0.5618,
       "step": 19
     },
     {
       "epoch": 0.71,
       "learning_rate": 0.0002,
-      "loss": 0.6197,
+      "loss": 0.6188,
       "step": 20
     },
     {
       "epoch": 0.75,
       "learning_rate": 0.0002,
-      "loss": 0.4754,
+      "loss": 0.4751,
       "step": 21
     },
     {
       "epoch": 0.79,
       "learning_rate": 0.0002,
-      "loss": 0.4386,
+      "loss": 0.439,
       "step": 22
     },
     {
       "epoch": 0.82,
       "learning_rate": 0.0002,
-      "loss": 0.4126,
+      "loss": 0.4123,
       "step": 23
     },
     {
       "epoch": 0.86,
       "learning_rate": 0.0002,
-      "loss": 0.3274,
+      "loss": 0.3256,
       "step": 24
     },
     {
       "epoch": 0.89,
       "learning_rate": 0.0002,
-      "loss": 0.3908,
+      "loss": 0.3866,
       "step": 25
     },
     {
       "epoch": 0.93,
       "learning_rate": 0.0002,
-      "loss": 0.3058,
+      "loss": 0.3048,
       "step": 26
     },
     {
       "epoch": 0.96,
       "learning_rate": 0.0002,
-      "loss": 0.3605,
+      "loss": 0.361,
       "step": 27
     },
     {
       "epoch": 1.0,
       "learning_rate": 0.0002,
-      "loss": 0.2814,
+      "loss": 0.2796,
       "step": 28
     },
     {
       "epoch": 1.0,
-      "eval_loss": 0.22194796800613403,
-      "eval_runtime": 56.9155,
-      "eval_samples_per_second": 15.233,
-      "eval_steps_per_second": 0.492,
+      "eval_loss": 0.22090579569339752,
+      "eval_runtime": 55.1656,
+      "eval_samples_per_second": 15.716,
+      "eval_steps_per_second": 0.508,
       "step": 28
     },
     {
       "epoch": 1.04,
       "learning_rate": 0.0002,
-      "loss": 0.2038,
+      "loss": 0.202,
       "step": 29
     },
     {
       "epoch": 1.07,
       "learning_rate": 0.0002,
-      "loss": 0.2133,
+      "loss": 0.2117,
       "step": 30
     },
     {
       "epoch": 1.11,
       "learning_rate": 0.0002,
-      "loss": 0.1911,
+      "loss": 0.1917,
       "step": 31
     },
     {
       "epoch": 1.14,
       "learning_rate": 0.0002,
-      "loss": 0.1707,
+      "loss": 0.1686,
       "step": 32
     },
     {
       "epoch": 1.18,
       "learning_rate": 0.0002,
-      "loss": 0.1928,
+      "loss": 0.1938,
       "step": 33
     },
     {
       "epoch": 1.21,
       "learning_rate": 0.0002,
-      "loss": 0.1678,
+      "loss": 0.1667,
       "step": 34
     },
     {
       "epoch": 1.25,
       "learning_rate": 0.0002,
-      "loss": 0.2044,
+      "loss": 0.2046,
       "step": 35
     },
     {
       "epoch": 1.29,
       "learning_rate": 0.0002,
-      "loss": 0.1367,
+      "loss": 0.1348,
       "step": 36
     },
     {
       "epoch": 1.32,
       "learning_rate": 0.0002,
-      "loss": 0.1395,
+      "loss": 0.1376,
       "step": 37
     },
     {
       "epoch": 1.36,
       "learning_rate": 0.0002,
-      "loss": 0.0845,
+      "loss": 0.0836,
       "step": 38
     },
     {
       "epoch": 1.39,
       "learning_rate": 0.0002,
-      "loss": 0.093,
+      "loss": 0.0931,
       "step": 39
     },
     {
       "epoch": 1.43,
       "learning_rate": 0.0002,
-      "loss": 0.1559,
+      "loss": 0.1543,
       "step": 40
     },
     {
       "epoch": 1.46,
       "learning_rate": 0.0002,
-      "loss": 0.1009,
+      "loss": 0.0985,
       "step": 41
     },
     {
       "epoch": 1.5,
       "learning_rate": 0.0002,
-      "loss": 0.0909,
+      "loss": 0.0891,
       "step": 42
     },
     {
       "epoch": 1.54,
       "learning_rate": 0.0002,
-      "loss": 0.1177,
+      "loss": 0.1181,
       "step": 43
     },
     {
       "epoch": 1.57,
       "learning_rate": 0.0002,
-      "loss": 0.1206,
+      "loss": 0.1194,
       "step": 44
     },
     {
       "epoch": 1.61,
       "learning_rate": 0.0002,
-      "loss": 0.062,
+      "loss": 0.0609,
       "step": 45
     },
     {
       "epoch": 1.64,
       "learning_rate": 0.0002,
-      "loss": 0.0648,
+      "loss": 0.0644,
       "step": 46
     },
     {
       "epoch": 1.68,
       "learning_rate": 0.0002,
-      "loss": 0.0793,
+      "loss": 0.0791,
       "step": 47
     },
     {
       "epoch": 1.71,
       "learning_rate": 0.0002,
-      "loss": 0.1064,
+      "loss": 0.1023,
       "step": 48
     },
     {
       "epoch": 1.75,
       "learning_rate": 0.0002,
-      "loss": 0.0924,
+      "loss": 0.0922,
       "step": 49
     },
     {
       "epoch": 1.79,
       "learning_rate": 0.0002,
-      "loss": 0.0518,
+      "loss": 0.0534,
       "step": 50
     },
     {
       "epoch": 1.82,
       "learning_rate": 0.0002,
-      "loss": 0.1159,
+      "loss": 0.1168,
       "step": 51
     },
     {
       "epoch": 1.86,
       "learning_rate": 0.0002,
-      "loss": 0.0479,
+      "loss": 0.0473,
       "step": 52
     },
     {
       "epoch": 1.89,
       "learning_rate": 0.0002,
-      "loss": 0.1041,
+      "loss": 0.1019,
       "step": 53
     },
     {
       "epoch": 1.93,
       "learning_rate": 0.0002,
-      "loss": 0.1483,
+      "loss": 0.1488,
       "step": 54
     },
     {
       "epoch": 1.96,
       "learning_rate": 0.0002,
-      "loss": 0.0986,
+      "loss": 0.1002,
       "step": 55
     },
     {
       "epoch": 2.0,
       "learning_rate": 0.0002,
-      "loss": 0.0352,
+      "loss": 0.0347,
       "step": 56
     },
     {
       "epoch": 2.0,
-      "eval_loss": 0.03910892829298973,
-      "eval_runtime": 56.8268,
-      "eval_samples_per_second": 15.257,
-      "eval_steps_per_second": 0.493,
+      "eval_loss": 0.03905528038740158,
+      "eval_runtime": 55.2135,
+      "eval_samples_per_second": 15.703,
+      "eval_steps_per_second": 0.507,
       "step": 56
     },
     {
       "epoch": 2.0,
       "step": 56,
       "total_flos": 1.65447188707541e+17,
-      "train_loss": 0.42645503600527135,
-      "train_runtime": 568.7271,
-      "train_samples_per_second": 3.049,
-      "train_steps_per_second": 0.098
+      "train_loss": 0.4258737172266202,
+      "train_runtime": 620.5334,
+      "train_samples_per_second": 2.794,
+      "train_steps_per_second": 0.09
     }
   ],
   "logging_steps": 1.0,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ca823d71f9f1dfe99c4c12b322baa8687b341261d17a6a38a37d4ca0be5ed1d7
+oid sha256:916f5d701e6ee4989937509630b933bc38f9bbd0f859c786e9046ea2551d2f15
 size 6840