vvduc03 committed on
Commit
1f1aabb
1 Parent(s): 81d6d1b

vvduc03/lora-llava-3b

Browse files
adapter_config.json CHANGED
@@ -19,9 +19,9 @@
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
- "q_proj",
23
  "k_proj",
24
- "v_proj"
 
25
  ],
26
  "task_type": null
27
  }
 
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
 
22
  "k_proj",
23
+ "v_proj",
24
+ "q_proj"
25
  ],
26
  "task_type": null
27
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:943d0f3883083f3d9820d0c1abd7af634249a9b793eae7189fe2599276553575
3
  size 47237856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaf64a0c26cb48b3c7c523d0cf1b5a68debca5533ad7777755e0a6f735296173
3
  size 47237856
trainer_state.json CHANGED
@@ -10,90 +10,90 @@
10
  "log_history": [
11
  {
12
  "epoch": 20.0,
13
- "grad_norm": 0.38967031240463257,
14
  "learning_rate": 8e-05,
15
- "loss": 0.1783,
16
  "step": 20
17
  },
18
  {
19
  "epoch": 40.0,
20
- "grad_norm": 0.009602731093764305,
21
  "learning_rate": 0.00016,
22
- "loss": 0.0093,
23
  "step": 40
24
  },
25
  {
26
  "epoch": 60.0,
27
- "grad_norm": 0.000580370076932013,
28
  "learning_rate": 0.0001866666666666667,
29
  "loss": 0.0,
30
  "step": 60
31
  },
32
  {
33
  "epoch": 80.0,
34
- "grad_norm": 0.00020515041251201183,
35
  "learning_rate": 0.00016,
36
  "loss": 0.0,
37
  "step": 80
38
  },
39
  {
40
  "epoch": 100.0,
41
- "grad_norm": 0.00012300981325097382,
42
  "learning_rate": 0.00013333333333333334,
43
  "loss": 0.0,
44
  "step": 100
45
  },
46
  {
47
  "epoch": 120.0,
48
- "grad_norm": 9.383377619087696e-05,
49
  "learning_rate": 0.00010666666666666667,
50
  "loss": 0.0,
51
  "step": 120
52
  },
53
  {
54
  "epoch": 140.0,
55
- "grad_norm": 8.079586405074224e-05,
56
  "learning_rate": 8e-05,
57
  "loss": 0.0,
58
  "step": 140
59
  },
60
  {
61
  "epoch": 160.0,
62
- "grad_norm": 7.359922165051103e-05,
63
  "learning_rate": 5.333333333333333e-05,
64
  "loss": 0.0,
65
  "step": 160
66
  },
67
  {
68
  "epoch": 180.0,
69
- "grad_norm": 6.797131209168583e-05,
70
  "learning_rate": 2.6666666666666667e-05,
71
  "loss": 0.0,
72
  "step": 180
73
  },
74
  {
75
  "epoch": 200.0,
76
- "grad_norm": 6.64097024127841e-05,
77
  "learning_rate": 0.0,
78
  "loss": 0.0,
79
  "step": 200
80
  },
81
  {
82
  "epoch": 200.0,
83
- "eval_loss": 1.2874310414190404e-05,
84
- "eval_runtime": 0.4025,
85
- "eval_samples_per_second": 2.485,
86
- "eval_steps_per_second": 2.485,
87
  "step": 200
88
  },
89
  {
90
  "epoch": 200.0,
91
  "step": 200,
92
  "total_flos": 1572670596710400.0,
93
- "train_loss": 0.018764802380228502,
94
- "train_runtime": 147.4566,
95
- "train_samples_per_second": 21.701,
96
- "train_steps_per_second": 1.356
97
  }
98
  ],
99
  "logging_steps": 20,
 
10
  "log_history": [
11
  {
12
  "epoch": 20.0,
13
+ "grad_norm": 0.38066378235816956,
14
  "learning_rate": 8e-05,
15
+ "loss": 0.1782,
16
  "step": 20
17
  },
18
  {
19
  "epoch": 40.0,
20
+ "grad_norm": 0.008850287646055222,
21
  "learning_rate": 0.00016,
22
+ "loss": 0.0092,
23
  "step": 40
24
  },
25
  {
26
  "epoch": 60.0,
27
+ "grad_norm": 0.0006731236935593188,
28
  "learning_rate": 0.0001866666666666667,
29
  "loss": 0.0,
30
  "step": 60
31
  },
32
  {
33
  "epoch": 80.0,
34
+ "grad_norm": 0.00021651879069395363,
35
  "learning_rate": 0.00016,
36
  "loss": 0.0,
37
  "step": 80
38
  },
39
  {
40
  "epoch": 100.0,
41
+ "grad_norm": 0.00013040985504630953,
42
  "learning_rate": 0.00013333333333333334,
43
  "loss": 0.0,
44
  "step": 100
45
  },
46
  {
47
  "epoch": 120.0,
48
+ "grad_norm": 9.823329310165718e-05,
49
  "learning_rate": 0.00010666666666666667,
50
  "loss": 0.0,
51
  "step": 120
52
  },
53
  {
54
  "epoch": 140.0,
55
+ "grad_norm": 8.462095138384029e-05,
56
  "learning_rate": 8e-05,
57
  "loss": 0.0,
58
  "step": 140
59
  },
60
  {
61
  "epoch": 160.0,
62
+ "grad_norm": 7.46678197174333e-05,
63
  "learning_rate": 5.333333333333333e-05,
64
  "loss": 0.0,
65
  "step": 160
66
  },
67
  {
68
  "epoch": 180.0,
69
+ "grad_norm": 6.831328937551007e-05,
70
  "learning_rate": 2.6666666666666667e-05,
71
  "loss": 0.0,
72
  "step": 180
73
  },
74
  {
75
  "epoch": 200.0,
76
+ "grad_norm": 6.943791231606156e-05,
77
  "learning_rate": 0.0,
78
  "loss": 0.0,
79
  "step": 200
80
  },
81
  {
82
  "epoch": 200.0,
83
+ "eval_loss": 1.2972288459422998e-05,
84
+ "eval_runtime": 0.4099,
85
+ "eval_samples_per_second": 2.439,
86
+ "eval_steps_per_second": 2.439,
87
  "step": 200
88
  },
89
  {
90
  "epoch": 200.0,
91
  "step": 200,
92
  "total_flos": 1572670596710400.0,
93
+ "train_loss": 0.018746382653935145,
94
+ "train_runtime": 146.7988,
95
+ "train_samples_per_second": 21.799,
96
+ "train_steps_per_second": 1.362
97
  }
98
  ],
99
  "logging_steps": 20,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97a0369021c64d53315ac38195fdc982e14c3b117e8662c6734eec42e120a343
3
  size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de8523b3104361a991d2d52bf0069c4f84e685c4be6c6aada009e81b1c325515
3
  size 5112