ioseff committed on
Commit
fc5a2ed
1 Parent(s): c2370ce

Upload 12 files

Browse files
README.md CHANGED
@@ -1,10 +1,6 @@
1
  ---
2
  library_name: peft
3
  base_model: mistralai/Mistral-7B-Instruct-v0.3
4
- license: apache-2.0
5
- language:
6
- - en
7
- pipeline_tag: text-generation
8
  ---
9
 
10
  # Model Card for Model ID
 
1
  ---
2
  library_name: peft
3
  base_model: mistralai/Mistral-7B-Instruct-v0.3
 
 
 
 
4
  ---
5
 
6
  # Model Card for Model ID
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:580a33032ba98e3506ee441f4e5f75780f1a03e1ee052c0a5133486f9f6cfd8f
3
  size 109069176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cec231ba6b91706ea2a50580577e766a95a3fe4af139bd3844daa155e75bc98
3
  size 109069176
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58f74c022ab8ac2dd6b4b0b7a61d586792ca5ffc6b996a4cd024f2179c52f128
3
  size 218182586
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ad4878e8a63e316ac11866123641662dc7216d50e67d57bfe0debada823dac3
3
  size 218182586
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb2b34e37db29df208daf0ddbe9d385e4ba6889399d10d69389d14c1cd292838
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6cbfb719333be9e6c83f55aaecd8c648686c99a7490787c95506dc1016037fe
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0a7343915c78125635d9ebba74d66cd6af1a76cc5481916839c6ed63f8cc757
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd2651dbbb234a1169de9db4c1691e20ebcc2a6f2cad7a0b6f3fb47aa10c248f
3
  size 1064
trainer_state.json CHANGED
@@ -1,69 +1,118 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.938271604938271,
5
  "eval_steps": 500,
6
- "global_step": 700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.7054673721340388,
13
- "grad_norm": 0.2144777774810791,
14
  "learning_rate": 0.0002,
15
- "loss": 1.3606,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 1.4109347442680775,
20
- "grad_norm": 0.18283842504024506,
21
  "learning_rate": 0.0002,
22
- "loss": 1.2686,
23
  "step": 200
24
  },
25
  {
26
  "epoch": 2.1164021164021163,
27
- "grad_norm": 0.22576524317264557,
28
  "learning_rate": 0.0002,
29
  "loss": 1.2149,
30
  "step": 300
31
  },
32
  {
33
  "epoch": 2.821869488536155,
34
- "grad_norm": 0.2537195384502411,
35
  "learning_rate": 0.0002,
36
- "loss": 1.1427,
37
  "step": 400
38
  },
39
  {
40
  "epoch": 3.527336860670194,
41
- "grad_norm": 0.36884188652038574,
42
  "learning_rate": 0.0002,
43
- "loss": 1.0694,
44
  "step": 500
45
  },
46
  {
47
  "epoch": 4.232804232804233,
48
- "grad_norm": 0.6655462384223938,
49
  "learning_rate": 0.0002,
50
  "loss": 0.9957,
51
  "step": 600
52
  },
53
  {
54
  "epoch": 4.938271604938271,
55
- "grad_norm": 0.47897958755493164,
56
  "learning_rate": 0.0002,
57
- "loss": 0.972,
58
  "step": 700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  }
60
  ],
61
  "logging_steps": 100,
62
- "max_steps": 705,
63
  "num_input_tokens_seen": 0,
64
- "num_train_epochs": 5,
65
  "save_steps": 100,
66
- "total_flos": 8.23802837336064e+16,
67
  "train_batch_size": 2,
68
  "trial_name": null,
69
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.876543209876543,
5
  "eval_steps": 500,
6
+ "global_step": 1400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.7054673721340388,
13
+ "grad_norm": 0.20514033734798431,
14
  "learning_rate": 0.0002,
15
+ "loss": 1.3614,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 1.4109347442680775,
20
+ "grad_norm": 0.18633601069450378,
21
  "learning_rate": 0.0002,
22
+ "loss": 1.2692,
23
  "step": 200
24
  },
25
  {
26
  "epoch": 2.1164021164021163,
27
+ "grad_norm": 0.22059209644794464,
28
  "learning_rate": 0.0002,
29
  "loss": 1.2149,
30
  "step": 300
31
  },
32
  {
33
  "epoch": 2.821869488536155,
34
+ "grad_norm": 0.2522888779640198,
35
  "learning_rate": 0.0002,
36
+ "loss": 1.1426,
37
  "step": 400
38
  },
39
  {
40
  "epoch": 3.527336860670194,
41
+ "grad_norm": 0.3600117266178131,
42
  "learning_rate": 0.0002,
43
+ "loss": 1.0687,
44
  "step": 500
45
  },
46
  {
47
  "epoch": 4.232804232804233,
48
+ "grad_norm": 0.617989718914032,
49
  "learning_rate": 0.0002,
50
  "loss": 0.9957,
51
  "step": 600
52
  },
53
  {
54
  "epoch": 4.938271604938271,
55
+ "grad_norm": 0.5240411162376404,
56
  "learning_rate": 0.0002,
57
+ "loss": 0.973,
58
  "step": 700
59
+ },
60
+ {
61
+ "epoch": 5.64373897707231,
62
+ "grad_norm": 0.615224301815033,
63
+ "learning_rate": 0.0002,
64
+ "loss": 0.8472,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 6.349206349206349,
69
+ "grad_norm": 0.6250160932540894,
70
+ "learning_rate": 0.0002,
71
+ "loss": 0.7726,
72
+ "step": 900
73
+ },
74
+ {
75
+ "epoch": 7.054673721340388,
76
+ "grad_norm": 0.7413871884346008,
77
+ "learning_rate": 0.0002,
78
+ "loss": 0.7079,
79
+ "step": 1000
80
+ },
81
+ {
82
+ "epoch": 7.760141093474427,
83
+ "grad_norm": 1.084841012954712,
84
+ "learning_rate": 0.0002,
85
+ "loss": 0.6037,
86
+ "step": 1100
87
+ },
88
+ {
89
+ "epoch": 8.465608465608465,
90
+ "grad_norm": 1.128200650215149,
91
+ "learning_rate": 0.0002,
92
+ "loss": 0.5464,
93
+ "step": 1200
94
+ },
95
+ {
96
+ "epoch": 9.171075837742505,
97
+ "grad_norm": 1.0372769832611084,
98
+ "learning_rate": 0.0002,
99
+ "loss": 0.5376,
100
+ "step": 1300
101
+ },
102
+ {
103
+ "epoch": 9.876543209876543,
104
+ "grad_norm": 1.0853991508483887,
105
+ "learning_rate": 0.0002,
106
+ "loss": 0.475,
107
+ "step": 1400
108
  }
109
  ],
110
  "logging_steps": 100,
111
+ "max_steps": 1410,
112
  "num_input_tokens_seen": 0,
113
+ "num_train_epochs": 10,
114
  "save_steps": 100,
115
+ "total_flos": 1.646216165505024e+17,
116
  "train_batch_size": 2,
117
  "trial_name": null,
118
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:881cde0630a4c41c4e442efc0a6da9df8e0c0644d3075b6726cc576bd0b7c0a8
3
  size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14239da92aa96b64b73efe5aa4434231ca52bdacd8fffc77ac6b77246788c490
3
  size 4984