AmberYifan commited on
Commit
2fe5333
1 Parent(s): a3f2a32

Model save

Browse files
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.992,
3
  "total_flos": 0.0,
4
- "train_loss": 0.7627417502864715,
5
- "train_runtime": 798.2471,
6
  "train_samples": 2000,
7
- "train_samples_per_second": 2.505,
8
- "train_steps_per_second": 0.078
9
  }
 
1
  {
2
  "epoch": 0.992,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.7615542123394627,
5
+ "train_runtime": 802.2647,
6
  "train_samples": 2000,
7
+ "train_samples_per_second": 2.493,
8
+ "train_steps_per_second": 0.077
9
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38d5040d27160b24c21d4c9c0e182742cd802ddf22bb0559297a7259e00efc31
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fef86d6bc419727e815913d0864fc6143a9e06797a92bc7694e158387164f4f
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2d5942d061d9c3c3bcf20dbbae4581747318592ce798ae4a1a89d2e65bb9924
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d752f381c7bf27493d4b25acbf7fa46e8b7431f88a9bedd6a6bf9e21fbf9c28
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fb7cc9fc0ae1217b6e36e7e113d7c9fce7cff6b0d9b85b0b360895a13fa136e
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a489c2438e7429e922b665007cdb6175369757c4d2ef470e69f6d5889a5393bc
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e75093158ca8cf68884fb57d1bec7b306ae9598bf106e212c4606ec8eaae8d9
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:925b4b04de518c13f0821f4dddb03e382d145281848209b06ff205ab8df40b4d
3
  size 1168138808
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.992,
3
  "total_flos": 0.0,
4
- "train_loss": 0.7627417502864715,
5
- "train_runtime": 798.2471,
6
  "train_samples": 2000,
7
- "train_samples_per_second": 2.505,
8
- "train_steps_per_second": 0.078
9
  }
 
1
  {
2
  "epoch": 0.992,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.7615542123394627,
5
+ "train_runtime": 802.2647,
6
  "train_samples": 2000,
7
+ "train_samples_per_second": 2.493,
8
+ "train_steps_per_second": 0.077
9
  }
trainer_state.json CHANGED
@@ -10,7 +10,7 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.016,
13
- "grad_norm": 406.82268355522706,
14
  "learning_rate": 7.142857142857142e-08,
15
  "logits/generated": -1.7205866575241089,
16
  "logits/real": -1.5578112602233887,
@@ -25,102 +25,102 @@
25
  },
26
  {
27
  "epoch": 0.16,
28
- "grad_norm": 255.30926008233882,
29
  "learning_rate": 4.727272727272727e-07,
30
- "logits/generated": -1.643042802810669,
31
- "logits/real": -1.5913658142089844,
32
- "logps/generated": -247.2132110595703,
33
- "logps/real": -229.58377075195312,
34
- "loss": 0.9075,
35
- "rewards/accuracies": 0.5138888955116272,
36
- "rewards/generated": 0.033381447196006775,
37
- "rewards/margins": 0.06060503050684929,
38
- "rewards/real": 0.09398648142814636,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.32,
43
- "grad_norm": 126.87007618777977,
44
  "learning_rate": 3.818181818181818e-07,
45
- "logits/generated": -1.6706269979476929,
46
- "logits/real": -1.6231067180633545,
47
- "logps/generated": -255.0316619873047,
48
- "logps/real": -233.18115234375,
49
- "loss": 0.8444,
50
- "rewards/accuracies": 0.7250000238418579,
51
- "rewards/generated": -0.507367730140686,
52
- "rewards/margins": 0.4785459637641907,
53
- "rewards/real": -0.028821701183915138,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.48,
58
- "grad_norm": 58.59915104165316,
59
  "learning_rate": 2.909090909090909e-07,
60
- "logits/generated": -1.6554796695709229,
61
- "logits/real": -1.5990644693374634,
62
- "logps/generated": -259.2175598144531,
63
- "logps/real": -229.8797607421875,
64
- "loss": 0.7261,
65
- "rewards/accuracies": 0.800000011920929,
66
- "rewards/generated": -0.31560593843460083,
67
- "rewards/margins": 0.8427440524101257,
68
- "rewards/real": 0.5271381139755249,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.64,
73
- "grad_norm": 69.00503449133376,
74
  "learning_rate": 2e-07,
75
- "logits/generated": -1.6960480213165283,
76
- "logits/real": -1.6210428476333618,
77
- "logps/generated": -269.42431640625,
78
- "logps/real": -236.22348022460938,
79
- "loss": 0.6977,
80
- "rewards/accuracies": 0.824999988079071,
81
- "rewards/generated": -0.6807600259780884,
82
- "rewards/margins": 1.2224102020263672,
83
- "rewards/real": 0.5416500568389893,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.8,
88
- "grad_norm": 57.46050240577938,
89
  "learning_rate": 1.0909090909090908e-07,
90
- "logits/generated": -1.7112897634506226,
91
- "logits/real": -1.6359144449234009,
92
- "logps/generated": -271.5468444824219,
93
- "logps/real": -238.4114532470703,
94
- "loss": 0.6996,
95
- "rewards/accuracies": 0.7749999761581421,
96
- "rewards/generated": -1.030948519706726,
97
- "rewards/margins": 1.2689697742462158,
98
- "rewards/real": 0.2380211055278778,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 0.96,
103
- "grad_norm": 104.69079883597416,
104
  "learning_rate": 1.818181818181818e-08,
105
- "logits/generated": -1.7024548053741455,
106
- "logits/real": -1.6506109237670898,
107
- "logps/generated": -265.20843505859375,
108
- "logps/real": -237.0784454345703,
109
- "loss": 0.7147,
110
- "rewards/accuracies": 0.824999988079071,
111
- "rewards/generated": -0.9356173276901245,
112
- "rewards/margins": 1.1806285381317139,
113
- "rewards/real": 0.24501121044158936,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 0.992,
118
  "step": 62,
119
  "total_flos": 0.0,
120
- "train_loss": 0.7627417502864715,
121
- "train_runtime": 798.2471,
122
- "train_samples_per_second": 2.505,
123
- "train_steps_per_second": 0.078
124
  }
125
  ],
126
  "logging_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.016,
13
+ "grad_norm": 406.3304728752427,
14
  "learning_rate": 7.142857142857142e-08,
15
  "logits/generated": -1.7205866575241089,
16
  "logits/real": -1.5578112602233887,
 
25
  },
26
  {
27
  "epoch": 0.16,
28
+ "grad_norm": 270.8324793326741,
29
  "learning_rate": 4.727272727272727e-07,
30
+ "logits/generated": -1.6502946615219116,
31
+ "logits/real": -1.5962095260620117,
32
+ "logps/generated": -247.24267578125,
33
+ "logps/real": -229.2892303466797,
34
+ "loss": 0.9095,
35
+ "rewards/accuracies": 0.6111111044883728,
36
+ "rewards/generated": 0.03043537400662899,
37
+ "rewards/margins": 0.0930033028125763,
38
+ "rewards/real": 0.12343868613243103,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.32,
43
+ "grad_norm": 107.75622187322996,
44
  "learning_rate": 3.818181818181818e-07,
45
+ "logits/generated": -1.6509218215942383,
46
+ "logits/real": -1.602124810218811,
47
+ "logps/generated": -250.4297637939453,
48
+ "logps/real": -229.20663452148438,
49
+ "loss": 0.8269,
50
+ "rewards/accuracies": 0.6625000238418579,
51
+ "rewards/generated": -0.0471772626042366,
52
+ "rewards/margins": 0.4158110022544861,
53
+ "rewards/real": 0.3686337471008301,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.48,
58
+ "grad_norm": 56.227101064567556,
59
  "learning_rate": 2.909090909090909e-07,
60
+ "logits/generated": -1.6684995889663696,
61
+ "logits/real": -1.6101709604263306,
62
+ "logps/generated": -259.2822265625,
63
+ "logps/real": -230.04443359375,
64
+ "loss": 0.7224,
65
+ "rewards/accuracies": 0.7749999761581421,
66
+ "rewards/generated": -0.3220736086368561,
67
+ "rewards/margins": 0.8327458500862122,
68
+ "rewards/real": 0.5106722116470337,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.64,
73
+ "grad_norm": 67.61806680856738,
74
  "learning_rate": 2e-07,
75
+ "logits/generated": -1.7005382776260376,
76
+ "logits/real": -1.6268432140350342,
77
+ "logps/generated": -267.0151672363281,
78
+ "logps/real": -235.13064575195312,
79
+ "loss": 0.7124,
80
+ "rewards/accuracies": 0.8125,
81
+ "rewards/generated": -0.43984347581863403,
82
+ "rewards/margins": 1.0907765626907349,
83
+ "rewards/real": 0.6509330868721008,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.8,
88
+ "grad_norm": 64.83403783592586,
89
  "learning_rate": 1.0909090909090908e-07,
90
+ "logits/generated": -1.7158596515655518,
91
+ "logits/real": -1.6438223123550415,
92
+ "logps/generated": -269.775390625,
93
+ "logps/real": -237.33114624023438,
94
+ "loss": 0.711,
95
+ "rewards/accuracies": 0.7875000238418579,
96
+ "rewards/generated": -0.8538025617599487,
97
+ "rewards/margins": 1.199856162071228,
98
+ "rewards/real": 0.34605351090431213,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 0.96,
103
+ "grad_norm": 112.17375370240751,
104
  "learning_rate": 1.818181818181818e-08,
105
+ "logits/generated": -1.7108463048934937,
106
+ "logits/real": -1.6622025966644287,
107
+ "logps/generated": -264.43609619140625,
108
+ "logps/real": -236.8936309814453,
109
+ "loss": 0.7073,
110
+ "rewards/accuracies": 0.7875000238418579,
111
+ "rewards/generated": -0.8583856821060181,
112
+ "rewards/margins": 1.1218775510787964,
113
+ "rewards/real": 0.2634918689727783,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 0.992,
118
  "step": 62,
119
  "total_flos": 0.0,
120
+ "train_loss": 0.7615542123394627,
121
+ "train_runtime": 802.2647,
122
+ "train_samples_per_second": 2.493,
123
+ "train_steps_per_second": 0.077
124
  }
125
  ],
126
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7b5ab46cfe9caa7e1ba22a39d38b2d8f60fa402b1de5d1038e9b55ab8a150ae
3
  size 6392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:272c3175b7bf97aba5f7a059b43872d5721fcf13e3d41aa2103487725adbb20e
3
  size 6392