AmberYifan committed on
Commit
dce475a
1 Parent(s): 7b9d965

Model save

Browse files
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.992,
3
  "total_flos": 0.0,
4
- "train_loss": 0.11663837057928886,
5
- "train_runtime": 749.8498,
6
  "train_samples": 1999,
7
- "train_samples_per_second": 2.666,
8
- "train_steps_per_second": 0.083
9
  }
 
1
  {
2
  "epoch": 0.992,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.10689828472752724,
5
+ "train_runtime": 796.2858,
6
  "train_samples": 1999,
7
+ "train_samples_per_second": 2.51,
8
+ "train_steps_per_second": 0.078
9
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3713e12ac5f0eaa7b9be1100cbb0063059bef563f96dbddbdeb7155ca520f223
3
  size 4949453792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f28f98ac0660e96a5060de697a7425d8b1ded924d6467ea848f622456ce98614
3
  size 4949453792
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:061e5f5bf83531ae381007b5f84f6b5dcfeeab13cb5e9b578446e4f773eea97b
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd6e9d95663fc3a9e5c1682afa197be3195382029688aaf891727289345c3fb5
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87b37957d2c2faba2f5d8b333de4c3984232cfdaa3950238aada2f3e966fb4ba
3
  size 4546807800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b041eede0b6435f399b5424767050eadadee3fe095c4ab9febf64739c59403e
3
  size 4546807800
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.992,
3
  "total_flos": 0.0,
4
- "train_loss": 0.11663837057928886,
5
- "train_runtime": 749.8498,
6
  "train_samples": 1999,
7
- "train_samples_per_second": 2.666,
8
- "train_steps_per_second": 0.083
9
  }
 
1
  {
2
  "epoch": 0.992,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.10689828472752724,
5
+ "train_runtime": 796.2858,
6
  "train_samples": 1999,
7
+ "train_samples_per_second": 2.51,
8
+ "train_steps_per_second": 0.078
9
  }
trainer_state.json CHANGED
@@ -10,13 +10,13 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.016,
13
- "grad_norm": 270.6478620436586,
14
  "learning_rate": 7.142857142857142e-08,
15
- "logits/generated": -2.8518388271331787,
16
- "logits/real": -2.431556224822998,
17
- "logps/generated": -241.12493896484375,
18
- "logps/real": -88.04447174072266,
19
- "loss": 0.788,
20
  "rewards/accuracies": 0.0,
21
  "rewards/generated": 0.0,
22
  "rewards/margins": 0.0,
@@ -25,102 +25,102 @@
25
  },
26
  {
27
  "epoch": 0.16,
28
- "grad_norm": 3.4825336012871664,
29
  "learning_rate": 4.727272727272727e-07,
30
- "logits/generated": -3.112872362136841,
31
- "logits/real": -2.3448193073272705,
32
- "logps/generated": -285.2105712890625,
33
- "logps/real": -104.45608520507812,
34
- "loss": 0.279,
35
  "rewards/accuracies": 0.8888888955116272,
36
- "rewards/generated": -3.95505690574646,
37
- "rewards/margins": 5.338830471038818,
38
- "rewards/real": 1.3837733268737793,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.32,
43
- "grad_norm": 0.900671571621766,
44
  "learning_rate": 3.818181818181818e-07,
45
- "logits/generated": -3.139878749847412,
46
- "logits/real": -2.3643927574157715,
47
- "logps/generated": -320.77362060546875,
48
- "logps/real": -73.03101348876953,
49
- "loss": 0.0785,
50
  "rewards/accuracies": 1.0,
51
- "rewards/generated": -8.526379585266113,
52
- "rewards/margins": 11.384855270385742,
53
- "rewards/real": 2.858475923538208,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.48,
58
- "grad_norm": 1.4149577906908892,
59
  "learning_rate": 2.909090909090909e-07,
60
- "logits/generated": -3.261312484741211,
61
- "logits/real": -2.3303236961364746,
62
- "logps/generated": -352.6148376464844,
63
- "logps/real": -65.670654296875,
64
- "loss": 0.0706,
65
  "rewards/accuracies": 1.0,
66
- "rewards/generated": -10.558695793151855,
67
- "rewards/margins": 13.603212356567383,
68
- "rewards/real": 3.0445168018341064,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.64,
73
- "grad_norm": 1.78406972696896,
74
  "learning_rate": 2e-07,
75
- "logits/generated": -3.309864044189453,
76
- "logits/real": -2.4957680702209473,
77
- "logps/generated": -344.9488525390625,
78
- "logps/real": -93.11787414550781,
79
- "loss": 0.0814,
80
  "rewards/accuracies": 1.0,
81
- "rewards/generated": -10.671560287475586,
82
- "rewards/margins": 14.303810119628906,
83
- "rewards/real": 3.632251262664795,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.8,
88
- "grad_norm": 0.918331726023657,
89
  "learning_rate": 1.0909090909090908e-07,
90
- "logits/generated": -3.2701480388641357,
91
- "logits/real": -2.330775737762451,
92
- "logps/generated": -352.88299560546875,
93
- "logps/real": -85.17743682861328,
94
- "loss": 0.0762,
95
  "rewards/accuracies": 1.0,
96
- "rewards/generated": -10.926799774169922,
97
- "rewards/margins": 14.398449897766113,
98
- "rewards/real": 3.4716498851776123,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 0.96,
103
- "grad_norm": 0.8334083410423753,
104
  "learning_rate": 1.818181818181818e-08,
105
- "logits/generated": -3.3187618255615234,
106
- "logits/real": -2.187263011932373,
107
- "logps/generated": -343.95556640625,
108
- "logps/real": -66.73863983154297,
109
- "loss": 0.0748,
110
  "rewards/accuracies": 1.0,
111
- "rewards/generated": -10.801365852355957,
112
- "rewards/margins": 14.124551773071289,
113
- "rewards/real": 3.3231849670410156,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 0.992,
118
  "step": 62,
119
  "total_flos": 0.0,
120
- "train_loss": 0.11663837057928886,
121
- "train_runtime": 749.8498,
122
- "train_samples_per_second": 2.666,
123
- "train_steps_per_second": 0.083
124
  }
125
  ],
126
  "logging_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.016,
13
+ "grad_norm": 346.3062021602437,
14
  "learning_rate": 7.142857142857142e-08,
15
+ "logits/generated": -3.196486711502075,
16
+ "logits/real": -2.3527207374572754,
17
+ "logps/generated": -231.1735382080078,
18
+ "logps/real": -99.6369400024414,
19
+ "loss": 0.7803,
20
  "rewards/accuracies": 0.0,
21
  "rewards/generated": 0.0,
22
  "rewards/margins": 0.0,
 
25
  },
26
  {
27
  "epoch": 0.16,
28
+ "grad_norm": 2.4299127951132924,
29
  "learning_rate": 4.727272727272727e-07,
30
+ "logits/generated": -3.081599235534668,
31
+ "logits/real": -2.270655870437622,
32
+ "logps/generated": -280.790771484375,
33
+ "logps/real": -82.0130844116211,
34
+ "loss": 0.2552,
35
  "rewards/accuracies": 0.8888888955116272,
36
+ "rewards/generated": -4.4418439865112305,
37
+ "rewards/margins": 5.652818202972412,
38
+ "rewards/real": 1.2109735012054443,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.32,
43
+ "grad_norm": 0.9060775786875579,
44
  "learning_rate": 3.818181818181818e-07,
45
+ "logits/generated": -3.208840847015381,
46
+ "logits/real": -2.1548514366149902,
47
+ "logps/generated": -326.5088195800781,
48
+ "logps/real": -72.36115264892578,
49
+ "loss": 0.0712,
50
  "rewards/accuracies": 1.0,
51
+ "rewards/generated": -9.6439790725708,
52
+ "rewards/margins": 12.68847370147705,
53
+ "rewards/real": 3.044494867324829,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.48,
58
+ "grad_norm": 0.9746129145559742,
59
  "learning_rate": 2.909090909090909e-07,
60
+ "logits/generated": -3.225053071975708,
61
+ "logits/real": -2.0585813522338867,
62
+ "logps/generated": -353.51434326171875,
63
+ "logps/real": -57.8585090637207,
64
+ "loss": 0.0641,
65
  "rewards/accuracies": 1.0,
66
+ "rewards/generated": -11.199175834655762,
67
+ "rewards/margins": 14.336624145507812,
68
+ "rewards/real": 3.137446165084839,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.64,
73
+ "grad_norm": 1.114683085793999,
74
  "learning_rate": 2e-07,
75
+ "logits/generated": -3.1570346355438232,
76
+ "logits/real": -2.1495680809020996,
77
+ "logps/generated": -325.97515869140625,
78
+ "logps/real": -78.28927612304688,
79
+ "loss": 0.075,
80
  "rewards/accuracies": 1.0,
81
+ "rewards/generated": -10.375910758972168,
82
+ "rewards/margins": 13.806567192077637,
83
+ "rewards/real": 3.4306564331054688,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.8,
88
+ "grad_norm": 0.7978164295236632,
89
  "learning_rate": 1.0909090909090908e-07,
90
+ "logits/generated": -3.2370517253875732,
91
+ "logits/real": -2.0745859146118164,
92
+ "logps/generated": -342.4796447753906,
93
+ "logps/real": -65.09725189208984,
94
+ "loss": 0.0641,
95
  "rewards/accuracies": 1.0,
96
+ "rewards/generated": -10.8725004196167,
97
+ "rewards/margins": 14.377031326293945,
98
+ "rewards/real": 3.5045323371887207,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 0.96,
103
+ "grad_norm": 0.7374328187221965,
104
  "learning_rate": 1.818181818181818e-08,
105
+ "logits/generated": -3.1993775367736816,
106
+ "logits/real": -2.025631904602051,
107
+ "logps/generated": -341.24053955078125,
108
+ "logps/real": -61.965667724609375,
109
+ "loss": 0.0622,
110
  "rewards/accuracies": 1.0,
111
+ "rewards/generated": -11.041067123413086,
112
+ "rewards/margins": 14.699417114257812,
113
+ "rewards/real": 3.6583499908447266,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 0.992,
118
  "step": 62,
119
  "total_flos": 0.0,
120
+ "train_loss": 0.10689828472752724,
121
+ "train_runtime": 796.2858,
122
+ "train_samples_per_second": 2.51,
123
+ "train_steps_per_second": 0.078
124
  }
125
  ],
126
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fdd5b8d1c52afaee4e300c7319d3b6fa0725347c935b0ac2dc3f707fa843493
3
  size 6456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f66cc83367e99f19f41dca5c4ee324c54160a387192d3d367a0aa5a8885cd12c
3
  size 6456