SonishMaharjan committed on
Commit
e1d8ac9
1 Parent(s): d747632

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +4 -4
  2. train_results.json +4 -4
  3. trainer_state.json +56 -56
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.94,
3
  "total_flos": 1.1265748270391624e+18,
4
- "train_loss": 0.2613685036960401,
5
- "train_runtime": 863.1868,
6
- "train_samples_per_second": 17.141,
7
- "train_steps_per_second": 0.132
8
  }
 
1
  {
2
  "epoch": 2.94,
3
  "total_flos": 1.1265748270391624e+18,
4
+ "train_loss": 0.16734615334293299,
5
+ "train_runtime": 848.5959,
6
+ "train_samples_per_second": 17.436,
7
+ "train_steps_per_second": 0.134
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.94,
3
  "total_flos": 1.1265748270391624e+18,
4
- "train_loss": 0.2613685036960401,
5
- "train_runtime": 863.1868,
6
- "train_samples_per_second": 17.141,
7
- "train_steps_per_second": 0.132
8
  }
 
1
  {
2
  "epoch": 2.94,
3
  "total_flos": 1.1265748270391624e+18,
4
+ "train_loss": 0.16734615334293299,
5
+ "train_runtime": 848.5959,
6
+ "train_samples_per_second": 17.436,
7
+ "train_steps_per_second": 0.134
8
  }
trainer_state.json CHANGED
@@ -11,125 +11,125 @@
11
  {
12
  "epoch": 0.03,
13
  "learning_rate": 4.166666666666667e-06,
14
- "loss": 0.3991,
15
  "step": 1
16
  },
17
  {
18
  "epoch": 0.31,
19
  "learning_rate": 5e-05,
20
- "loss": 0.3808,
21
  "step": 12
22
  },
23
  {
24
  "epoch": 0.62,
25
  "learning_rate": 4.411764705882353e-05,
26
- "loss": 0.3474,
27
  "step": 24
28
  },
29
  {
30
  "epoch": 0.93,
31
  "learning_rate": 3.8235294117647055e-05,
32
- "loss": 0.3093,
33
  "step": 36
34
  },
35
  {
36
  "epoch": 0.98,
37
- "eval_Macro F1": 0.8820015735129666,
38
- "eval_Macro Precision": 0.899385909210207,
39
- "eval_Macro Recall": 0.8738252833106378,
40
- "eval_Micro F1": 0.8890637293469709,
41
- "eval_Micro Precision": 0.8890637293469709,
42
- "eval_Micro Recall": 0.8890637293469709,
43
- "eval_Weighted F1": 0.8879137250311478,
44
- "eval_Weighted Precision": 0.8952120340834557,
45
- "eval_Weighted Recall": 0.8890637293469709,
46
- "eval_accuracy": 0.8890637293469709,
47
- "eval_loss": 0.2252058982849121,
48
- "eval_runtime": 64.7982,
49
- "eval_samples_per_second": 39.229,
50
- "eval_steps_per_second": 1.235,
51
  "step": 38
52
  },
53
  {
54
  "epoch": 1.24,
55
  "learning_rate": 3.235294117647059e-05,
56
- "loss": 0.2358,
57
  "step": 48
58
  },
59
  {
60
  "epoch": 1.55,
61
  "learning_rate": 2.647058823529412e-05,
62
- "loss": 0.2391,
63
  "step": 60
64
  },
65
  {
66
  "epoch": 1.86,
67
  "learning_rate": 2.058823529411765e-05,
68
- "loss": 0.2278,
69
  "step": 72
70
  },
71
  {
72
  "epoch": 1.99,
73
- "eval_Macro F1": 0.9220395057673447,
74
- "eval_Macro Precision": 0.9240847889405631,
75
- "eval_Macro Recall": 0.9220981981645411,
76
- "eval_Micro F1": 0.9291896144767899,
77
- "eval_Micro Precision": 0.9291896144767899,
78
- "eval_Micro Recall": 0.9291896144767899,
79
- "eval_Weighted F1": 0.929177891947533,
80
- "eval_Weighted Precision": 0.9309640796953544,
81
- "eval_Weighted Recall": 0.9291896144767899,
82
- "eval_accuracy": 0.9291896144767899,
83
- "eval_loss": 0.16484124958515167,
84
- "eval_runtime": 64.4008,
85
- "eval_samples_per_second": 39.472,
86
- "eval_steps_per_second": 1.242,
87
  "step": 77
88
  },
89
  {
90
  "epoch": 2.17,
91
  "learning_rate": 1.4705882352941177e-05,
92
- "loss": 0.1989,
93
  "step": 84
94
  },
95
  {
96
  "epoch": 2.48,
97
  "learning_rate": 8.823529411764707e-06,
98
- "loss": 0.2251,
99
  "step": 96
100
  },
101
  {
102
  "epoch": 2.79,
103
  "learning_rate": 2.9411764705882355e-06,
104
- "loss": 0.2066,
105
  "step": 108
106
  },
107
  {
108
  "epoch": 2.94,
109
- "eval_Macro F1": 0.9386216440513059,
110
- "eval_Macro Precision": 0.9382250515595165,
111
- "eval_Macro Recall": 0.9403801182311695,
112
- "eval_Micro F1": 0.9425649095200629,
113
- "eval_Micro Precision": 0.9425649095200629,
114
- "eval_Micro Recall": 0.9425649095200629,
115
- "eval_Weighted F1": 0.9426485199157074,
116
- "eval_Weighted Precision": 0.9439679024820588,
117
- "eval_Weighted Recall": 0.9425649095200629,
118
- "eval_accuracy": 0.9425649095200629,
119
- "eval_loss": 0.13585534691810608,
120
- "eval_runtime": 63.8941,
121
- "eval_samples_per_second": 39.785,
122
- "eval_steps_per_second": 1.252,
123
  "step": 114
124
  },
125
  {
126
  "epoch": 2.94,
127
  "step": 114,
128
  "total_flos": 1.1265748270391624e+18,
129
- "train_loss": 0.2613685036960401,
130
- "train_runtime": 863.1868,
131
- "train_samples_per_second": 17.141,
132
- "train_steps_per_second": 0.132
133
  }
134
  ],
135
  "logging_steps": 12,
 
11
  {
12
  "epoch": 0.03,
13
  "learning_rate": 4.166666666666667e-06,
14
+ "loss": 0.1438,
15
  "step": 1
16
  },
17
  {
18
  "epoch": 0.31,
19
  "learning_rate": 5e-05,
20
+ "loss": 0.146,
21
  "step": 12
22
  },
23
  {
24
  "epoch": 0.62,
25
  "learning_rate": 4.411764705882353e-05,
26
+ "loss": 0.1822,
27
  "step": 24
28
  },
29
  {
30
  "epoch": 0.93,
31
  "learning_rate": 3.8235294117647055e-05,
32
+ "loss": 0.1916,
33
  "step": 36
34
  },
35
  {
36
  "epoch": 0.98,
37
+ "eval_Macro F1": 0.9408451121862865,
38
+ "eval_Macro Precision": 0.9411664777049882,
39
+ "eval_Macro Recall": 0.9427118741264285,
40
+ "eval_Micro F1": 0.9461054287962234,
41
+ "eval_Micro Precision": 0.9461054287962234,
42
+ "eval_Micro Recall": 0.9461054287962234,
43
+ "eval_Weighted F1": 0.9464960091472664,
44
+ "eval_Weighted Precision": 0.9487006349374144,
45
+ "eval_Weighted Recall": 0.9461054287962234,
46
+ "eval_accuracy": 0.9461054287962234,
47
+ "eval_loss": 0.13961628079414368,
48
+ "eval_runtime": 63.2832,
49
+ "eval_samples_per_second": 40.169,
50
+ "eval_steps_per_second": 1.264,
51
  "step": 38
52
  },
53
  {
54
  "epoch": 1.24,
55
  "learning_rate": 3.235294117647059e-05,
56
+ "loss": 0.1565,
57
  "step": 48
58
  },
59
  {
60
  "epoch": 1.55,
61
  "learning_rate": 2.647058823529412e-05,
62
+ "loss": 0.1692,
63
  "step": 60
64
  },
65
  {
66
  "epoch": 1.86,
67
  "learning_rate": 2.058823529411765e-05,
68
+ "loss": 0.1597,
69
  "step": 72
70
  },
71
  {
72
  "epoch": 1.99,
73
+ "eval_Macro F1": 0.9484522652918805,
74
+ "eval_Macro Precision": 0.9471910825399321,
75
+ "eval_Macro Recall": 0.95148334257645,
76
+ "eval_Micro F1": 0.9520062942564909,
77
+ "eval_Micro Precision": 0.952006294256491,
78
+ "eval_Micro Recall": 0.952006294256491,
79
+ "eval_Weighted F1": 0.9522977862410118,
80
+ "eval_Weighted Precision": 0.9540997275393984,
81
+ "eval_Weighted Recall": 0.952006294256491,
82
+ "eval_accuracy": 0.952006294256491,
83
+ "eval_loss": 0.12265925854444504,
84
+ "eval_runtime": 64.9371,
85
+ "eval_samples_per_second": 39.146,
86
+ "eval_steps_per_second": 1.232,
87
  "step": 77
88
  },
89
  {
90
  "epoch": 2.17,
91
  "learning_rate": 1.4705882352941177e-05,
92
+ "loss": 0.1619,
93
  "step": 84
94
  },
95
  {
96
  "epoch": 2.48,
97
  "learning_rate": 8.823529411764707e-06,
98
+ "loss": 0.1611,
99
  "step": 96
100
  },
101
  {
102
  "epoch": 2.79,
103
  "learning_rate": 2.9411764705882355e-06,
104
+ "loss": 0.1722,
105
  "step": 108
106
  },
107
  {
108
  "epoch": 2.94,
109
+ "eval_Macro F1": 0.947301058449996,
110
+ "eval_Macro Precision": 0.9458287584858323,
111
+ "eval_Macro Recall": 0.9498167521420487,
112
+ "eval_Micro F1": 0.9512195121951219,
113
+ "eval_Micro Precision": 0.9512195121951219,
114
+ "eval_Micro Recall": 0.9512195121951219,
115
+ "eval_Weighted F1": 0.9515018097434871,
116
+ "eval_Weighted Precision": 0.9527033488356239,
117
+ "eval_Weighted Recall": 0.9512195121951219,
118
+ "eval_accuracy": 0.9512195121951219,
119
+ "eval_loss": 0.11822786182165146,
120
+ "eval_runtime": 63.9617,
121
+ "eval_samples_per_second": 39.743,
122
+ "eval_steps_per_second": 1.251,
123
  "step": 114
124
  },
125
  {
126
  "epoch": 2.94,
127
  "step": 114,
128
  "total_flos": 1.1265748270391624e+18,
129
+ "train_loss": 0.16734615334293299,
130
+ "train_runtime": 848.5959,
131
+ "train_samples_per_second": 17.436,
132
+ "train_steps_per_second": 0.134
133
  }
134
  ],
135
  "logging_steps": 12,