SonishMaharjan committed on
Commit
40b21fd
1 Parent(s): 20c7cf0

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +6 -6
  2. train_results.json +6 -6
  3. trainer_state.json +102 -72
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.92,
3
- "total_flos": 5.6158236080942285e+17,
4
- "train_loss": 0.9780613832306444,
5
- "train_runtime": 1760.8586,
6
- "train_samples_per_second": 4.21,
7
- "train_steps_per_second": 0.032
8
  }
 
1
  {
2
+ "epoch": 2.94,
3
+ "total_flos": 1.1265748270391624e+18,
4
+ "train_loss": 0.6647836283633584,
5
+ "train_runtime": 6473.0815,
6
+ "train_samples_per_second": 2.286,
7
+ "train_steps_per_second": 0.018
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.92,
3
- "total_flos": 5.6158236080942285e+17,
4
- "train_loss": 0.9780613832306444,
5
- "train_runtime": 1760.8586,
6
- "train_samples_per_second": 4.21,
7
- "train_steps_per_second": 0.032
8
  }
 
1
  {
2
+ "epoch": 2.94,
3
+ "total_flos": 1.1265748270391624e+18,
4
+ "train_loss": 0.6647836283633584,
5
+ "train_runtime": 6473.0815,
6
+ "train_samples_per_second": 2.286,
7
+ "train_steps_per_second": 0.018
8
  }
trainer_state.json CHANGED
@@ -1,112 +1,142 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.9230769230769234,
5
  "eval_steps": 500,
6
- "global_step": 57,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.05,
13
- "learning_rate": 8.333333333333334e-06,
14
- "loss": 1.5498,
15
  "step": 1
16
  },
 
 
 
 
 
 
17
  {
18
  "epoch": 0.62,
19
  "learning_rate": 4.411764705882353e-05,
20
- "loss": 1.3111,
21
- "step": 12
22
  },
23
  {
24
- "epoch": 0.97,
25
- "eval_Macro F1": 0.38463266827131015,
26
- "eval_Macro Precision": 0.3513860547447244,
27
- "eval_Macro Recall": 0.45842986022199095,
28
- "eval_Micro F1": 0.46153846153846156,
29
- "eval_Micro Precision": 0.46153846153846156,
30
- "eval_Micro Recall": 0.46153846153846156,
31
- "eval_Weighted F1": 0.39257303711149655,
32
- "eval_Weighted Precision": 0.36331970047517986,
33
- "eval_Weighted Recall": 0.46153846153846156,
34
- "eval_accuracy": 0.46153846153846156,
35
- "eval_loss": 1.004745602607727,
36
- "eval_runtime": 492.4481,
37
- "eval_samples_per_second": 2.587,
38
- "eval_steps_per_second": 0.081,
39
- "step": 19
 
 
 
 
 
 
40
  },
41
  {
42
- "epoch": 1.23,
43
  "learning_rate": 3.235294117647059e-05,
44
- "loss": 1.0359,
45
- "step": 24
 
 
 
 
 
 
46
  },
47
  {
48
- "epoch": 1.85,
49
  "learning_rate": 2.058823529411765e-05,
50
- "loss": 0.923,
51
- "step": 36
52
  },
53
  {
54
- "epoch": 2.0,
55
- "eval_Macro F1": 0.5093855651518693,
56
- "eval_Macro Precision": 0.7237298314233377,
57
- "eval_Macro Recall": 0.5863700553639922,
58
- "eval_Micro F1": 0.6004709576138147,
59
- "eval_Micro Precision": 0.6004709576138147,
60
- "eval_Micro Recall": 0.6004709576138147,
61
- "eval_Weighted F1": 0.5303808904978752,
62
- "eval_Weighted Precision": 0.7224619896227847,
63
- "eval_Weighted Recall": 0.6004709576138147,
64
- "eval_accuracy": 0.6004709576138147,
65
- "eval_loss": 0.7626671195030212,
66
- "eval_runtime": 31.9216,
67
- "eval_samples_per_second": 39.91,
68
  "eval_steps_per_second": 1.253,
69
- "step": 39
70
  },
71
  {
72
- "epoch": 2.46,
 
 
 
 
 
 
73
  "learning_rate": 8.823529411764707e-06,
74
- "loss": 0.7743,
75
- "step": 48
 
 
 
 
 
 
76
  },
77
  {
78
- "epoch": 2.92,
79
- "eval_Macro F1": 0.5605146596385748,
80
- "eval_Macro Precision": 0.7334668145101633,
81
- "eval_Macro Recall": 0.6217819801854411,
82
- "eval_Micro F1": 0.640502354788069,
83
- "eval_Micro Precision": 0.640502354788069,
84
- "eval_Micro Recall": 0.640502354788069,
85
- "eval_Weighted F1": 0.5794525732393071,
86
- "eval_Weighted Precision": 0.727397633667412,
87
- "eval_Weighted Recall": 0.640502354788069,
88
- "eval_accuracy": 0.640502354788069,
89
- "eval_loss": 0.6403084993362427,
90
- "eval_runtime": 32.0563,
91
- "eval_samples_per_second": 39.743,
92
- "eval_steps_per_second": 1.248,
93
- "step": 57
94
  },
95
  {
96
- "epoch": 2.92,
97
- "step": 57,
98
- "total_flos": 5.6158236080942285e+17,
99
- "train_loss": 0.9780613832306444,
100
- "train_runtime": 1760.8586,
101
- "train_samples_per_second": 4.21,
102
- "train_steps_per_second": 0.032
103
  }
104
  ],
105
  "logging_steps": 12,
106
- "max_steps": 57,
107
  "num_train_epochs": 3,
108
  "save_steps": 500,
109
- "total_flos": 5.6158236080942285e+17,
110
  "trial_name": null,
111
  "trial_params": null
112
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.9419354838709677,
5
  "eval_steps": 500,
6
+ "global_step": 114,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.03,
13
+ "learning_rate": 4.166666666666667e-06,
14
+ "loss": 1.3431,
15
  "step": 1
16
  },
17
+ {
18
+ "epoch": 0.31,
19
+ "learning_rate": 5e-05,
20
+ "loss": 1.226,
21
+ "step": 12
22
+ },
23
  {
24
  "epoch": 0.62,
25
  "learning_rate": 4.411764705882353e-05,
26
+ "loss": 0.9888,
27
+ "step": 24
28
  },
29
  {
30
+ "epoch": 0.93,
31
+ "learning_rate": 3.8235294117647055e-05,
32
+ "loss": 0.8637,
33
+ "step": 36
34
+ },
35
+ {
36
+ "epoch": 0.98,
37
+ "eval_Macro F1": 0.5661973571975384,
38
+ "eval_Macro Precision": 0.7510967291822963,
39
+ "eval_Macro Recall": 0.6473652579844594,
40
+ "eval_Micro F1": 0.6778127458693942,
41
+ "eval_Micro Precision": 0.6778127458693942,
42
+ "eval_Micro Recall": 0.6778127458693942,
43
+ "eval_Weighted F1": 0.5949172262865288,
44
+ "eval_Weighted Precision": 0.7505686242573377,
45
+ "eval_Weighted Recall": 0.6778127458693942,
46
+ "eval_accuracy": 0.6778127458693942,
47
+ "eval_loss": 0.598620593547821,
48
+ "eval_runtime": 1926.4978,
49
+ "eval_samples_per_second": 1.319,
50
+ "eval_steps_per_second": 0.042,
51
+ "step": 38
52
  },
53
  {
54
+ "epoch": 1.24,
55
  "learning_rate": 3.235294117647059e-05,
56
+ "loss": 0.6928,
57
+ "step": 48
58
+ },
59
+ {
60
+ "epoch": 1.55,
61
+ "learning_rate": 2.647058823529412e-05,
62
+ "loss": 0.6054,
63
+ "step": 60
64
  },
65
  {
66
+ "epoch": 1.86,
67
  "learning_rate": 2.058823529411765e-05,
68
+ "loss": 0.5018,
69
+ "step": 72
70
  },
71
  {
72
+ "epoch": 1.99,
73
+ "eval_Macro F1": 0.8376972138667833,
74
+ "eval_Macro Precision": 0.8622787078319065,
75
+ "eval_Macro Recall": 0.8344563153022619,
76
+ "eval_Micro F1": 0.8516915814319432,
77
+ "eval_Micro Precision": 0.8516915814319433,
78
+ "eval_Micro Recall": 0.8516915814319433,
79
+ "eval_Weighted F1": 0.8452584084591334,
80
+ "eval_Weighted Precision": 0.8588424441061323,
81
+ "eval_Weighted Recall": 0.8516915814319433,
82
+ "eval_accuracy": 0.8516915814319433,
83
+ "eval_loss": 0.270693838596344,
84
+ "eval_runtime": 63.8305,
85
+ "eval_samples_per_second": 39.824,
86
  "eval_steps_per_second": 1.253,
87
+ "step": 77
88
  },
89
  {
90
+ "epoch": 2.17,
91
+ "learning_rate": 1.4705882352941177e-05,
92
+ "loss": 0.4449,
93
+ "step": 84
94
+ },
95
+ {
96
+ "epoch": 2.48,
97
  "learning_rate": 8.823529411764707e-06,
98
+ "loss": 0.4168,
99
+ "step": 96
100
+ },
101
+ {
102
+ "epoch": 2.79,
103
+ "learning_rate": 2.9411764705882355e-06,
104
+ "loss": 0.3761,
105
+ "step": 108
106
  },
107
  {
108
+ "epoch": 2.94,
109
+ "eval_Macro F1": 0.8640166757623045,
110
+ "eval_Macro Precision": 0.8744992381555277,
111
+ "eval_Macro Recall": 0.8626385930557097,
112
+ "eval_Micro F1": 0.8741148701809599,
113
+ "eval_Micro Precision": 0.8741148701809599,
114
+ "eval_Micro Recall": 0.8741148701809599,
115
+ "eval_Weighted F1": 0.8719933502245091,
116
+ "eval_Weighted Precision": 0.8780283114822369,
117
+ "eval_Weighted Recall": 0.8741148701809599,
118
+ "eval_accuracy": 0.8741148701809599,
119
+ "eval_loss": 0.22845804691314697,
120
+ "eval_runtime": 63.2765,
121
+ "eval_samples_per_second": 40.173,
122
+ "eval_steps_per_second": 1.264,
123
+ "step": 114
124
  },
125
  {
126
+ "epoch": 2.94,
127
+ "step": 114,
128
+ "total_flos": 1.1265748270391624e+18,
129
+ "train_loss": 0.6647836283633584,
130
+ "train_runtime": 6473.0815,
131
+ "train_samples_per_second": 2.286,
132
+ "train_steps_per_second": 0.018
133
  }
134
  ],
135
  "logging_steps": 12,
136
+ "max_steps": 114,
137
  "num_train_epochs": 3,
138
  "save_steps": 500,
139
+ "total_flos": 1.1265748270391624e+18,
140
  "trial_name": null,
141
  "trial_params": null
142
  }