Shresthadev403 committed
Commit aef6b33 · 1 Parent(s): 521d0fe

End of training
README.md CHANGED
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
 It achieves the following results on the evaluation set:
- - Loss: 2.1188
+ - Loss: 1.7084
 
 ## Model description
 
@@ -46,13 +46,13 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
- | 17.0348 | 1.0 | 10 | 4.3278 |
- | 4.1755 | 2.0 | 20 | 2.9010 |
- | 2.9516 | 3.0 | 30 | 2.5815 |
- | 2.4828 | 4.0 | 40 | 2.1538 |
- | 2.2802 | 5.0 | 50 | 2.1050 |
- | 2.1949 | 6.0 | 60 | 2.1221 |
- | 2.0416 | 7.0 | 70 | 2.1188 |
+ | 16.994 | 1.0 | 10 | 4.2000 |
+ | 3.8266 | 2.0 | 20 | 3.1920 |
+ | 3.0646 | 3.0 | 30 | 2.2112 |
+ | 2.3667 | 4.0 | 40 | 1.9768 |
+ | 2.0815 | 5.0 | 50 | 1.8096 |
+ | 1.9727 | 6.0 | 60 | 1.7462 |
+ | 1.8305 | 7.0 | 70 | 1.7084 |
 
 
 ### Framework versions
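The README hunk above reports a final evaluation loss of 1.7084 for the fine-tuned gpt2 model. A minimal sketch of how such a number is typically computed with the transformers library; the repo id and eval texts are placeholders, not taken from this commit:

```python
# Minimal sketch (not part of this commit): computing an evaluation loss for a
# fine-tuned gpt2 causal LM with transformers.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "your-username/your-finetuned-gpt2"  # placeholder, not the repo in this commit
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id)
model.eval()

texts = ["first held-out example ...", "second held-out example ..."]  # placeholder eval set
losses = []
with torch.no_grad():
    for text in texts:
        enc = tokenizer(text, return_tensors="pt")
        # With labels=input_ids, a causal LM returns the mean cross-entropy
        # over the shifted next-token predictions.
        out = model(**enc, labels=enc["input_ids"])
        losses.append(out.loss.item())

print(f"mean eval loss: {sum(losses) / len(losses):.4f}")
```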
logs/events.out.tfevents.1702141643.62a1e4c74a80.42734.12 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c8f3b1cbc90af3df29859e9f95323ff69663844e172761dbd65f96d9c17d822f
+ size 4606

logs/events.out.tfevents.1702141700.62a1e4c74a80.42734.13 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5692b3cf90af6cd33796035637a638eccff8a9dd47835aded87e0707544f4edd
+ size 4606

logs/events.out.tfevents.1702141798.62a1e4c74a80.42734.14 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8023540862a59981d10bd1c5cd1806341e537c4ded0fda6a8cd4c34ffbd5238c
+ size 4606

logs/events.out.tfevents.1702141860.62a1e4c74a80.42734.15 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b07a10500bc4df9cbfccbd417ce32277d6a5681dc138f743c55058c7423a9eb
+ size 4606

logs/events.out.tfevents.1702142103.62a1e4c74a80.42734.16 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7561f7ec08f996ce5434eddb66a4a0678daa21d4d792b022269537c9837e2124
+ size 4606

logs/events.out.tfevents.1702142220.62a1e4c74a80.42734.17 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ea175ad66c333c6a5bbc8457c9b9fcbb6b300129233508bb9b57977eb3b2b380
+ size 4606

logs/events.out.tfevents.1702142397.62a1e4c74a80.42734.18 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7d3cadf90022728bebe5860f59875298569dc6178699ea34318d4e1159fe357f
+ size 4606

logs/events.out.tfevents.1702142503.62a1e4c74a80.42734.19 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3ecad16e2c7c0d119e9e7789fddd52fc66d02ed172184df2599bb472c897d6c9
+ size 4606

logs/events.out.tfevents.1702142787.62a1e4c74a80.42734.20 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:572bf5adb0cbd8bb65d849a3a354958048cf97c8380b600997697515e174af10
+ size 4606

logs/events.out.tfevents.1702143232.62a1e4c74a80.42734.21 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cf7ec42db451bc974284a7fea152966e6a1b47b24f2036e8174fcaee9dce2a9d
+ size 4606

logs/events.out.tfevents.1702143345.62a1e4c74a80.42734.22 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:666ef2a489b11be7cfd2cc954ded83fbd219aab35fcaff2e8fec3036dc41074c
+ size 4606

logs/events.out.tfevents.1702143757.62a1e4c74a80.42734.23 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5fec0649428a27ed99becd8d10b0a4e36c63b2d404dbe1bf3a02e1d17a6a0a54
+ size 4606

logs/events.out.tfevents.1702143852.62a1e4c74a80.42734.24 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b65ec14bd5f1037745aac6f695c7624d598077d4f2a4b68075569c4ecd11f67f
+ size 4606

logs/events.out.tfevents.1702144190.62a1e4c74a80.42734.25 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0324a6b18d78ba569f831affce5344b2644cdef69296bf909480f4a736d4a5db
+ size 7740
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:09037655973ebe73f63dc99bf71b69df1b21e4ed23b2b929dd5d59a3c929ef1f
+ oid sha256:787fcb2b2868b4abaaf1ab8042b45c7c44eb9ba39f096b720c514963d76869c9
 size 497814144
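The log files added above and model.safetensors are stored as Git LFS pointer stubs: three lines giving the spec version, the sha256 of the real blob (oid), and its byte size. A minimal sketch of checking a locally fetched file against those two fields; the local path assumes the blob has already been pulled (e.g. with `git lfs pull`):

```python
# Minimal sketch: verify a local file against git-lfs pointer fields from this diff.
# oid is the sha256 of the blob, size its length in bytes.
import hashlib
from pathlib import Path

def matches_pointer(path: str, oid_sha256: str, size: int) -> bool:
    data = Path(path).read_bytes()
    return len(data) == size and hashlib.sha256(data).hexdigest() == oid_sha256

# Fields taken from the new model.safetensors pointer above; the path assumes
# the LFS blob has already been pulled into the working tree.
print(matches_pointer(
    "model.safetensors",
    "787fcb2b2868b4abaaf1ab8042b45c7c44eb9ba39f096b720c514963d76869c9",
    497814144,
))
```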
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
 {
- "best_metric": 2.105010747909546,
- "best_model_checkpoint": "bert-base-banking77-pt2/checkpoint-50",
+ "best_metric": 1.7083709239959717,
+ "best_model_checkpoint": "bert-base-banking77-pt2/checkpoint-70",
 "epoch": 7.0,
 "eval_steps": 10,
 "global_step": 70,
@@ -10,110 +10,110 @@
 "log_history": [
 {
 "epoch": 1.0,
- "learning_rate": 4e-05,
- "loss": 17.0348,
+ "learning_rate": 4.2857142857142856e-05,
+ "loss": 16.994,
 "step": 10
 },
 {
 "epoch": 1.0,
- "eval_loss": 4.327815055847168,
- "eval_runtime": 0.2279,
- "eval_samples_per_second": 43.887,
- "eval_steps_per_second": 8.777,
+ "eval_loss": 4.199960231781006,
+ "eval_runtime": 0.1251,
+ "eval_samples_per_second": 79.92,
+ "eval_steps_per_second": 15.984,
 "step": 10
 },
 {
 "epoch": 2.0,
- "learning_rate": 3e-05,
- "loss": 4.1755,
+ "learning_rate": 3.571428571428572e-05,
+ "loss": 3.8266,
 "step": 20
 },
 {
 "epoch": 2.0,
- "eval_loss": 2.9009530544281006,
- "eval_runtime": 0.1465,
- "eval_samples_per_second": 68.244,
- "eval_steps_per_second": 13.649,
+ "eval_loss": 3.1919631958007812,
+ "eval_runtime": 0.1628,
+ "eval_samples_per_second": 61.412,
+ "eval_steps_per_second": 12.282,
 "step": 20
 },
 {
 "epoch": 3.0,
- "learning_rate": 2e-05,
- "loss": 2.9516,
+ "learning_rate": 2.857142857142857e-05,
+ "loss": 3.0646,
 "step": 30
 },
 {
 "epoch": 3.0,
- "eval_loss": 2.581455707550049,
- "eval_runtime": 0.1718,
- "eval_samples_per_second": 58.205,
- "eval_steps_per_second": 11.641,
+ "eval_loss": 2.2111892700195312,
+ "eval_runtime": 0.1271,
+ "eval_samples_per_second": 78.703,
+ "eval_steps_per_second": 15.741,
 "step": 30
 },
 {
 "epoch": 4.0,
- "learning_rate": 1e-05,
- "loss": 2.4828,
+ "learning_rate": 2.1428571428571428e-05,
+ "loss": 2.3667,
 "step": 40
 },
 {
 "epoch": 4.0,
- "eval_loss": 2.1537680625915527,
- "eval_runtime": 0.1261,
- "eval_samples_per_second": 79.287,
- "eval_steps_per_second": 15.857,
+ "eval_loss": 1.9768412113189697,
+ "eval_runtime": 0.1291,
+ "eval_samples_per_second": 77.443,
+ "eval_steps_per_second": 15.489,
 "step": 40
 },
 {
 "epoch": 5.0,
- "learning_rate": 0.0,
- "loss": 2.2802,
+ "learning_rate": 1.4285714285714285e-05,
+ "loss": 2.0815,
 "step": 50
 },
 {
 "epoch": 5.0,
- "eval_loss": 2.105010747909546,
- "eval_runtime": 0.1259,
- "eval_samples_per_second": 79.453,
- "eval_steps_per_second": 15.891,
+ "eval_loss": 1.8095529079437256,
+ "eval_runtime": 0.1244,
+ "eval_samples_per_second": 80.397,
+ "eval_steps_per_second": 16.079,
 "step": 50
 },
 {
 "epoch": 6.0,
 "learning_rate": 7.142857142857143e-06,
- "loss": 2.1949,
+ "loss": 1.9727,
 "step": 60
 },
 {
 "epoch": 6.0,
- "eval_loss": 2.1220641136169434,
- "eval_runtime": 0.161,
- "eval_samples_per_second": 62.127,
- "eval_steps_per_second": 12.425,
+ "eval_loss": 1.7462323904037476,
+ "eval_runtime": 0.1467,
+ "eval_samples_per_second": 68.173,
+ "eval_steps_per_second": 13.635,
 "step": 60
 },
 {
 "epoch": 7.0,
 "learning_rate": 0.0,
- "loss": 2.0416,
+ "loss": 1.8305,
 "step": 70
 },
 {
 "epoch": 7.0,
- "eval_loss": 2.118767023086548,
- "eval_runtime": 0.1487,
- "eval_samples_per_second": 67.242,
- "eval_steps_per_second": 13.448,
+ "eval_loss": 1.7083709239959717,
+ "eval_runtime": 0.1559,
+ "eval_samples_per_second": 64.127,
+ "eval_steps_per_second": 12.825,
 "step": 70
 },
 {
 "epoch": 7.0,
 "step": 70,
 "total_flos": 4572610560000.0,
- "train_loss": 0.6052134922572545,
- "train_runtime": 34.1418,
- "train_samples_per_second": 2.05,
- "train_steps_per_second": 2.05
+ "train_loss": 4.5909277507237025,
+ "train_runtime": 92.0015,
+ "train_samples_per_second": 0.761,
+ "train_steps_per_second": 0.761
 }
 ],
 "logging_steps": 10,