ana-grassmann committed on
Commit
130a5c3
1 Parent(s): a6354c3

Training in progress, epoch 2

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b7007f61a373deb00621503390180c0e6f3f5a8b37facf65d670c423f04bf45
3
  size 437958648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:035ac4309d3064fbdb87084e018ecfb9b3d389199a08cd4e7d6b1b98837a5829
3
  size 437958648
run-0/checkpoint-1670/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db2a60dc6bbb07338c4d0dfff0424ac1bfa4a30608dbd8bd9055ff57493d0093
3
  size 437958648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:035ac4309d3064fbdb87084e018ecfb9b3d389199a08cd4e7d6b1b98837a5829
3
  size 437958648
run-0/checkpoint-1670/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44763a5e2c5d3f7b655029eda2376b76307b1122b86d80084664b5b86917899f
3
  size 876038394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2227f041b070701f9b2e36be87e58d7d918a20662ec98ca096c3f3d84049b261
3
  size 876038394
run-0/checkpoint-1670/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:627ea2f7cd2aedb5e1b4e507ea04b02dd0820e238075ddd9d5b5edf37464fe6d
3
- size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e9450ad04385d20fe34680dfade3c60745403e057ce0bf1f1a034d03735cabe
3
+ size 14244
run-0/checkpoint-1670/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:366cda4d6512ba28553df7753df51ae469825d7fb82c5003880659bdb93dbc77
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0a0124953825835361cdc6763f757f2b1d1a27a3f8fe88d27358245324a8d8a
3
  size 1064
run-0/checkpoint-1670/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.9820850808867585,
3
  "best_model_checkpoint": "bert-base-uncased-finetuned-spam-real/run-0/checkpoint-1670",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
@@ -10,56 +10,56 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.6,
13
- "grad_norm": 1.6855844259262085,
14
- "learning_rate": 7.660219401214295e-06,
15
- "loss": 0.2399,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.9690832834032355,
21
- "eval_loss": 0.1404273957014084,
22
- "eval_runtime": 321.4031,
23
- "eval_samples_per_second": 51.929,
24
- "eval_steps_per_second": 3.248,
25
  "step": 835
26
  },
27
  {
28
  "epoch": 1.2,
29
- "grad_norm": 0.28391602635383606,
30
- "learning_rate": 6.618012679960649e-06,
31
- "loss": 0.1015,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 1.8,
36
- "grad_norm": 0.011191217228770256,
37
- "learning_rate": 5.575805958707004e-06,
38
- "loss": 0.0775,
39
  "step": 1500
40
  },
41
  {
42
  "epoch": 2.0,
43
- "eval_accuracy": 0.9820850808867585,
44
- "eval_loss": 0.08610038459300995,
45
- "eval_runtime": 318.1892,
46
- "eval_samples_per_second": 52.453,
47
- "eval_steps_per_second": 3.281,
48
  "step": 1670
49
  }
50
  ],
51
  "logging_steps": 500,
52
- "max_steps": 4175,
53
  "num_input_tokens_seen": 0,
54
- "num_train_epochs": 5,
55
  "save_steps": 500,
56
- "total_flos": 3026276636534160.0,
57
- "train_batch_size": 8,
58
  "trial_name": null,
59
  "trial_params": {
60
- "learning_rate": 8.70242612246794e-06,
61
- "num_train_epochs": 5,
62
- "per_device_train_batch_size": 8,
63
- "seed": 20
64
  }
65
  }
 
1
  {
2
+ "best_metric": 0.9642899940083882,
3
  "best_model_checkpoint": "bert-base-uncased-finetuned-spam-real/run-0/checkpoint-1670",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.6,
13
+ "grad_norm": 0.30469822883605957,
14
+ "learning_rate": 3.878567975913952e-06,
15
+ "loss": 0.3154,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.9488915518274416,
21
+ "eval_loss": 0.22129623591899872,
22
+ "eval_runtime": 335.9128,
23
+ "eval_samples_per_second": 49.686,
24
+ "eval_steps_per_second": 3.108,
25
  "step": 835
26
  },
27
  {
28
  "epoch": 1.2,
29
+ "grad_norm": 0.06290946900844574,
30
+ "learning_rate": 2.2210602938994423e-06,
31
+ "loss": 0.1341,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 1.8,
36
+ "grad_norm": 0.40462997555732727,
37
+ "learning_rate": 5.635526118849332e-07,
38
+ "loss": 0.1332,
39
  "step": 1500
40
  },
41
  {
42
  "epoch": 2.0,
43
+ "eval_accuracy": 0.9642899940083882,
44
+ "eval_loss": 0.16416475176811218,
45
+ "eval_runtime": 335.3225,
46
+ "eval_samples_per_second": 49.773,
47
+ "eval_steps_per_second": 3.113,
48
  "step": 1670
49
  }
50
  ],
51
  "logging_steps": 500,
52
+ "max_steps": 1670,
53
  "num_input_tokens_seen": 0,
54
+ "num_train_epochs": 2,
55
  "save_steps": 500,
56
+ "total_flos": 2178221399563560.0,
57
+ "train_batch_size": 4,
58
  "trial_name": null,
59
  "trial_params": {
60
+ "learning_rate": 5.536075657928461e-06,
61
+ "num_train_epochs": 2,
62
+ "per_device_train_batch_size": 4,
63
+ "seed": 30
64
  }
65
  }
run-0/checkpoint-1670/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e099818249ddecf442df98b949821a6e1bc96f54b7f97f17dac7a7067b75e3b4
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3cf64e30d88e54cc8c23684bd03a07683ebeb538447855e2dd3ae4132912700
3
  size 4920
runs/Apr07_21-55-44_2e7e2ed6349a/events.out.tfevents.1712526953.2e7e2ed6349a.1594.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1c754fb390b048cd7352780ed6b5b6add8708074d691787374dabd5ce6af6a1
3
- size 14275
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:549c8a8af752aa8d5c0a86a6c84a5173428b48aa013beaa85cea0f62ec42daa0
3
+ size 15374