devvanshhh committed on
Commit
355f4e3
1 Parent(s): bee3de8

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e769a8adc6851c571d318f9f8d597aa021ab357ebcd61e613e6346460833779a
3
  size 37789864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df0614e6ae0c4cca08ec46a3a0eca5c114c29487c73131333ee0ac8747dbc354
3
  size 37789864
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e81abb427d2bb19813d0e17c56caed64154b36100906c45a8547cd38afbae3d5
3
  size 2621690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c375252e989676c82361bf75c9e05f8625ab364d94f54619de39642b83371a55
3
  size 2621690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:404667d89973760348cc653c442f08c243df998777422cd0dd9b4e5b7b18505d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3206cfea353705871f16d46e55ccf93893b489d46b83fd7ba2c4d45d71b93a7
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:466256ee71b0a044b429768d7371618a1dc77becfc275f201cdc2e4d1e676296
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30c8d100410f3813df39e9f84e8d440fdc3cad2e6c15e5ee2bb73e18d8c8af53
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,102 +1,32 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.0,
5
  "eval_steps": 500,
6
- "global_step": 1640,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_gen_len": 11.120274914089347,
14
- "eval_loss": 24.898656845092773,
15
- "eval_rouge1": 29.9366,
16
- "eval_rouge2": 22.9687,
17
- "eval_rougeL": 26.9975,
18
- "eval_rougeLsum": 27.1774,
19
- "eval_runtime": 152.4326,
20
- "eval_samples_per_second": 1.909,
21
- "eval_steps_per_second": 0.243,
22
- "step": 328
23
- },
24
- {
25
- "epoch": 1.52,
26
- "learning_rate": 2e-05,
27
- "loss": 25.467,
28
- "step": 500
29
- },
30
- {
31
- "epoch": 2.0,
32
- "eval_gen_len": 0.0,
33
- "eval_loss": 1.3504424095153809,
34
- "eval_rouge1": 51.142,
35
- "eval_rouge2": 49.8705,
36
- "eval_rougeL": 51.1588,
37
- "eval_rougeLsum": 51.1528,
38
- "eval_runtime": 153.7577,
39
- "eval_samples_per_second": 1.893,
40
- "eval_steps_per_second": 0.241,
41
- "step": 656
42
- },
43
- {
44
- "epoch": 3.0,
45
- "eval_gen_len": 14.927835051546392,
46
- "eval_loss": 0.8221376538276672,
47
- "eval_rouge1": 19.5594,
48
- "eval_rouge2": 12.7325,
49
- "eval_rougeL": 16.4586,
50
- "eval_rougeLsum": 16.7605,
51
- "eval_runtime": 157.0696,
52
- "eval_samples_per_second": 1.853,
53
- "eval_steps_per_second": 0.236,
54
- "step": 984
55
- },
56
- {
57
- "epoch": 3.05,
58
- "learning_rate": 1.1228070175438597e-05,
59
- "loss": 1.8759,
60
- "step": 1000
61
- },
62
- {
63
- "epoch": 4.0,
64
- "eval_gen_len": 14.109965635738831,
65
- "eval_loss": 0.7782504558563232,
66
- "eval_rouge1": 21.8348,
67
- "eval_rouge2": 14.9645,
68
- "eval_rougeL": 18.7764,
69
- "eval_rougeLsum": 19.0709,
70
- "eval_runtime": 155.486,
71
- "eval_samples_per_second": 1.872,
72
- "eval_steps_per_second": 0.238,
73
- "step": 1312
74
- },
75
- {
76
- "epoch": 4.57,
77
- "learning_rate": 2.456140350877193e-06,
78
- "loss": 0.8715,
79
- "step": 1500
80
- },
81
- {
82
- "epoch": 5.0,
83
- "eval_gen_len": 13.051546391752577,
84
- "eval_loss": 0.766891360282898,
85
- "eval_rouge1": 24.6538,
86
- "eval_rouge2": 17.821,
87
- "eval_rougeL": 21.5884,
88
- "eval_rougeLsum": 21.9045,
89
- "eval_runtime": 157.0206,
90
- "eval_samples_per_second": 1.853,
91
  "eval_steps_per_second": 0.236,
92
- "step": 1640
93
  }
94
  ],
95
  "logging_steps": 500,
96
- "max_steps": 1640,
97
- "num_train_epochs": 5,
98
  "save_steps": 500,
99
- "total_flos": 1.229071090286592e+16,
100
  "trial_name": null,
101
  "trial_params": null
102
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 328,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_gen_len": 15.577319587628866,
14
+ "eval_loss": 8.096635818481445,
15
+ "eval_rouge1": 16.5418,
16
+ "eval_rouge2": 10.3523,
17
+ "eval_rougeL": 13.972,
18
+ "eval_rougeLsum": 14.1918,
19
+ "eval_runtime": 157.1024,
20
+ "eval_samples_per_second": 1.852,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  "eval_steps_per_second": 0.236,
22
+ "step": 328
23
  }
24
  ],
25
  "logging_steps": 500,
26
+ "max_steps": 3936,
27
+ "num_train_epochs": 12,
28
  "save_steps": 500,
29
+ "total_flos": 2458142180573184.0,
30
  "trial_name": null,
31
  "trial_params": null
32
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5c8949174dd9b49837d8ec609d2b880bd4fe35d9d9291ea4e7c0991d5dcd240
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83aee626fbc0f5035f17ef0b0e15a14a8afcb9d45d851b7bf8dc1e10d0cdf477
3
  size 4728