jpata committed on
Commit
652e089
·
1 Parent(s): 7bf0670

added CMS model from 2024 April 5 PF meeting

Browse files
cms/2024_04_05/pyg-cms_20240324_235743_208080/checkpoint-32-17.877384.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:131b35416593e8a69c529d85446cdbdd55e1d11f8870ac54c72448a75c72b57d
3
+ size 255893234
cms/2024_04_05/pyg-cms_20240324_235743_208080/hyperparameters.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"Num of mlpf parameters": 21304339, "config": "parameters/pytorch/pyg-cms.yaml", "prefix": null, "data_dir": "/scratch/persistent/joosep/tensorflow_datasets", "gpus": 1, "gpu_batch_multiplier": 20, "dataset": "cms", "num_workers": 4, "prefetch_factor": 50, "resume_training": null, "load": null, "train": true, "test": null, "num_epochs": 100, "patience": null, "lr": null, "conv_type": "attention", "num_convs": null, "make_plots": null, "export_onnx": null, "ntrain": null, "ntest": null, "nvalid": null, "val_freq": null, "checkpoint_freq": 1, "hpo": null, "ray_train": false, "local": null, "ray_cpus": null, "ray_gpus": null, "comet": true, "comet_offline": false, "comet_step_freq": null, "experiments_dir": null, "pipeline": null, "dtype": null, "attention_type": null, "test_datasets": {"cms_pf_ttbar": {"version": "1.7.1"}, "cms_pf_qcd_high_pt": {"version": "1.7.0"}, "cms_pf_qcd": {"version": "1.7.0"}, "cms_pf_ztt": {"version": "1.7.0"}, "cms_pf_sms_t1tttt": {"version": "1.7.0"}}}
cms/2024_04_05/pyg-cms_20240324_235743_208080/mlpf_losses.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:234fc7ea5a30d4927e05dbd27bbb5725d5c1f80589fd202ab593289a10ea4205
3
+ size 1836
cms/2024_04_05/pyg-cms_20240324_235743_208080/model_kwargs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfd634217a0268a8cc4e2d4f7ec0c5aa90697a9159c5b3092585a23f6ee20200
3
+ size 554
cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/train/events.out.tfevents.1711317469.gpu1.local.2556485.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c52c5673bf17a6d231904f796ab11c3a8d0811b632231cab6f35169339f77caa
3
+ size 110493839
cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/train/events.out.tfevents.1712129489.joosep-desktop-work.646614.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6fecae60774c0ccdd10889871e01aaa6dc7dbacd4f29a4a3cfb66bf04584301
3
+ size 8739
cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/train/events.out.tfevents.1712129556.joosep-desktop-work.647125.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:312f1568be932b846927c63a3f0d90ce6a88ac0ffff2e4539bd0256a0fd26cea
3
+ size 16350
cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/train/events.out.tfevents.1712129684.joosep-desktop-work.647708.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc54dbdf81a50f9e9b9f91024169683f25fb0aba1f368e966f4bdc1037e388f4
3
+ size 23961
cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/valid/events.out.tfevents.1711317469.gpu1.local.2556485.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0811b521707025790c075d064a05992d40ce0da97127912e8e34b252c882223f
3
+ size 39256
cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/valid/events.out.tfevents.1712129489.joosep-desktop-work.646614.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f13d9ae9963f649a22ce1378b4d792e99f5950b4ad72add0b791a13a93c8fbd9
3
+ size 88
cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/valid/events.out.tfevents.1712129556.joosep-desktop-work.647125.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbbc8fb83b45fae2fee80aa9f687ead4d28db6d8bf11dacda1048a2e29836cea
3
+ size 1312
cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/valid/events.out.tfevents.1712129684.joosep-desktop-work.647708.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a85e631dde89ac409373f2e1445ed69d1dedacfc29262171e13eb44e267a9d7
3
+ size 2536
cms/2024_04_05/pyg-cms_20240324_235743_208080/test-config.yaml ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ attention_type: efficient
2
+ backend: pytorch
3
+ checkpoint_freq: 1
4
+ comet: false
5
+ comet_name: particleflow-pt
6
+ comet_offline: false
7
+ comet_step_freq: 10
8
+ config: parameters/pytorch/pyg-cms.yaml
9
+ conv_type: attention
10
+ data_dir: null
11
+ dataset: cms
12
+ dtype: float16
13
+ experiments_dir: experiments/
14
+ gpu_batch_multiplier: 10
15
+ gpus: 1
16
+ load: experiments/pyg-cms_20240324_235743_208080/checkpoints/checkpoint-32-17.877384.pth
17
+ lr: 0.0001
18
+ lr_schedule: cosinedecay
19
+ lr_schedule_config:
20
+ onecycle:
21
+ pct_start: 0.3
22
+ make_plots: true
23
+ model:
24
+ attention:
25
+ activation: relu
26
+ attention_type: efficient
27
+ conv_type: attention
28
+ dropout_conv_id_ff: 0.0
29
+ dropout_conv_id_mha: 0.0
30
+ dropout_conv_reg_ff: 0.0
31
+ dropout_conv_reg_mha: 0.0
32
+ dropout_ff: 0.0
33
+ head_dim: 16
34
+ num_convs: 6
35
+ num_heads: 32
36
+ cos_phi_mode: linear
37
+ energy_mode: linear
38
+ eta_mode: linear
39
+ gnn_lsh:
40
+ activation: elu
41
+ bin_size: 640
42
+ conv_type: gnn_lsh
43
+ distance_dim: 128
44
+ dropout_ff: 0.0
45
+ embedding_dim: 512
46
+ ffn_dist_hidden_dim: 128
47
+ ffn_dist_num_layers: 2
48
+ layernorm: true
49
+ max_num_bins: 200
50
+ num_convs: 3
51
+ num_node_messages: 2
52
+ width: 512
53
+ gravnet:
54
+ activation: elu
55
+ conv_type: gravnet
56
+ dropout_ff: 0.1
57
+ embedding_dim: 512
58
+ k: 16
59
+ num_convs: 3
60
+ propagate_dimensions: 32
61
+ space_dimensions: 4
62
+ width: 512
63
+ input_encoding: joint
64
+ learned_representation_mode: last
65
+ mamba:
66
+ activation: elu
67
+ conv_type: mamba
68
+ d_conv: 4
69
+ d_state: 32
70
+ dropout_ff: 0.0
71
+ embedding_dim: 1024
72
+ expand: 2
73
+ num_convs: 4
74
+ width: 1024
75
+ pt_mode: linear
76
+ sin_phi_mode: linear
77
+ trainable: all
78
+ ntest: null
79
+ ntrain: null
80
+ num_epochs: 100
81
+ num_workers: 4
82
+ nvalid: null
83
+ patience: 20
84
+ prefetch_factor: 50
85
+ ray_train: false
86
+ raytune:
87
+ asha:
88
+ brackets: 1
89
+ grace_period: 10
90
+ max_t: 200
91
+ reduction_factor: 4
92
+ default_metric: val_loss
93
+ default_mode: min
94
+ hyperband:
95
+ max_t: 200
96
+ reduction_factor: 4
97
+ hyperopt:
98
+ n_random_steps: 10
99
+ local_dir: null
100
+ nevergrad:
101
+ n_random_steps: 10
102
+ sched: asha
103
+ search_alg: hyperopt
104
+ sort_data: true
105
+ test_dataset:
106
+ cms_pf_qcd:
107
+ version: 1.7.1
108
+ test_datasets: []
109
+ train_dataset:
110
+ cms:
111
+ physical:
112
+ batch_size: 1
113
+ samples:
114
+ cms_pf_qcd:
115
+ version: 1.7.1
116
+ val_freq: null
117
+ valid_dataset:
118
+ cms:
119
+ physical:
120
+ batch_size: 1
121
+ samples:
122
+ cms_pf_qcd:
123
+ version: 1.7.1
cms/2024_04_05/pyg-cms_20240324_235743_208080/train-config.yaml ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ backend: pytorch
2
+ checkpoint_freq: 1
3
+ comet: true
4
+ comet_name: particleflow-pt
5
+ comet_offline: false
6
+ comet_step_freq: 10
7
+ config: parameters/pytorch/pyg-cms.yaml
8
+ conv_type: attention
9
+ data_dir: /scratch/persistent/joosep/tensorflow_datasets
10
+ dataset: cms
11
+ dtype: bfloat16
12
+ gpu_batch_multiplier: 20
13
+ gpus: 1
14
+ load: null
15
+ lr: 0.0001
16
+ lr_schedule: cosinedecay
17
+ lr_schedule_config:
18
+ onecycle:
19
+ pct_start: 0.3
20
+ model:
21
+ attention:
22
+ activation: relu
23
+ attention_type: flash
24
+ conv_type: attention
25
+ dropout_conv_id_ff: 0.0
26
+ dropout_conv_id_mha: 0.0
27
+ dropout_conv_reg_ff: 0.0
28
+ dropout_conv_reg_mha: 0.0
29
+ dropout_ff: 0.0
30
+ head_dim: 16
31
+ num_convs: 6
32
+ num_heads: 32
33
+ cos_phi_mode: linear
34
+ energy_mode: linear
35
+ eta_mode: linear
36
+ gnn_lsh:
37
+ activation: elu
38
+ bin_size: 640
39
+ conv_type: gnn_lsh
40
+ distance_dim: 128
41
+ dropout_ff: 0.0
42
+ embedding_dim: 512
43
+ ffn_dist_hidden_dim: 128
44
+ ffn_dist_num_layers: 2
45
+ layernorm: true
46
+ max_num_bins: 200
47
+ num_convs: 3
48
+ num_node_messages: 2
49
+ width: 512
50
+ gravnet:
51
+ activation: elu
52
+ conv_type: gravnet
53
+ dropout_ff: 0.1
54
+ embedding_dim: 512
55
+ k: 16
56
+ num_convs: 3
57
+ propagate_dimensions: 32
58
+ space_dimensions: 4
59
+ width: 512
60
+ input_encoding: joint
61
+ learned_representation_mode: last
62
+ mamba:
63
+ activation: elu
64
+ conv_type: mamba
65
+ d_conv: 4
66
+ d_state: 32
67
+ dropout_ff: 0.0
68
+ embedding_dim: 1024
69
+ expand: 2
70
+ num_convs: 4
71
+ width: 1024
72
+ pt_mode: linear
73
+ sin_phi_mode: linear
74
+ trainable: all
75
+ ntest: null
76
+ ntrain: null
77
+ num_epochs: 100
78
+ num_workers: 4
79
+ nvalid: null
80
+ patience: 20
81
+ prefetch_factor: 50
82
+ ray_train: false
83
+ raytune:
84
+ asha:
85
+ brackets: 1
86
+ grace_period: 10
87
+ max_t: 200
88
+ reduction_factor: 4
89
+ default_metric: val_loss
90
+ default_mode: min
91
+ hyperband:
92
+ max_t: 200
93
+ reduction_factor: 4
94
+ hyperopt:
95
+ n_random_steps: 10
96
+ local_dir: null
97
+ nevergrad:
98
+ n_random_steps: 10
99
+ sched: asha
100
+ search_alg: hyperopt
101
+ sort_data: true
102
+ test_dataset:
103
+ cms_pf_qcd:
104
+ version: 1.7.0
105
+ cms_pf_qcd_high_pt:
106
+ version: 1.7.0
107
+ cms_pf_sms_t1tttt:
108
+ version: 1.7.0
109
+ cms_pf_ttbar:
110
+ version: 1.7.1
111
+ cms_pf_ztt:
112
+ version: 1.7.0
113
+ test_datasets: []
114
+ train: true
115
+ train_dataset:
116
+ cms:
117
+ physical:
118
+ batch_size: 1
119
+ samples:
120
+ cms_pf_ttbar:
121
+ version: 1.7.1
122
+ val_freq: null
123
+ valid_dataset:
124
+ cms:
125
+ physical:
126
+ batch_size: 1
127
+ samples:
128
+ cms_pf_ttbar:
129
+ version: 1.7.1
cms/2024_04_05/pyg-cms_20240324_235743_208080/train.log ADDED
@@ -0,0 +1,821 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2024-03-24 23:57:43,238] INFO: Will use single-gpu: NVIDIA A100 80GB PCIe
2
+ [2024-03-24 23:57:43,242] INFO: using dtype=torch.bfloat16
3
+ [2024-03-24 23:57:43,242] INFO: using dtype=torch.bfloat16
4
+ [2024-03-24 23:57:43,277] INFO: using attention_type=flash
5
+ [2024-03-24 23:57:43,277] INFO: using attention_type=flash
6
+ [2024-03-24 23:57:43,290] INFO: using attention_type=flash
7
+ [2024-03-24 23:57:43,290] INFO: using attention_type=flash
8
+ [2024-03-24 23:57:43,302] INFO: using attention_type=flash
9
+ [2024-03-24 23:57:43,302] INFO: using attention_type=flash
10
+ [2024-03-24 23:57:43,314] INFO: using attention_type=flash
11
+ [2024-03-24 23:57:43,314] INFO: using attention_type=flash
12
+ [2024-03-24 23:57:43,330] INFO: using attention_type=flash
13
+ [2024-03-24 23:57:43,330] INFO: using attention_type=flash
14
+ [2024-03-24 23:57:43,342] INFO: using attention_type=flash
15
+ [2024-03-24 23:57:43,342] INFO: using attention_type=flash
16
+ [2024-03-24 23:57:43,354] INFO: using attention_type=flash
17
+ [2024-03-24 23:57:43,354] INFO: using attention_type=flash
18
+ [2024-03-24 23:57:43,367] INFO: using attention_type=flash
19
+ [2024-03-24 23:57:43,367] INFO: using attention_type=flash
20
+ [2024-03-24 23:57:43,380] INFO: using attention_type=flash
21
+ [2024-03-24 23:57:43,380] INFO: using attention_type=flash
22
+ [2024-03-24 23:57:43,392] INFO: using attention_type=flash
23
+ [2024-03-24 23:57:43,392] INFO: using attention_type=flash
24
+ [2024-03-24 23:57:43,404] INFO: using attention_type=flash
25
+ [2024-03-24 23:57:43,404] INFO: using attention_type=flash
26
+ [2024-03-24 23:57:43,415] INFO: using attention_type=flash
27
+ [2024-03-24 23:57:43,415] INFO: using attention_type=flash
28
+ [2024-03-24 23:57:43,660] INFO: MLPF(
29
+ (nn0_id): Sequential(
30
+ (0): Linear(in_features=55, out_features=512, bias=True)
31
+ (1): ReLU()
32
+ (2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
33
+ (3): Dropout(p=0.0, inplace=False)
34
+ (4): Linear(in_features=512, out_features=512, bias=True)
35
+ )
36
+ (nn0_reg): Sequential(
37
+ (0): Linear(in_features=55, out_features=512, bias=True)
38
+ (1): ReLU()
39
+ (2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
40
+ (3): Dropout(p=0.0, inplace=False)
41
+ (4): Linear(in_features=512, out_features=512, bias=True)
42
+ )
43
+ (conv_id): ModuleList(
44
+ (0-5): 6 x SelfAttentionLayer(
45
+ (mha): MultiheadAttention(
46
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
47
+ )
48
+ (norm0): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
49
+ (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
50
+ (seq): Sequential(
51
+ (0): Linear(in_features=512, out_features=512, bias=True)
52
+ (1): ReLU()
53
+ (2): Linear(in_features=512, out_features=512, bias=True)
54
+ (3): ReLU()
55
+ )
56
+ (dropout): Dropout(p=0.0, inplace=False)
57
+ )
58
+ )
59
+ (conv_reg): ModuleList(
60
+ (0-5): 6 x SelfAttentionLayer(
61
+ (mha): MultiheadAttention(
62
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
63
+ )
64
+ (norm0): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
65
+ (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
66
+ (seq): Sequential(
67
+ (0): Linear(in_features=512, out_features=512, bias=True)
68
+ (1): ReLU()
69
+ (2): Linear(in_features=512, out_features=512, bias=True)
70
+ (3): ReLU()
71
+ )
72
+ (dropout): Dropout(p=0.0, inplace=False)
73
+ )
74
+ )
75
+ (nn_id): Sequential(
76
+ (0): Linear(in_features=567, out_features=512, bias=True)
77
+ (1): ReLU()
78
+ (2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
79
+ (3): Dropout(p=0.0, inplace=False)
80
+ (4): Linear(in_features=512, out_features=9, bias=True)
81
+ )
82
+ (nn_pt): RegressionOutput(
83
+ (nn): Sequential(
84
+ (0): Linear(in_features=576, out_features=512, bias=True)
85
+ (1): ReLU()
86
+ (2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
87
+ (3): Dropout(p=0.0, inplace=False)
88
+ (4): Linear(in_features=512, out_features=2, bias=True)
89
+ )
90
+ )
91
+ (nn_eta): RegressionOutput(
92
+ (nn): Sequential(
93
+ (0): Linear(in_features=576, out_features=512, bias=True)
94
+ (1): ReLU()
95
+ (2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
96
+ (3): Dropout(p=0.0, inplace=False)
97
+ (4): Linear(in_features=512, out_features=2, bias=True)
98
+ )
99
+ )
100
+ (nn_sin_phi): RegressionOutput(
101
+ (nn): Sequential(
102
+ (0): Linear(in_features=576, out_features=512, bias=True)
103
+ (1): ReLU()
104
+ (2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
105
+ (3): Dropout(p=0.0, inplace=False)
106
+ (4): Linear(in_features=512, out_features=2, bias=True)
107
+ )
108
+ )
109
+ (nn_cos_phi): RegressionOutput(
110
+ (nn): Sequential(
111
+ (0): Linear(in_features=576, out_features=512, bias=True)
112
+ (1): ReLU()
113
+ (2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
114
+ (3): Dropout(p=0.0, inplace=False)
115
+ (4): Linear(in_features=512, out_features=2, bias=True)
116
+ )
117
+ )
118
+ (nn_energy): RegressionOutput(
119
+ (nn): Sequential(
120
+ (0): Linear(in_features=576, out_features=512, bias=True)
121
+ (1): ReLU()
122
+ (2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
123
+ (3): Dropout(p=0.0, inplace=False)
124
+ (4): Linear(in_features=512, out_features=2, bias=True)
125
+ )
126
+ )
127
+ )
128
+ [2024-03-24 23:57:43,660] INFO: MLPF(
129
+ (nn0_id): Sequential(
130
+ (0): Linear(in_features=55, out_features=512, bias=True)
131
+ (1): ReLU()
132
+ (2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
133
+ (3): Dropout(p=0.0, inplace=False)
134
+ (4): Linear(in_features=512, out_features=512, bias=True)
135
+ )
136
+ (nn0_reg): Sequential(
137
+ (0): Linear(in_features=55, out_features=512, bias=True)
138
+ (1): ReLU()
139
+ (2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
140
+ (3): Dropout(p=0.0, inplace=False)
141
+ (4): Linear(in_features=512, out_features=512, bias=True)
142
+ )
143
+ (conv_id): ModuleList(
144
+ (0-5): 6 x SelfAttentionLayer(
145
+ (mha): MultiheadAttention(
146
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
147
+ )
148
+ (norm0): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
149
+ (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
150
+ (seq): Sequential(
151
+ (0): Linear(in_features=512, out_features=512, bias=True)
152
+ (1): ReLU()
153
+ (2): Linear(in_features=512, out_features=512, bias=True)
154
+ (3): ReLU()
155
+ )
156
+ (dropout): Dropout(p=0.0, inplace=False)
157
+ )
158
+ )
159
+ (conv_reg): ModuleList(
160
+ (0-5): 6 x SelfAttentionLayer(
161
+ (mha): MultiheadAttention(
162
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
163
+ )
164
+ (norm0): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
165
+ (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
166
+ (seq): Sequential(
167
+ (0): Linear(in_features=512, out_features=512, bias=True)
168
+ (1): ReLU()
169
+ (2): Linear(in_features=512, out_features=512, bias=True)
170
+ (3): ReLU()
171
+ )
172
+ (dropout): Dropout(p=0.0, inplace=False)
173
+ )
174
+ )
175
+ (nn_id): Sequential(
176
+ (0): Linear(in_features=567, out_features=512, bias=True)
177
+ (1): ReLU()
178
+ (2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
179
+ (3): Dropout(p=0.0, inplace=False)
180
+ (4): Linear(in_features=512, out_features=9, bias=True)
181
+ )
182
+ (nn_pt): RegressionOutput(
183
+ (nn): Sequential(
184
+ (0): Linear(in_features=576, out_features=512, bias=True)
185
+ (1): ReLU()
186
+ (2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
187
+ (3): Dropout(p=0.0, inplace=False)
188
+ (4): Linear(in_features=512, out_features=2, bias=True)
189
+ )
190
+ )
191
+ (nn_eta): RegressionOutput(
192
+ (nn): Sequential(
193
+ (0): Linear(in_features=576, out_features=512, bias=True)
194
+ (1): ReLU()
195
+ (2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
196
+ (3): Dropout(p=0.0, inplace=False)
197
+ (4): Linear(in_features=512, out_features=2, bias=True)
198
+ )
199
+ )
200
+ (nn_sin_phi): RegressionOutput(
201
+ (nn): Sequential(
202
+ (0): Linear(in_features=576, out_features=512, bias=True)
203
+ (1): ReLU()
204
+ (2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
205
+ (3): Dropout(p=0.0, inplace=False)
206
+ (4): Linear(in_features=512, out_features=2, bias=True)
207
+ )
208
+ )
209
+ (nn_cos_phi): RegressionOutput(
210
+ (nn): Sequential(
211
+ (0): Linear(in_features=576, out_features=512, bias=True)
212
+ (1): ReLU()
213
+ (2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
214
+ (3): Dropout(p=0.0, inplace=False)
215
+ (4): Linear(in_features=512, out_features=2, bias=True)
216
+ )
217
+ )
218
+ (nn_energy): RegressionOutput(
219
+ (nn): Sequential(
220
+ (0): Linear(in_features=576, out_features=512, bias=True)
221
+ (1): ReLU()
222
+ (2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
223
+ (3): Dropout(p=0.0, inplace=False)
224
+ (4): Linear(in_features=512, out_features=2, bias=True)
225
+ )
226
+ )
227
+ )
228
+ [2024-03-24 23:57:43,666] INFO: Trainable parameters: 21304339
229
+ [2024-03-24 23:57:43,666] INFO: Trainable parameters: 21304339
230
+ [2024-03-24 23:57:43,668] INFO: Non-trainable parameters: 0
231
+ [2024-03-24 23:57:43,668] INFO: Non-trainable parameters: 0
232
+ [2024-03-24 23:57:43,671] INFO: Total parameters: 21304339
233
+ [2024-03-24 23:57:43,671] INFO: Total parameters: 21304339
234
+ [2024-03-24 23:57:43,682] INFO: Modules Trainable parameters Non-tranable parameters
235
+ nn0_id.0.weight 28160 0
236
+ nn0_id.0.bias 512 0
237
+ nn0_id.2.weight 512 0
238
+ nn0_id.2.bias 512 0
239
+ nn0_id.4.weight 262144 0
240
+ nn0_id.4.bias 512 0
241
+ nn0_reg.0.weight 28160 0
242
+ nn0_reg.0.bias 512 0
243
+ nn0_reg.2.weight 512 0
244
+ nn0_reg.2.bias 512 0
245
+ nn0_reg.4.weight 262144 0
246
+ nn0_reg.4.bias 512 0
247
+ conv_id.0.mha.in_proj_weight 786432 0
248
+ conv_id.0.mha.in_proj_bias 1536 0
249
+ conv_id.0.mha.out_proj.weight 262144 0
250
+ conv_id.0.mha.out_proj.bias 512 0
251
+ conv_id.0.norm0.weight 512 0
252
+ conv_id.0.norm0.bias 512 0
253
+ conv_id.0.norm1.weight 512 0
254
+ conv_id.0.norm1.bias 512 0
255
+ conv_id.0.seq.0.weight 262144 0
256
+ conv_id.0.seq.0.bias 512 0
257
+ conv_id.0.seq.2.weight 262144 0
258
+ conv_id.0.seq.2.bias 512 0
259
+ conv_id.1.mha.in_proj_weight 786432 0
260
+ conv_id.1.mha.in_proj_bias 1536 0
261
+ conv_id.1.mha.out_proj.weight 262144 0
262
+ conv_id.1.mha.out_proj.bias 512 0
263
+ conv_id.1.norm0.weight 512 0
264
+ conv_id.1.norm0.bias 512 0
265
+ conv_id.1.norm1.weight 512 0
266
+ conv_id.1.norm1.bias 512 0
267
+ conv_id.1.seq.0.weight 262144 0
268
+ conv_id.1.seq.0.bias 512 0
269
+ conv_id.1.seq.2.weight 262144 0
270
+ conv_id.1.seq.2.bias 512 0
271
+ conv_id.2.mha.in_proj_weight 786432 0
272
+ conv_id.2.mha.in_proj_bias 1536 0
273
+ conv_id.2.mha.out_proj.weight 262144 0
274
+ conv_id.2.mha.out_proj.bias 512 0
275
+ conv_id.2.norm0.weight 512 0
276
+ conv_id.2.norm0.bias 512 0
277
+ conv_id.2.norm1.weight 512 0
278
+ conv_id.2.norm1.bias 512 0
279
+ conv_id.2.seq.0.weight 262144 0
280
+ conv_id.2.seq.0.bias 512 0
281
+ conv_id.2.seq.2.weight 262144 0
282
+ conv_id.2.seq.2.bias 512 0
283
+ conv_id.3.mha.in_proj_weight 786432 0
284
+ conv_id.3.mha.in_proj_bias 1536 0
285
+ conv_id.3.mha.out_proj.weight 262144 0
286
+ conv_id.3.mha.out_proj.bias 512 0
287
+ conv_id.3.norm0.weight 512 0
288
+ conv_id.3.norm0.bias 512 0
289
+ conv_id.3.norm1.weight 512 0
290
+ conv_id.3.norm1.bias 512 0
291
+ conv_id.3.seq.0.weight 262144 0
292
+ conv_id.3.seq.0.bias 512 0
293
+ conv_id.3.seq.2.weight 262144 0
294
+ conv_id.3.seq.2.bias 512 0
295
+ conv_id.4.mha.in_proj_weight 786432 0
296
+ conv_id.4.mha.in_proj_bias 1536 0
297
+ conv_id.4.mha.out_proj.weight 262144 0
298
+ conv_id.4.mha.out_proj.bias 512 0
299
+ conv_id.4.norm0.weight 512 0
300
+ conv_id.4.norm0.bias 512 0
301
+ conv_id.4.norm1.weight 512 0
302
+ conv_id.4.norm1.bias 512 0
303
+ conv_id.4.seq.0.weight 262144 0
304
+ conv_id.4.seq.0.bias 512 0
305
+ conv_id.4.seq.2.weight 262144 0
306
+ conv_id.4.seq.2.bias 512 0
307
+ conv_id.5.mha.in_proj_weight 786432 0
308
+ conv_id.5.mha.in_proj_bias 1536 0
309
+ conv_id.5.mha.out_proj.weight 262144 0
310
+ conv_id.5.mha.out_proj.bias 512 0
311
+ conv_id.5.norm0.weight 512 0
312
+ conv_id.5.norm0.bias 512 0
313
+ conv_id.5.norm1.weight 512 0
314
+ conv_id.5.norm1.bias 512 0
315
+ conv_id.5.seq.0.weight 262144 0
316
+ conv_id.5.seq.0.bias 512 0
317
+ conv_id.5.seq.2.weight 262144 0
318
+ conv_id.5.seq.2.bias 512 0
319
+ conv_reg.0.mha.in_proj_weight 786432 0
320
+ conv_reg.0.mha.in_proj_bias 1536 0
321
+ conv_reg.0.mha.out_proj.weight 262144 0
322
+ conv_reg.0.mha.out_proj.bias 512 0
323
+ conv_reg.0.norm0.weight 512 0
324
+ conv_reg.0.norm0.bias 512 0
325
+ conv_reg.0.norm1.weight 512 0
326
+ conv_reg.0.norm1.bias 512 0
327
+ conv_reg.0.seq.0.weight 262144 0
328
+ conv_reg.0.seq.0.bias 512 0
329
+ conv_reg.0.seq.2.weight 262144 0
330
+ conv_reg.0.seq.2.bias 512 0
331
+ conv_reg.1.mha.in_proj_weight 786432 0
332
+ conv_reg.1.mha.in_proj_bias 1536 0
333
+ conv_reg.1.mha.out_proj.weight 262144 0
334
+ conv_reg.1.mha.out_proj.bias 512 0
335
+ conv_reg.1.norm0.weight 512 0
336
+ conv_reg.1.norm0.bias 512 0
337
+ conv_reg.1.norm1.weight 512 0
338
+ conv_reg.1.norm1.bias 512 0
339
+ conv_reg.1.seq.0.weight 262144 0
340
+ conv_reg.1.seq.0.bias 512 0
341
+ conv_reg.1.seq.2.weight 262144 0
342
+ conv_reg.1.seq.2.bias 512 0
343
+ conv_reg.2.mha.in_proj_weight 786432 0
344
+ conv_reg.2.mha.in_proj_bias 1536 0
345
+ conv_reg.2.mha.out_proj.weight 262144 0
346
+ conv_reg.2.mha.out_proj.bias 512 0
347
+ conv_reg.2.norm0.weight 512 0
348
+ conv_reg.2.norm0.bias 512 0
349
+ conv_reg.2.norm1.weight 512 0
350
+ conv_reg.2.norm1.bias 512 0
351
+ conv_reg.2.seq.0.weight 262144 0
352
+ conv_reg.2.seq.0.bias 512 0
353
+ conv_reg.2.seq.2.weight 262144 0
354
+ conv_reg.2.seq.2.bias 512 0
355
+ conv_reg.3.mha.in_proj_weight 786432 0
356
+ conv_reg.3.mha.in_proj_bias 1536 0
357
+ conv_reg.3.mha.out_proj.weight 262144 0
358
+ conv_reg.3.mha.out_proj.bias 512 0
359
+ conv_reg.3.norm0.weight 512 0
360
+ conv_reg.3.norm0.bias 512 0
361
+ conv_reg.3.norm1.weight 512 0
362
+ conv_reg.3.norm1.bias 512 0
363
+ conv_reg.3.seq.0.weight 262144 0
364
+ conv_reg.3.seq.0.bias 512 0
365
+ conv_reg.3.seq.2.weight 262144 0
366
+ conv_reg.3.seq.2.bias 512 0
367
+ conv_reg.4.mha.in_proj_weight 786432 0
368
+ conv_reg.4.mha.in_proj_bias 1536 0
369
+ conv_reg.4.mha.out_proj.weight 262144 0
370
+ conv_reg.4.mha.out_proj.bias 512 0
371
+ conv_reg.4.norm0.weight 512 0
372
+ conv_reg.4.norm0.bias 512 0
373
+ conv_reg.4.norm1.weight 512 0
374
+ conv_reg.4.norm1.bias 512 0
375
+ conv_reg.4.seq.0.weight 262144 0
376
+ conv_reg.4.seq.0.bias 512 0
377
+ conv_reg.4.seq.2.weight 262144 0
378
+ conv_reg.4.seq.2.bias 512 0
379
+ conv_reg.5.mha.in_proj_weight 786432 0
380
+ conv_reg.5.mha.in_proj_bias 1536 0
381
+ conv_reg.5.mha.out_proj.weight 262144 0
382
+ conv_reg.5.mha.out_proj.bias 512 0
383
+ conv_reg.5.norm0.weight 512 0
384
+ conv_reg.5.norm0.bias 512 0
385
+ conv_reg.5.norm1.weight 512 0
386
+ conv_reg.5.norm1.bias 512 0
387
+ conv_reg.5.seq.0.weight 262144 0
388
+ conv_reg.5.seq.0.bias 512 0
389
+ conv_reg.5.seq.2.weight 262144 0
390
+ conv_reg.5.seq.2.bias 512 0
391
+ nn_id.0.weight 290304 0
392
+ nn_id.0.bias 512 0
393
+ nn_id.2.weight 512 0
394
+ nn_id.2.bias 512 0
395
+ nn_id.4.weight 4608 0
396
+ nn_id.4.bias 9 0
397
+ nn_pt.nn.0.weight 294912 0
398
+ nn_pt.nn.0.bias 512 0
399
+ nn_pt.nn.2.weight 512 0
400
+ nn_pt.nn.2.bias 512 0
401
+ nn_pt.nn.4.weight 1024 0
402
+ nn_pt.nn.4.bias 2 0
403
+ nn_eta.nn.0.weight 294912 0
404
+ nn_eta.nn.0.bias 512 0
405
+ nn_eta.nn.2.weight 512 0
406
+ nn_eta.nn.2.bias 512 0
407
+ nn_eta.nn.4.weight 1024 0
408
+ nn_eta.nn.4.bias 2 0
409
+ nn_sin_phi.nn.0.weight 294912 0
410
+ nn_sin_phi.nn.0.bias 512 0
411
+ nn_sin_phi.nn.2.weight 512 0
412
+ nn_sin_phi.nn.2.bias 512 0
413
+ nn_sin_phi.nn.4.weight 1024 0
414
+ nn_sin_phi.nn.4.bias 2 0
415
+ nn_cos_phi.nn.0.weight 294912 0
416
+ nn_cos_phi.nn.0.bias 512 0
417
+ nn_cos_phi.nn.2.weight 512 0
418
+ nn_cos_phi.nn.2.bias 512 0
419
+ nn_cos_phi.nn.4.weight 1024 0
420
+ nn_cos_phi.nn.4.bias 2 0
421
+ nn_energy.nn.0.weight 294912 0
422
+ nn_energy.nn.0.bias 512 0
423
+ nn_energy.nn.2.weight 512 0
424
+ nn_energy.nn.2.bias 512 0
425
+ nn_energy.nn.4.weight 1024 0
426
+ nn_energy.nn.4.bias 2 0
427
+ [2024-03-24 23:57:43,682] INFO: Modules Trainable parameters Non-tranable parameters
428
+ nn0_id.0.weight 28160 0
429
+ nn0_id.0.bias 512 0
430
+ nn0_id.2.weight 512 0
431
+ nn0_id.2.bias 512 0
432
+ nn0_id.4.weight 262144 0
433
+ nn0_id.4.bias 512 0
434
+ nn0_reg.0.weight 28160 0
435
+ nn0_reg.0.bias 512 0
436
+ nn0_reg.2.weight 512 0
437
+ nn0_reg.2.bias 512 0
438
+ nn0_reg.4.weight 262144 0
439
+ nn0_reg.4.bias 512 0
440
+ conv_id.0.mha.in_proj_weight 786432 0
441
+ conv_id.0.mha.in_proj_bias 1536 0
442
+ conv_id.0.mha.out_proj.weight 262144 0
443
+ conv_id.0.mha.out_proj.bias 512 0
444
+ conv_id.0.norm0.weight 512 0
445
+ conv_id.0.norm0.bias 512 0
446
+ conv_id.0.norm1.weight 512 0
447
+ conv_id.0.norm1.bias 512 0
448
+ conv_id.0.seq.0.weight 262144 0
449
+ conv_id.0.seq.0.bias 512 0
450
+ conv_id.0.seq.2.weight 262144 0
451
+ conv_id.0.seq.2.bias 512 0
452
+ conv_id.1.mha.in_proj_weight 786432 0
453
+ conv_id.1.mha.in_proj_bias 1536 0
454
+ conv_id.1.mha.out_proj.weight 262144 0
455
+ conv_id.1.mha.out_proj.bias 512 0
456
+ conv_id.1.norm0.weight 512 0
457
+ conv_id.1.norm0.bias 512 0
458
+ conv_id.1.norm1.weight 512 0
459
+ conv_id.1.norm1.bias 512 0
460
+ conv_id.1.seq.0.weight 262144 0
461
+ conv_id.1.seq.0.bias 512 0
462
+ conv_id.1.seq.2.weight 262144 0
463
+ conv_id.1.seq.2.bias 512 0
464
+ conv_id.2.mha.in_proj_weight 786432 0
465
+ conv_id.2.mha.in_proj_bias 1536 0
466
+ conv_id.2.mha.out_proj.weight 262144 0
467
+ conv_id.2.mha.out_proj.bias 512 0
468
+ conv_id.2.norm0.weight 512 0
469
+ conv_id.2.norm0.bias 512 0
470
+ conv_id.2.norm1.weight 512 0
471
+ conv_id.2.norm1.bias 512 0
472
+ conv_id.2.seq.0.weight 262144 0
473
+ conv_id.2.seq.0.bias 512 0
474
+ conv_id.2.seq.2.weight 262144 0
475
+ conv_id.2.seq.2.bias 512 0
476
+ conv_id.3.mha.in_proj_weight 786432 0
477
+ conv_id.3.mha.in_proj_bias 1536 0
478
+ conv_id.3.mha.out_proj.weight 262144 0
479
+ conv_id.3.mha.out_proj.bias 512 0
480
+ conv_id.3.norm0.weight 512 0
481
+ conv_id.3.norm0.bias 512 0
482
+ conv_id.3.norm1.weight 512 0
483
+ conv_id.3.norm1.bias 512 0
484
+ conv_id.3.seq.0.weight 262144 0
485
+ conv_id.3.seq.0.bias 512 0
486
+ conv_id.3.seq.2.weight 262144 0
487
+ conv_id.3.seq.2.bias 512 0
488
+ conv_id.4.mha.in_proj_weight 786432 0
489
+ conv_id.4.mha.in_proj_bias 1536 0
490
+ conv_id.4.mha.out_proj.weight 262144 0
491
+ conv_id.4.mha.out_proj.bias 512 0
492
+ conv_id.4.norm0.weight 512 0
493
+ conv_id.4.norm0.bias 512 0
494
+ conv_id.4.norm1.weight 512 0
495
+ conv_id.4.norm1.bias 512 0
496
+ conv_id.4.seq.0.weight 262144 0
497
+ conv_id.4.seq.0.bias 512 0
498
+ conv_id.4.seq.2.weight 262144 0
499
+ conv_id.4.seq.2.bias 512 0
500
+ conv_id.5.mha.in_proj_weight 786432 0
501
+ conv_id.5.mha.in_proj_bias 1536 0
502
+ conv_id.5.mha.out_proj.weight 262144 0
503
+ conv_id.5.mha.out_proj.bias 512 0
504
+ conv_id.5.norm0.weight 512 0
505
+ conv_id.5.norm0.bias 512 0
506
+ conv_id.5.norm1.weight 512 0
507
+ conv_id.5.norm1.bias 512 0
508
+ conv_id.5.seq.0.weight 262144 0
509
+ conv_id.5.seq.0.bias 512 0
510
+ conv_id.5.seq.2.weight 262144 0
511
+ conv_id.5.seq.2.bias 512 0
512
+ conv_reg.0.mha.in_proj_weight 786432 0
513
+ conv_reg.0.mha.in_proj_bias 1536 0
514
+ conv_reg.0.mha.out_proj.weight 262144 0
515
+ conv_reg.0.mha.out_proj.bias 512 0
516
+ conv_reg.0.norm0.weight 512 0
517
+ conv_reg.0.norm0.bias 512 0
518
+ conv_reg.0.norm1.weight 512 0
519
+ conv_reg.0.norm1.bias 512 0
520
+ conv_reg.0.seq.0.weight 262144 0
521
+ conv_reg.0.seq.0.bias 512 0
522
+ conv_reg.0.seq.2.weight 262144 0
523
+ conv_reg.0.seq.2.bias 512 0
524
+ conv_reg.1.mha.in_proj_weight 786432 0
525
+ conv_reg.1.mha.in_proj_bias 1536 0
526
+ conv_reg.1.mha.out_proj.weight 262144 0
527
+ conv_reg.1.mha.out_proj.bias 512 0
528
+ conv_reg.1.norm0.weight 512 0
529
+ conv_reg.1.norm0.bias 512 0
530
+ conv_reg.1.norm1.weight 512 0
531
+ conv_reg.1.norm1.bias 512 0
532
+ conv_reg.1.seq.0.weight 262144 0
533
+ conv_reg.1.seq.0.bias 512 0
534
+ conv_reg.1.seq.2.weight 262144 0
535
+ conv_reg.1.seq.2.bias 512 0
536
+ conv_reg.2.mha.in_proj_weight 786432 0
537
+ conv_reg.2.mha.in_proj_bias 1536 0
538
+ conv_reg.2.mha.out_proj.weight 262144 0
539
+ conv_reg.2.mha.out_proj.bias 512 0
540
+ conv_reg.2.norm0.weight 512 0
541
+ conv_reg.2.norm0.bias 512 0
542
+ conv_reg.2.norm1.weight 512 0
543
+ conv_reg.2.norm1.bias 512 0
544
+ conv_reg.2.seq.0.weight 262144 0
545
+ conv_reg.2.seq.0.bias 512 0
546
+ conv_reg.2.seq.2.weight 262144 0
547
+ conv_reg.2.seq.2.bias 512 0
548
+ conv_reg.3.mha.in_proj_weight 786432 0
549
+ conv_reg.3.mha.in_proj_bias 1536 0
550
+ conv_reg.3.mha.out_proj.weight 262144 0
551
+ conv_reg.3.mha.out_proj.bias 512 0
552
+ conv_reg.3.norm0.weight 512 0
553
+ conv_reg.3.norm0.bias 512 0
554
+ conv_reg.3.norm1.weight 512 0
555
+ conv_reg.3.norm1.bias 512 0
556
+ conv_reg.3.seq.0.weight 262144 0
557
+ conv_reg.3.seq.0.bias 512 0
558
+ conv_reg.3.seq.2.weight 262144 0
559
+ conv_reg.3.seq.2.bias 512 0
560
+ conv_reg.4.mha.in_proj_weight 786432 0
561
+ conv_reg.4.mha.in_proj_bias 1536 0
562
+ conv_reg.4.mha.out_proj.weight 262144 0
563
+ conv_reg.4.mha.out_proj.bias 512 0
564
+ conv_reg.4.norm0.weight 512 0
565
+ conv_reg.4.norm0.bias 512 0
566
+ conv_reg.4.norm1.weight 512 0
567
+ conv_reg.4.norm1.bias 512 0
568
+ conv_reg.4.seq.0.weight 262144 0
569
+ conv_reg.4.seq.0.bias 512 0
570
+ conv_reg.4.seq.2.weight 262144 0
571
+ conv_reg.4.seq.2.bias 512 0
572
+ conv_reg.5.mha.in_proj_weight 786432 0
573
+ conv_reg.5.mha.in_proj_bias 1536 0
574
+ conv_reg.5.mha.out_proj.weight 262144 0
575
+ conv_reg.5.mha.out_proj.bias 512 0
576
+ conv_reg.5.norm0.weight 512 0
577
+ conv_reg.5.norm0.bias 512 0
578
+ conv_reg.5.norm1.weight 512 0
579
+ conv_reg.5.norm1.bias 512 0
580
+ conv_reg.5.seq.0.weight 262144 0
581
+ conv_reg.5.seq.0.bias 512 0
582
+ conv_reg.5.seq.2.weight 262144 0
583
+ conv_reg.5.seq.2.bias 512 0
584
+ nn_id.0.weight 290304 0
585
+ nn_id.0.bias 512 0
586
+ nn_id.2.weight 512 0
587
+ nn_id.2.bias 512 0
588
+ nn_id.4.weight 4608 0
589
+ nn_id.4.bias 9 0
590
+ nn_pt.nn.0.weight 294912 0
591
+ nn_pt.nn.0.bias 512 0
592
+ nn_pt.nn.2.weight 512 0
593
+ nn_pt.nn.2.bias 512 0
594
+ nn_pt.nn.4.weight 1024 0
595
+ nn_pt.nn.4.bias 2 0
596
+ nn_eta.nn.0.weight 294912 0
597
+ nn_eta.nn.0.bias 512 0
598
+ nn_eta.nn.2.weight 512 0
599
+ nn_eta.nn.2.bias 512 0
600
+ nn_eta.nn.4.weight 1024 0
601
+ nn_eta.nn.4.bias 2 0
602
+ nn_sin_phi.nn.0.weight 294912 0
603
+ nn_sin_phi.nn.0.bias 512 0
604
+ nn_sin_phi.nn.2.weight 512 0
605
+ nn_sin_phi.nn.2.bias 512 0
606
+ nn_sin_phi.nn.4.weight 1024 0
607
+ nn_sin_phi.nn.4.bias 2 0
608
+ nn_cos_phi.nn.0.weight 294912 0
609
+ nn_cos_phi.nn.0.bias 512 0
610
+ nn_cos_phi.nn.2.weight 512 0
611
+ nn_cos_phi.nn.2.bias 512 0
612
+ nn_cos_phi.nn.4.weight 1024 0
613
+ nn_cos_phi.nn.4.bias 2 0
614
+ nn_energy.nn.0.weight 294912 0
615
+ nn_energy.nn.0.bias 512 0
616
+ nn_energy.nn.2.weight 512 0
617
+ nn_energy.nn.2.bias 512 0
618
+ nn_energy.nn.4.weight 1024 0
619
+ nn_energy.nn.4.bias 2 0
620
+ [2024-03-24 23:57:43,685] INFO: Creating experiment dir experiments/pyg-cms_20240324_235743_208080
621
+ [2024-03-24 23:57:43,685] INFO: Creating experiment dir experiments/pyg-cms_20240324_235743_208080
622
+ [2024-03-24 23:57:43,690] INFO: Model directory experiments/pyg-cms_20240324_235743_208080
623
+ [2024-03-24 23:57:43,690] INFO: Model directory experiments/pyg-cms_20240324_235743_208080
624
+ [2024-03-24 23:57:49,345] INFO: train_dataset: cms_pf_ttbar, 320100
625
+ [2024-03-24 23:57:49,345] INFO: train_dataset: cms_pf_ttbar, 320100
626
+ [2024-03-24 23:57:49,622] INFO: valid_dataset: cms_pf_ttbar, 80040
627
+ [2024-03-24 23:57:49,622] INFO: valid_dataset: cms_pf_ttbar, 80040
628
+ [2024-03-24 23:57:49,736] INFO: Initiating epoch #1 train run on device rank=0
629
+ [2024-03-24 23:57:49,736] INFO: Initiating epoch #1 train run on device rank=0
630
+ [2024-03-25 04:22:50,148] INFO: Initiating epoch #1 valid run on device rank=0
631
+ [2024-03-25 04:22:50,148] INFO: Initiating epoch #1 valid run on device rank=0
632
+ [2024-03-25 04:42:43,853] INFO: Rank 0: epoch=1 / 100 train_loss=20.8840 valid_loss=19.4969 stale=0 time=284.9m eta=28205.3m
633
+ [2024-03-25 04:42:43,853] INFO: Rank 0: epoch=1 / 100 train_loss=20.8840 valid_loss=19.4969 stale=0 time=284.9m eta=28205.3m
634
+ [2024-03-25 04:42:43,865] INFO: Initiating epoch #2 train run on device rank=0
635
+ [2024-03-25 04:42:43,865] INFO: Initiating epoch #2 train run on device rank=0
636
+ [2024-03-25 09:08:30,154] INFO: Initiating epoch #2 valid run on device rank=0
637
+ [2024-03-25 09:08:30,154] INFO: Initiating epoch #2 valid run on device rank=0
638
+ [2024-03-25 09:28:30,625] INFO: Rank 0: epoch=2 / 100 train_loss=19.0650 valid_loss=18.8620 stale=0 time=285.78m eta=27963.4m
639
+ [2024-03-25 09:28:30,625] INFO: Rank 0: epoch=2 / 100 train_loss=19.0650 valid_loss=18.8620 stale=0 time=285.78m eta=27963.4m
640
+ [2024-03-25 09:28:30,647] INFO: Initiating epoch #3 train run on device rank=0
641
+ [2024-03-25 09:28:30,647] INFO: Initiating epoch #3 train run on device rank=0
642
+ [2024-03-25 13:55:12,130] INFO: Initiating epoch #3 valid run on device rank=0
643
+ [2024-03-25 13:55:12,130] INFO: Initiating epoch #3 valid run on device rank=0
644
+ [2024-03-25 14:15:15,806] INFO: Rank 0: epoch=3 / 100 train_loss=18.7688 valid_loss=18.6758 stale=0 time=286.75m eta=27723.7m
645
+ [2024-03-25 14:15:15,806] INFO: Rank 0: epoch=3 / 100 train_loss=18.7688 valid_loss=18.6758 stale=0 time=286.75m eta=27723.7m
646
+ [2024-03-25 14:15:15,821] INFO: Initiating epoch #4 train run on device rank=0
647
+ [2024-03-25 14:15:15,821] INFO: Initiating epoch #4 train run on device rank=0
648
+ [2024-03-25 18:42:35,229] INFO: Initiating epoch #4 valid run on device rank=0
649
+ [2024-03-25 18:42:35,229] INFO: Initiating epoch #4 valid run on device rank=0
650
+ [2024-03-25 19:02:40,697] INFO: Rank 0: epoch=4 / 100 train_loss=18.6170 valid_loss=18.5653 stale=0 time=287.41m eta=27476.4m
651
+ [2024-03-25 19:02:40,697] INFO: Rank 0: epoch=4 / 100 train_loss=18.6170 valid_loss=18.5653 stale=0 time=287.41m eta=27476.4m
652
+ [2024-03-25 19:02:40,717] INFO: Initiating epoch #5 train run on device rank=0
653
+ [2024-03-25 19:02:40,717] INFO: Initiating epoch #5 train run on device rank=0
654
+ [2024-03-25 23:29:55,640] INFO: Initiating epoch #5 valid run on device rank=0
655
+ [2024-03-25 23:29:55,640] INFO: Initiating epoch #5 valid run on device rank=0
656
+ [2024-03-25 23:50:00,453] INFO: Rank 0: epoch=5 / 100 train_loss=18.5102 valid_loss=18.4685 stale=0 time=287.33m eta=27211.4m
657
+ [2024-03-25 23:50:00,453] INFO: Rank 0: epoch=5 / 100 train_loss=18.5102 valid_loss=18.4685 stale=0 time=287.33m eta=27211.4m
658
+ [2024-03-25 23:50:00,467] INFO: Initiating epoch #6 train run on device rank=0
659
+ [2024-03-25 23:50:00,467] INFO: Initiating epoch #6 train run on device rank=0
660
+ [2024-03-26 04:16:46,611] INFO: Initiating epoch #6 valid run on device rank=0
661
+ [2024-03-26 04:16:46,611] INFO: Initiating epoch #6 valid run on device rank=0
662
+ [2024-03-26 04:36:44,551] INFO: Rank 0: epoch=6 / 100 train_loss=18.4325 valid_loss=18.4090 stale=0 time=286.73m eta=26929.6m
663
+ [2024-03-26 04:36:44,551] INFO: Rank 0: epoch=6 / 100 train_loss=18.4325 valid_loss=18.4090 stale=0 time=286.73m eta=26929.6m
664
+ [2024-03-26 04:36:44,567] INFO: Initiating epoch #7 train run on device rank=0
665
+ [2024-03-26 04:36:44,567] INFO: Initiating epoch #7 train run on device rank=0
666
+ [2024-03-26 09:01:18,997] INFO: Initiating epoch #7 valid run on device rank=0
667
+ [2024-03-26 09:01:18,997] INFO: Initiating epoch #7 valid run on device rank=0
668
+ [2024-03-26 09:21:17,339] INFO: Rank 0: epoch=7 / 100 train_loss=18.3752 valid_loss=18.3620 stale=0 time=284.55m eta=26617.4m
669
+ [2024-03-26 09:21:17,339] INFO: Rank 0: epoch=7 / 100 train_loss=18.3752 valid_loss=18.3620 stale=0 time=284.55m eta=26617.4m
670
+ [2024-03-26 09:21:17,356] INFO: Initiating epoch #8 train run on device rank=0
671
+ [2024-03-26 09:21:17,356] INFO: Initiating epoch #8 train run on device rank=0
672
+ [2024-03-26 13:46:38,478] INFO: Initiating epoch #8 valid run on device rank=0
673
+ [2024-03-26 13:46:38,478] INFO: Initiating epoch #8 valid run on device rank=0
674
+ [2024-03-26 14:06:32,269] INFO: Rank 0: epoch=8 / 100 train_loss=18.3286 valid_loss=18.3267 stale=0 time=285.25m eta=26320.2m
675
+ [2024-03-26 14:06:32,269] INFO: Rank 0: epoch=8 / 100 train_loss=18.3286 valid_loss=18.3267 stale=0 time=285.25m eta=26320.2m
676
+ [2024-03-26 14:06:32,282] INFO: Initiating epoch #9 train run on device rank=0
677
+ [2024-03-26 14:06:32,282] INFO: Initiating epoch #9 train run on device rank=0
678
+ [2024-03-26 18:32:18,831] INFO: Initiating epoch #9 valid run on device rank=0
679
+ [2024-03-26 18:32:18,831] INFO: Initiating epoch #9 valid run on device rank=0
680
+ [2024-03-26 18:52:20,399] INFO: Rank 0: epoch=9 / 100 train_loss=18.2890 valid_loss=18.2883 stale=0 time=285.8m eta=26031.2m
681
+ [2024-03-26 18:52:20,399] INFO: Rank 0: epoch=9 / 100 train_loss=18.2890 valid_loss=18.2883 stale=0 time=285.8m eta=26031.2m
682
+ [2024-03-26 18:52:20,416] INFO: Initiating epoch #10 train run on device rank=0
683
+ [2024-03-26 18:52:20,416] INFO: Initiating epoch #10 train run on device rank=0
684
+ [2024-03-26 23:18:19,010] INFO: Initiating epoch #10 valid run on device rank=0
685
+ [2024-03-26 23:18:19,010] INFO: Initiating epoch #10 valid run on device rank=0
686
+ [2024-03-26 23:38:18,380] INFO: Rank 0: epoch=10 / 100 train_loss=18.2532 valid_loss=18.2458 stale=0 time=285.97m eta=25744.3m
687
+ [2024-03-26 23:38:18,380] INFO: Rank 0: epoch=10 / 100 train_loss=18.2532 valid_loss=18.2458 stale=0 time=285.97m eta=25744.3m
688
+ [2024-03-26 23:38:18,395] INFO: Initiating epoch #11 train run on device rank=0
689
+ [2024-03-26 23:38:18,395] INFO: Initiating epoch #11 train run on device rank=0
690
+ [2024-03-27 04:04:16,483] INFO: Initiating epoch #11 valid run on device rank=0
691
+ [2024-03-27 04:04:16,483] INFO: Initiating epoch #11 valid run on device rank=0
692
+ [2024-03-27 04:24:14,645] INFO: Rank 0: epoch=11 / 100 train_loss=18.2191 valid_loss=18.2212 stale=0 time=285.94m eta=25457.4m
693
+ [2024-03-27 04:24:14,645] INFO: Rank 0: epoch=11 / 100 train_loss=18.2191 valid_loss=18.2212 stale=0 time=285.94m eta=25457.4m
694
+ [2024-03-27 04:24:14,659] INFO: Initiating epoch #12 train run on device rank=0
695
+ [2024-03-27 04:24:14,659] INFO: Initiating epoch #12 train run on device rank=0
696
+ [2024-03-27 08:50:12,730] INFO: Initiating epoch #12 valid run on device rank=0
697
+ [2024-03-27 08:50:12,730] INFO: Initiating epoch #12 valid run on device rank=0
698
+ [2024-03-27 09:10:07,240] INFO: Rank 0: epoch=12 / 100 train_loss=18.1875 valid_loss=18.2005 stale=0 time=285.88m eta=25170.1m
699
+ [2024-03-27 09:10:07,240] INFO: Rank 0: epoch=12 / 100 train_loss=18.1875 valid_loss=18.2005 stale=0 time=285.88m eta=25170.1m
700
+ [2024-03-27 09:10:07,255] INFO: Initiating epoch #13 train run on device rank=0
701
+ [2024-03-27 09:10:07,255] INFO: Initiating epoch #13 train run on device rank=0
702
+ [2024-03-27 13:36:41,534] INFO: Initiating epoch #13 valid run on device rank=0
703
+ [2024-03-27 13:36:41,534] INFO: Initiating epoch #13 valid run on device rank=0
704
+ [2024-03-27 13:56:51,429] INFO: Rank 0: epoch=13 / 100 train_loss=18.1578 valid_loss=18.1808 stale=0 time=286.74m eta=24888.9m
705
+ [2024-03-27 13:56:51,429] INFO: Rank 0: epoch=13 / 100 train_loss=18.1578 valid_loss=18.1808 stale=0 time=286.74m eta=24888.9m
706
+ [2024-03-27 13:56:51,446] INFO: Initiating epoch #14 train run on device rank=0
707
+ [2024-03-27 13:56:51,446] INFO: Initiating epoch #14 train run on device rank=0
708
+ [2024-03-27 18:24:24,174] INFO: Initiating epoch #14 valid run on device rank=0
709
+ [2024-03-27 18:24:24,174] INFO: Initiating epoch #14 valid run on device rank=0
710
+ [2024-03-27 18:44:20,853] INFO: Rank 0: epoch=14 / 100 train_loss=18.1282 valid_loss=18.1575 stale=0 time=287.49m eta=24611.5m
711
+ [2024-03-27 18:44:20,853] INFO: Rank 0: epoch=14 / 100 train_loss=18.1282 valid_loss=18.1575 stale=0 time=287.49m eta=24611.5m
712
+ [2024-03-27 18:44:20,870] INFO: Initiating epoch #15 train run on device rank=0
713
+ [2024-03-27 18:44:20,870] INFO: Initiating epoch #15 train run on device rank=0
714
+ [2024-03-27 23:12:11,710] INFO: Initiating epoch #15 valid run on device rank=0
715
+ [2024-03-27 23:12:11,710] INFO: Initiating epoch #15 valid run on device rank=0
716
+ [2024-03-27 23:32:20,988] INFO: Rank 0: epoch=15 / 100 train_loss=18.0996 valid_loss=18.1267 stale=0 time=288.0m eta=24335.6m
717
+ [2024-03-27 23:32:20,988] INFO: Rank 0: epoch=15 / 100 train_loss=18.0996 valid_loss=18.1267 stale=0 time=288.0m eta=24335.6m
718
+ [2024-03-27 23:32:21,002] INFO: Initiating epoch #16 train run on device rank=0
719
+ [2024-03-27 23:32:21,002] INFO: Initiating epoch #16 train run on device rank=0
720
+ [2024-03-28 03:58:27,660] INFO: Initiating epoch #16 valid run on device rank=0
721
+ [2024-03-28 03:58:27,660] INFO: Initiating epoch #16 valid run on device rank=0
722
+ [2024-03-28 04:18:21,127] INFO: Rank 0: epoch=16 / 100 train_loss=18.0729 valid_loss=18.1030 stale=0 time=286.0m eta=24047.7m
723
+ [2024-03-28 04:18:21,127] INFO: Rank 0: epoch=16 / 100 train_loss=18.0729 valid_loss=18.1030 stale=0 time=286.0m eta=24047.7m
724
+ [2024-03-28 04:18:21,141] INFO: Initiating epoch #17 train run on device rank=0
725
+ [2024-03-28 04:18:21,141] INFO: Initiating epoch #17 train run on device rank=0
726
+ [2024-03-28 08:44:11,691] INFO: Initiating epoch #17 valid run on device rank=0
727
+ [2024-03-28 08:44:11,691] INFO: Initiating epoch #17 valid run on device rank=0
728
+ [2024-03-28 09:04:09,550] INFO: Rank 0: epoch=17 / 100 train_loss=18.0478 valid_loss=18.0912 stale=0 time=285.81m eta=23759.1m
729
+ [2024-03-28 09:04:09,550] INFO: Rank 0: epoch=17 / 100 train_loss=18.0478 valid_loss=18.0912 stale=0 time=285.81m eta=23759.1m
730
+ [2024-03-28 09:04:09,566] INFO: Initiating epoch #18 train run on device rank=0
731
+ [2024-03-28 09:04:09,566] INFO: Initiating epoch #18 train run on device rank=0
732
+ [2024-03-28 13:29:43,054] INFO: Initiating epoch #18 valid run on device rank=0
733
+ [2024-03-28 13:29:43,054] INFO: Initiating epoch #18 valid run on device rank=0
734
+ [2024-03-28 13:49:42,746] INFO: Rank 0: epoch=18 / 100 train_loss=18.0235 valid_loss=18.0697 stale=0 time=285.55m eta=23469.7m
735
+ [2024-03-28 13:49:42,746] INFO: Rank 0: epoch=18 / 100 train_loss=18.0235 valid_loss=18.0697 stale=0 time=285.55m eta=23469.7m
736
+ [2024-03-28 13:49:42,760] INFO: Initiating epoch #19 train run on device rank=0
737
+ [2024-03-28 13:49:42,760] INFO: Initiating epoch #19 train run on device rank=0
738
+ [2024-03-28 18:15:26,865] INFO: Initiating epoch #19 valid run on device rank=0
739
+ [2024-03-28 18:15:26,865] INFO: Initiating epoch #19 valid run on device rank=0
740
+ [2024-03-28 18:35:24,119] INFO: Rank 0: epoch=19 / 100 train_loss=18.0008 valid_loss=18.0532 stale=0 time=285.69m eta=23181.2m
741
+ [2024-03-28 18:35:24,119] INFO: Rank 0: epoch=19 / 100 train_loss=18.0008 valid_loss=18.0532 stale=0 time=285.69m eta=23181.2m
742
+ [2024-03-28 18:35:24,137] INFO: Initiating epoch #20 train run on device rank=0
743
+ [2024-03-28 18:35:24,137] INFO: Initiating epoch #20 train run on device rank=0
744
+ [2024-03-28 23:00:36,705] INFO: Initiating epoch #20 valid run on device rank=0
745
+ [2024-03-28 23:00:36,705] INFO: Initiating epoch #20 valid run on device rank=0
746
+ [2024-03-28 23:20:35,806] INFO: Rank 0: epoch=20 / 100 train_loss=17.9783 valid_loss=18.0347 stale=0 time=285.19m eta=22891.1m
747
+ [2024-03-28 23:20:35,806] INFO: Rank 0: epoch=20 / 100 train_loss=17.9783 valid_loss=18.0347 stale=0 time=285.19m eta=22891.1m
748
+ [2024-03-28 23:20:35,825] INFO: Initiating epoch #21 train run on device rank=0
749
+ [2024-03-28 23:20:35,825] INFO: Initiating epoch #21 train run on device rank=0
750
+ [2024-03-29 03:46:25,188] INFO: Initiating epoch #21 valid run on device rank=0
751
+ [2024-03-29 03:46:25,188] INFO: Initiating epoch #21 valid run on device rank=0
752
+ [2024-03-29 04:06:24,286] INFO: Rank 0: epoch=21 / 100 train_loss=17.9565 valid_loss=18.0197 stale=0 time=285.81m eta=22603.7m
753
+ [2024-03-29 04:06:24,286] INFO: Rank 0: epoch=21 / 100 train_loss=17.9565 valid_loss=18.0197 stale=0 time=285.81m eta=22603.7m
754
+ [2024-03-29 04:06:24,301] INFO: Initiating epoch #22 train run on device rank=0
755
+ [2024-03-29 04:06:24,301] INFO: Initiating epoch #22 train run on device rank=0
756
+ [2024-03-29 08:31:11,736] INFO: Initiating epoch #22 valid run on device rank=0
757
+ [2024-03-29 08:31:11,736] INFO: Initiating epoch #22 valid run on device rank=0
758
+ [2024-03-29 08:51:08,443] INFO: Rank 0: epoch=22 / 100 train_loss=17.9356 valid_loss=18.0009 stale=0 time=284.74m eta=22312.7m
759
+ [2024-03-29 08:51:08,443] INFO: Rank 0: epoch=22 / 100 train_loss=17.9356 valid_loss=18.0009 stale=0 time=284.74m eta=22312.7m
760
+ [2024-03-29 08:51:08,458] INFO: Initiating epoch #23 train run on device rank=0
761
+ [2024-03-29 08:51:08,458] INFO: Initiating epoch #23 train run on device rank=0
762
+ [2024-03-29 13:16:30,304] INFO: Initiating epoch #23 valid run on device rank=0
763
+ [2024-03-29 13:16:30,304] INFO: Initiating epoch #23 valid run on device rank=0
764
+ [2024-03-29 13:36:29,623] INFO: Rank 0: epoch=23 / 100 train_loss=17.9150 valid_loss=17.9919 stale=0 time=285.35m eta=22024.2m
765
+ [2024-03-29 13:36:29,623] INFO: Rank 0: epoch=23 / 100 train_loss=17.9150 valid_loss=17.9919 stale=0 time=285.35m eta=22024.2m
766
+ [2024-03-29 13:36:29,637] INFO: Initiating epoch #24 train run on device rank=0
767
+ [2024-03-29 13:36:29,637] INFO: Initiating epoch #24 train run on device rank=0
768
+ [2024-03-29 18:01:59,324] INFO: Initiating epoch #24 valid run on device rank=0
769
+ [2024-03-29 18:01:59,324] INFO: Initiating epoch #24 valid run on device rank=0
770
+ [2024-03-29 18:21:58,557] INFO: Rank 0: epoch=24 / 100 train_loss=17.8948 valid_loss=17.9806 stale=0 time=285.48m eta=21736.5m
771
+ [2024-03-29 18:21:58,557] INFO: Rank 0: epoch=24 / 100 train_loss=17.8948 valid_loss=17.9806 stale=0 time=285.48m eta=21736.5m
772
+ [2024-03-29 18:21:58,573] INFO: Initiating epoch #25 train run on device rank=0
773
+ [2024-03-29 18:21:58,573] INFO: Initiating epoch #25 train run on device rank=0
774
+ [2024-03-29 22:47:04,103] INFO: Initiating epoch #25 valid run on device rank=0
775
+ [2024-03-29 22:47:04,103] INFO: Initiating epoch #25 valid run on device rank=0
776
+ [2024-03-29 23:06:58,509] INFO: Rank 0: epoch=25 / 100 train_loss=17.8745 valid_loss=17.9677 stale=0 time=285.0m eta=21447.4m
777
+ [2024-03-29 23:06:58,509] INFO: Rank 0: epoch=25 / 100 train_loss=17.8745 valid_loss=17.9677 stale=0 time=285.0m eta=21447.4m
778
+ [2024-03-29 23:06:58,528] INFO: Initiating epoch #26 train run on device rank=0
779
+ [2024-03-29 23:06:58,528] INFO: Initiating epoch #26 train run on device rank=0
780
+ [2024-03-30 03:32:08,173] INFO: Initiating epoch #26 valid run on device rank=0
781
+ [2024-03-30 03:32:08,173] INFO: Initiating epoch #26 valid run on device rank=0
782
+ [2024-03-30 03:52:08,592] INFO: Rank 0: epoch=26 / 100 train_loss=17.8549 valid_loss=17.9569 stale=0 time=285.17m eta=21159.2m
783
+ [2024-03-30 03:52:08,592] INFO: Rank 0: epoch=26 / 100 train_loss=17.8549 valid_loss=17.9569 stale=0 time=285.17m eta=21159.2m
784
+ [2024-03-30 03:52:08,608] INFO: Initiating epoch #27 train run on device rank=0
785
+ [2024-03-30 03:52:08,608] INFO: Initiating epoch #27 train run on device rank=0
786
+ [2024-03-30 08:17:59,526] INFO: Initiating epoch #27 valid run on device rank=0
787
+ [2024-03-30 08:17:59,526] INFO: Initiating epoch #27 valid run on device rank=0
788
+ [2024-03-30 08:38:00,700] INFO: Rank 0: epoch=27 / 100 train_loss=17.8347 valid_loss=17.9366 stale=0 time=285.87m eta=20873.1m
789
+ [2024-03-30 08:38:00,700] INFO: Rank 0: epoch=27 / 100 train_loss=17.8347 valid_loss=17.9366 stale=0 time=285.87m eta=20873.1m
790
+ [2024-03-30 08:38:00,714] INFO: Initiating epoch #28 train run on device rank=0
791
+ [2024-03-30 08:38:00,714] INFO: Initiating epoch #28 train run on device rank=0
792
+ [2024-03-30 13:03:33,057] INFO: Initiating epoch #28 valid run on device rank=0
793
+ [2024-03-30 13:03:33,057] INFO: Initiating epoch #28 valid run on device rank=0
794
+ [2024-03-30 13:23:29,411] INFO: Rank 0: epoch=28 / 100 train_loss=17.8149 valid_loss=17.9264 stale=0 time=285.48m eta=20586.0m
795
+ [2024-03-30 13:23:29,411] INFO: Rank 0: epoch=28 / 100 train_loss=17.8149 valid_loss=17.9264 stale=0 time=285.48m eta=20586.0m
796
+ [2024-03-30 13:23:29,424] INFO: Initiating epoch #29 train run on device rank=0
797
+ [2024-03-30 13:23:29,424] INFO: Initiating epoch #29 train run on device rank=0
798
+ [2024-03-30 17:49:27,995] INFO: Initiating epoch #29 valid run on device rank=0
799
+ [2024-03-30 17:49:27,995] INFO: Initiating epoch #29 valid run on device rank=0
800
+ [2024-03-30 18:09:26,968] INFO: Rank 0: epoch=29 / 100 train_loss=17.7950 valid_loss=17.9112 stale=0 time=285.96m eta=20300.2m
801
+ [2024-03-30 18:09:26,968] INFO: Rank 0: epoch=29 / 100 train_loss=17.7950 valid_loss=17.9112 stale=0 time=285.96m eta=20300.2m
802
+ [2024-03-30 18:09:26,982] INFO: Initiating epoch #30 train run on device rank=0
803
+ [2024-03-30 18:09:26,982] INFO: Initiating epoch #30 train run on device rank=0
804
+ [2024-03-30 22:34:51,876] INFO: Initiating epoch #30 valid run on device rank=0
805
+ [2024-03-30 22:34:51,876] INFO: Initiating epoch #30 valid run on device rank=0
806
+ [2024-03-30 22:54:53,145] INFO: Rank 0: epoch=30 / 100 train_loss=17.7757 valid_loss=17.9006 stale=0 time=285.44m eta=20013.1m
807
+ [2024-03-30 22:54:53,145] INFO: Rank 0: epoch=30 / 100 train_loss=17.7757 valid_loss=17.9006 stale=0 time=285.44m eta=20013.1m
808
+ [2024-03-30 22:54:53,159] INFO: Initiating epoch #31 train run on device rank=0
809
+ [2024-03-30 22:54:53,159] INFO: Initiating epoch #31 train run on device rank=0
810
+ [2024-03-31 04:20:44,340] INFO: Initiating epoch #31 valid run on device rank=0
811
+ [2024-03-31 04:20:44,340] INFO: Initiating epoch #31 valid run on device rank=0
812
+ [2024-03-31 04:40:42,326] INFO: Rank 0: epoch=31 / 100 train_loss=17.7565 valid_loss=17.8932 stale=0 time=285.82m eta=19727.0m
813
+ [2024-03-31 04:40:42,326] INFO: Rank 0: epoch=31 / 100 train_loss=17.7565 valid_loss=17.8932 stale=0 time=285.82m eta=19727.0m
814
+ [2024-03-31 04:40:42,350] INFO: Initiating epoch #32 train run on device rank=0
815
+ [2024-03-31 04:40:42,350] INFO: Initiating epoch #32 train run on device rank=0
816
+ [2024-03-31 09:06:05,665] INFO: Initiating epoch #32 valid run on device rank=0
817
+ [2024-03-31 09:06:05,665] INFO: Initiating epoch #32 valid run on device rank=0
818
+ [2024-03-31 09:26:03,353] INFO: Rank 0: epoch=32 / 100 train_loss=17.7375 valid_loss=17.8774 stale=0 time=285.35m eta=19440.0m
819
+ [2024-03-31 09:26:03,353] INFO: Rank 0: epoch=32 / 100 train_loss=17.7375 valid_loss=17.8774 stale=0 time=285.35m eta=19440.0m
820
+ [2024-03-31 09:26:03,368] INFO: Initiating epoch #33 train run on device rank=0
821
+ [2024-03-31 09:26:03,368] INFO: Initiating epoch #33 train run on device rank=0