HYPJUDY committed on
Commit
a8e49b9
1 Parent(s): 1fbe376
config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_probs_dropout_prob": 0.1,
3
+ "bos_token_id": 0,
4
+ "classifier_dropout": null,
5
+ "coordinate_size": 128,
6
+ "device": "cuda",
7
+ "eos_token_id": 2,
8
+ "has_relative_attention_bias": true,
9
+ "has_spatial_attention_bias": true,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "input_size": 224,
15
+ "intermediate_size": 3072,
16
+ "layer_norm_eps": 1e-05,
17
+ "max_2d_position_embeddings": 1024,
18
+ "max_position_embeddings": 514,
19
+ "max_rel_2d_pos": 256,
20
+ "max_rel_pos": 128,
21
+ "model_type": "layoutlmv3",
22
+ "num_attention_heads": 12,
23
+ "num_hidden_layers": 12,
24
+ "pad_token_id": 1,
25
+ "rel_2d_pos_bins": 64,
26
+ "rel_pos_bins": 32,
27
+ "second_input_size": 112,
28
+ "shape_size": 128,
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.12.5",
31
+ "type_vocab_size": 1,
32
+ "use_cache": true,
33
+ "visual_embed": true,
34
+ "vocab_size": 50265
35
+ }
config.yaml ADDED
@@ -0,0 +1,350 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ AUG:
2
+ DETR: true
3
+ CACHE_DIR: /mnt/localdata/users/yupanhuang/cache/huggingface
4
+ CUDNN_BENCHMARK: false
5
+ DATALOADER:
6
+ ASPECT_RATIO_GROUPING: true
7
+ FILTER_EMPTY_ANNOTATIONS: false
8
+ NUM_WORKERS: 4
9
+ REPEAT_THRESHOLD: 0.0
10
+ SAMPLER_TRAIN: TrainingSampler
11
+ DATASETS:
12
+ PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
13
+ PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
14
+ PROPOSAL_FILES_TEST: []
15
+ PROPOSAL_FILES_TRAIN: []
16
+ TEST:
17
+ - publaynet_val
18
+ TRAIN:
19
+ - publaynet_train
20
+ GLOBAL:
21
+ HACK: 1.0
22
+ ICDAR_DATA_DIR_TEST: ''
23
+ ICDAR_DATA_DIR_TRAIN: ''
24
+ INPUT:
25
+ CROP:
26
+ ENABLED: true
27
+ SIZE:
28
+ - 384
29
+ - 600
30
+ TYPE: absolute_range
31
+ FORMAT: RGB
32
+ MASK_FORMAT: polygon
33
+ MAX_SIZE_TEST: 1333
34
+ MAX_SIZE_TRAIN: 1333
35
+ MIN_SIZE_TEST: 800
36
+ MIN_SIZE_TRAIN:
37
+ - 480
38
+ - 512
39
+ - 544
40
+ - 576
41
+ - 608
42
+ - 640
43
+ - 672
44
+ - 704
45
+ - 736
46
+ - 768
47
+ - 800
48
+ MIN_SIZE_TRAIN_SAMPLING: choice
49
+ RANDOM_FLIP: horizontal
50
+ MODEL:
51
+ ANCHOR_GENERATOR:
52
+ ANGLES:
53
+ - - -90
54
+ - 0
55
+ - 90
56
+ ASPECT_RATIOS:
57
+ - - 0.5
58
+ - 1.0
59
+ - 2.0
60
+ NAME: DefaultAnchorGenerator
61
+ OFFSET: 0.0
62
+ SIZES:
63
+ - - 32
64
+ - - 64
65
+ - - 128
66
+ - - 256
67
+ - - 512
68
+ BACKBONE:
69
+ FREEZE_AT: 2
70
+ NAME: build_vit_fpn_backbone
71
+ CONFIG_PATH: ''
72
+ DEVICE: cuda
73
+ FPN:
74
+ FUSE_TYPE: sum
75
+ IN_FEATURES:
76
+ - layer3
77
+ - layer5
78
+ - layer7
79
+ - layer11
80
+ NORM: ''
81
+ OUT_CHANNELS: 256
82
+ IMAGE_ONLY: true
83
+ KEYPOINT_ON: false
84
+ LOAD_PROPOSALS: false
85
+ MASK_ON: true
86
+ MAX_LENGTH: 510
87
+ META_ARCHITECTURE: VLGeneralizedRCNN
88
+ PANOPTIC_FPN:
89
+ COMBINE:
90
+ ENABLED: true
91
+ INSTANCES_CONFIDENCE_THRESH: 0.5
92
+ OVERLAP_THRESH: 0.5
93
+ STUFF_AREA_LIMIT: 4096
94
+ INSTANCE_LOSS_WEIGHT: 1.0
95
+ PIXEL_MEAN:
96
+ - 127.5
97
+ - 127.5
98
+ - 127.5
99
+ PIXEL_STD:
100
+ - 127.5
101
+ - 127.5
102
+ - 127.5
103
+ PROPOSAL_GENERATOR:
104
+ MIN_SIZE: 0
105
+ NAME: RPN
106
+ RESNETS:
107
+ DEFORM_MODULATED: false
108
+ DEFORM_NUM_GROUPS: 1
109
+ DEFORM_ON_PER_STAGE:
110
+ - false
111
+ - false
112
+ - false
113
+ - false
114
+ DEPTH: 50
115
+ NORM: FrozenBN
116
+ NUM_GROUPS: 1
117
+ OUT_FEATURES:
118
+ - res4
119
+ RES2_OUT_CHANNELS: 256
120
+ RES5_DILATION: 1
121
+ STEM_OUT_CHANNELS: 64
122
+ STRIDE_IN_1X1: true
123
+ WIDTH_PER_GROUP: 64
124
+ RETINANET:
125
+ BBOX_REG_LOSS_TYPE: smooth_l1
126
+ BBOX_REG_WEIGHTS: &id001
127
+ - 1.0
128
+ - 1.0
129
+ - 1.0
130
+ - 1.0
131
+ FOCAL_LOSS_ALPHA: 0.25
132
+ FOCAL_LOSS_GAMMA: 2.0
133
+ IN_FEATURES:
134
+ - p3
135
+ - p4
136
+ - p5
137
+ - p6
138
+ - p7
139
+ IOU_LABELS:
140
+ - 0
141
+ - -1
142
+ - 1
143
+ IOU_THRESHOLDS:
144
+ - 0.4
145
+ - 0.5
146
+ NMS_THRESH_TEST: 0.5
147
+ NORM: ''
148
+ NUM_CLASSES: 80
149
+ NUM_CONVS: 4
150
+ PRIOR_PROB: 0.01
151
+ SCORE_THRESH_TEST: 0.05
152
+ SMOOTH_L1_LOSS_BETA: 0.1
153
+ TOPK_CANDIDATES_TEST: 1000
154
+ ROI_BOX_CASCADE_HEAD:
155
+ BBOX_REG_WEIGHTS:
156
+ - - 10.0
157
+ - 10.0
158
+ - 5.0
159
+ - 5.0
160
+ - - 20.0
161
+ - 20.0
162
+ - 10.0
163
+ - 10.0
164
+ - - 30.0
165
+ - 30.0
166
+ - 15.0
167
+ - 15.0
168
+ IOUS:
169
+ - 0.5
170
+ - 0.6
171
+ - 0.7
172
+ ROI_BOX_HEAD:
173
+ BBOX_REG_LOSS_TYPE: smooth_l1
174
+ BBOX_REG_LOSS_WEIGHT: 1.0
175
+ BBOX_REG_WEIGHTS:
176
+ - 10.0
177
+ - 10.0
178
+ - 5.0
179
+ - 5.0
180
+ CLS_AGNOSTIC_BBOX_REG: true
181
+ CONV_DIM: 256
182
+ FC_DIM: 1024
183
+ NAME: FastRCNNConvFCHead
184
+ NORM: ''
185
+ NUM_CONV: 0
186
+ NUM_FC: 2
187
+ POOLER_RESOLUTION: 7
188
+ POOLER_SAMPLING_RATIO: 0
189
+ POOLER_TYPE: ROIAlignV2
190
+ SMOOTH_L1_BETA: 0.0
191
+ TRAIN_ON_PRED_BOXES: false
192
+ ROI_HEADS:
193
+ BATCH_SIZE_PER_IMAGE: 512
194
+ IN_FEATURES:
195
+ - p2
196
+ - p3
197
+ - p4
198
+ - p5
199
+ IOU_LABELS:
200
+ - 0
201
+ - 1
202
+ IOU_THRESHOLDS:
203
+ - 0.5
204
+ NAME: CascadeROIHeads
205
+ NMS_THRESH_TEST: 0.5
206
+ NUM_CLASSES: 5
207
+ POSITIVE_FRACTION: 0.25
208
+ PROPOSAL_APPEND_GT: true
209
+ SCORE_THRESH_TEST: 0.05
210
+ ROI_KEYPOINT_HEAD:
211
+ CONV_DIMS:
212
+ - 512
213
+ - 512
214
+ - 512
215
+ - 512
216
+ - 512
217
+ - 512
218
+ - 512
219
+ - 512
220
+ LOSS_WEIGHT: 1.0
221
+ MIN_KEYPOINTS_PER_IMAGE: 1
222
+ NAME: KRCNNConvDeconvUpsampleHead
223
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
224
+ NUM_KEYPOINTS: 17
225
+ POOLER_RESOLUTION: 14
226
+ POOLER_SAMPLING_RATIO: 0
227
+ POOLER_TYPE: ROIAlignV2
228
+ ROI_MASK_HEAD:
229
+ CLS_AGNOSTIC_MASK: false
230
+ CONV_DIM: 256
231
+ NAME: MaskRCNNConvUpsampleHead
232
+ NORM: ''
233
+ NUM_CONV: 4
234
+ POOLER_RESOLUTION: 14
235
+ POOLER_SAMPLING_RATIO: 0
236
+ POOLER_TYPE: ROIAlignV2
237
+ RPN:
238
+ BATCH_SIZE_PER_IMAGE: 256
239
+ BBOX_REG_LOSS_TYPE: smooth_l1
240
+ BBOX_REG_LOSS_WEIGHT: 1.0
241
+ BBOX_REG_WEIGHTS: *id001
242
+ BOUNDARY_THRESH: -1
243
+ CONV_DIMS:
244
+ - -1
245
+ HEAD_NAME: StandardRPNHead
246
+ IN_FEATURES:
247
+ - p2
248
+ - p3
249
+ - p4
250
+ - p5
251
+ - p6
252
+ IOU_LABELS:
253
+ - 0
254
+ - -1
255
+ - 1
256
+ IOU_THRESHOLDS:
257
+ - 0.3
258
+ - 0.7
259
+ LOSS_WEIGHT: 1.0
260
+ NMS_THRESH: 0.7
261
+ POSITIVE_FRACTION: 0.5
262
+ POST_NMS_TOPK_TEST: 1000
263
+ POST_NMS_TOPK_TRAIN: 2000
264
+ PRE_NMS_TOPK_TEST: 1000
265
+ PRE_NMS_TOPK_TRAIN: 2000
266
+ SMOOTH_L1_BETA: 0.0
267
+ SEM_SEG_HEAD:
268
+ COMMON_STRIDE: 4
269
+ CONVS_DIM: 128
270
+ IGNORE_VALUE: 255
271
+ IN_FEATURES:
272
+ - p2
273
+ - p3
274
+ - p4
275
+ - p5
276
+ LOSS_WEIGHT: 1.0
277
+ NAME: SemSegFPNHead
278
+ NORM: GN
279
+ NUM_CLASSES: 54
280
+ VIT:
281
+ DROP_PATH: 0.1
282
+ IMG_SIZE:
283
+ - 224
284
+ - 224
285
+ MODEL_KWARGS: '{}'
286
+ NAME: layoutlmv3_base
287
+ OUT_FEATURES:
288
+ - layer3
289
+ - layer5
290
+ - layer7
291
+ - layer11
292
+ POS_TYPE: abs
293
+ WEIGHTS: /mnt/localdata/users/yupanhuang/models/layoutlmv3/fts/publaynet-base/model_final.pth
294
+ OUTPUT_DIR: /mnt/localdata/users/yupanhuang/models/layoutlmv3/fts/publaynet-base/
295
+ PUBLAYNET_DATA_DIR_TEST: /mnt/localdata/users/yupanhuang/data/PubLayNet/publaynet/val
296
+ PUBLAYNET_DATA_DIR_TRAIN: /mnt/localdata/users/yupanhuang/data/PubLayNet/publaynet/train
297
+ SEED: 42
298
+ SOLVER:
299
+ AMP:
300
+ ENABLED: true
301
+ BACKBONE_MULTIPLIER: 1.0
302
+ BASE_LR: 0.0002
303
+ BIAS_LR_FACTOR: 1.0
304
+ CHECKPOINT_PERIOD: 2000
305
+ CLIP_GRADIENTS:
306
+ CLIP_TYPE: full_model
307
+ CLIP_VALUE: 1.0
308
+ ENABLED: true
309
+ NORM_TYPE: 2.0
310
+ GAMMA: 0.1
311
+ GRADIENT_ACCUMULATION_STEPS: 1
312
+ IMS_PER_BATCH: 32
313
+ LR_SCHEDULER_NAME: WarmupCosineLR
314
+ MAX_ITER: 60000
315
+ MOMENTUM: 0.9
316
+ NESTEROV: false
317
+ OPTIMIZER: ADAMW
318
+ REFERENCE_WORLD_SIZE: 0
319
+ STEPS:
320
+ - 30000
321
+ WARMUP_FACTOR: 0.01
322
+ WARMUP_ITERS: 1000
323
+ WARMUP_METHOD: linear
324
+ WEIGHT_DECAY: 0.05
325
+ WEIGHT_DECAY_BIAS: null
326
+ WEIGHT_DECAY_NORM: 0.0
327
+ TEST:
328
+ AUG:
329
+ ENABLED: false
330
+ FLIP: true
331
+ MAX_SIZE: 4000
332
+ MIN_SIZES:
333
+ - 400
334
+ - 500
335
+ - 600
336
+ - 700
337
+ - 800
338
+ - 900
339
+ - 1000
340
+ - 1100
341
+ - 1200
342
+ DETECTIONS_PER_IMAGE: 100
343
+ EVAL_PERIOD: 2000
344
+ EXPECTED_RESULTS: []
345
+ KEYPOINT_OKS_SIGMAS: []
346
+ PRECISE_BN:
347
+ ENABLED: false
348
+ NUM_ITER: 200
349
+ VERSION: 2
350
+ VIS_PERIOD: 0
events.out.tfevents.1648092666.40461928b0877f0b496ecfdcbf613f0d-master-0.1776.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08a67ef7e305bf2605c86d03416e80f7934bbcd300c91e999eb28535b0ab1fac
3
+ size 6235994
log.txt ADDED
@@ -0,0 +1,668 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [04/17 14:10:02 detectron2]: Rank of current process: 0. World size: 8
2
+ [04/17 14:10:20 detectron2]: Environment info:
3
+ ---------------------- --------------------------------------------------------------------------------------------------------------------------
4
+ sys.platform linux
5
+ Python 3.7.11 (default, Jul 27 2021, 14:32:16) [GCC 7.5.0]
6
+ numpy 1.21.5
7
+ detectron2 0.6 @/mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/detectron2
8
+ Compiler GCC 7.3
9
+ CUDA compiler CUDA 11.1
10
+ detectron2 arch flags 3.7, 5.0, 5.2, 6.0, 6.1, 7.0, 7.5, 8.0, 8.6
11
+ DETECTRON2_ENV_MODULE <not set>
12
+ PyTorch 1.10.0+cu111 @/mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/torch
13
+ PyTorch debug build False
14
+ GPU available Yes
15
+ GPU 0,1,2,3,4,5,6,7 A100-SXM4-40GB (arch=8.0)
16
+ Driver version 450.142.00
17
+ CUDA_HOME /usr/local/cuda
18
+ Pillow 8.4.0
19
+ torchvision 0.11.1+cu111 @/mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/torchvision
20
+ torchvision arch flags 3.5, 5.0, 6.0, 7.0, 7.5, 8.0, 8.6
21
+ fvcore 0.1.5.post20211023
22
+ iopath 0.1.9
23
+ cv2 Not found
24
+ ---------------------- --------------------------------------------------------------------------------------------------------------------------
25
+ PyTorch built with:
26
+ - GCC 7.3
27
+ - C++ Version: 201402
28
+ - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
29
+ - Intel(R) MKL-DNN v2.2.3 (Git Hash 7336ca9f055cf1bfa13efb658fe15dc9b41f0740)
30
+ - OpenMP 201511 (a.k.a. OpenMP 4.5)
31
+ - LAPACK is enabled (usually provided by MKL)
32
+ - NNPACK is enabled
33
+ - CPU capability usage: AVX2
34
+ - CUDA Runtime 11.1
35
+ - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86
36
+ - CuDNN 8.0.5
37
+ - Magma 2.5.2
38
+ - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.1, CUDNN_VERSION=8.0.5, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.10.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON,
39
+
40
+ [04/17 14:10:20 detectron2]: Command line arguments: Namespace(config_file='cascade_layoutlmv3.yaml', debug=False, dist_url='tcp://127.0.0.1:50156', eval_only=True, machine_rank=0, num_gpus=8, num_machines=1, opts=['MODEL.WEIGHTS', '/mnt/localdata/users/yupanhuang/models/layoutlmv3/fts/publaynet-base/model_final.pth', 'OUTPUT_DIR', '/mnt/localdata/users/yupanhuang/models/layoutlmv3/fts/publaynet-base/'], resume=False)
41
+ [04/17 14:10:20 detectron2]: Contents of args.config_file=cascade_layoutlmv3.yaml:
42
+ MODEL:
43
+ MASK_ON: True
44
+ MAX_LENGTH: 510
45
+ IMAGE_ONLY: True
46
+ META_ARCHITECTURE: "VLGeneralizedRCNN"
47
+ PIXEL_MEAN: [ 127.5, 127.5, 127.5 ]
48
+ PIXEL_STD: [ 127.5, 127.5, 127.5 ]
49
+ WEIGHTS: "/mnt/localdata/users/yupanhuang/models/layoutlmv3/pts/layoutlmv3-base/pytorch_model.bin"
50
+ BACKBONE:
51
+ NAME: "build_vit_fpn_backbone"
52
+ VIT:
53
+ NAME: "layoutlmv3_base"
54
+ OUT_FEATURES: [ "layer3", "layer5", "layer7", "layer11" ]
55
+ DROP_PATH: 0.1
56
+ IMG_SIZE: [ 224,224 ]
57
+ POS_TYPE: "abs"
58
+ ROI_HEADS:
59
+ NAME: CascadeROIHeads
60
+ IN_FEATURES: [ "p2", "p3", "p4", "p5" ]
61
+ NUM_CLASSES: 5
62
+ ROI_BOX_HEAD:
63
+ CLS_AGNOSTIC_BBOX_REG: True
64
+ NAME: "FastRCNNConvFCHead"
65
+ NUM_FC: 2
66
+ POOLER_RESOLUTION: 7
67
+ ROI_MASK_HEAD:
68
+ NAME: "MaskRCNNConvUpsampleHead"
69
+ NUM_CONV: 4
70
+ POOLER_RESOLUTION: 14
71
+ FPN:
72
+ IN_FEATURES: [ "layer3", "layer5", "layer7", "layer11" ]
73
+ ANCHOR_GENERATOR:
74
+ SIZES: [ [ 32 ], [ 64 ], [ 128 ], [ 256 ], [ 512 ] ] # One size for each in feature map
75
+ ASPECT_RATIOS: [ [ 0.5, 1.0, 2.0 ] ] # Three aspect ratios (same for all in feature maps)
76
+ RPN:
77
+ IN_FEATURES: [ "p2", "p3", "p4", "p5", "p6" ]
78
+ PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
79
+ PRE_NMS_TOPK_TEST: 1000 # Per FPN level
80
+ # Detectron1 uses 2000 proposals per-batch,
81
+ # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
82
+ # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
83
+ POST_NMS_TOPK_TRAIN: 2000
84
+ POST_NMS_TOPK_TEST: 1000
85
+ DATASETS:
86
+ TRAIN: ("publaynet_train",)
87
+ TEST: ("publaynet_val",)
88
+ SOLVER:
89
+ GRADIENT_ACCUMULATION_STEPS: 1
90
+ BASE_LR: 0.0002
91
+ WARMUP_ITERS: 1000
92
+ IMS_PER_BATCH: 32
93
+ MAX_ITER: 60000
94
+ CHECKPOINT_PERIOD: 2000
95
+ LR_SCHEDULER_NAME: "WarmupCosineLR"
96
+ AMP:
97
+ ENABLED: True
98
+ OPTIMIZER: "ADAMW"
99
+ BACKBONE_MULTIPLIER: 1.0
100
+ CLIP_GRADIENTS:
101
+ ENABLED: True
102
+ CLIP_TYPE: "full_model"
103
+ CLIP_VALUE: 1.0
104
+ NORM_TYPE: 2.0
105
+ WARMUP_FACTOR: 0.01
106
+ WEIGHT_DECAY: 0.05
107
+ TEST:
108
+ EVAL_PERIOD: 2000
109
+ INPUT:
110
+ CROP:
111
+ ENABLED: True
112
+ TYPE: "absolute_range"
113
+ SIZE: (384, 600)
114
+ MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
115
+ FORMAT: "RGB"
116
+ DATALOADER:
117
+ FILTER_EMPTY_ANNOTATIONS: False
118
+ VERSION: 2
119
+ AUG:
120
+ DETR: True
121
+ SEED: 42
122
+ OUTPUT_DIR: "/mnt/localdata/users/yupanhuang/models/layoutlmv3/fts/publaynet/"
123
+ PUBLAYNET_DATA_DIR_TRAIN: "/mnt/localdata/users/yupanhuang/data/PubLayNet/publaynet/train"
124
+ PUBLAYNET_DATA_DIR_TEST: "/mnt/localdata/users/yupanhuang/data/PubLayNet/publaynet/val"
125
+ OCR_DATA_DIR_TRAIN: "/mnt/localdata/users/yupanhuang/data/PubLayNet/ocr/train"
126
+ OCR_DATA_DIR_TEST: "/mnt/localdata/users/yupanhuang/data/PubLayNet/ocr/val"
127
+ CACHE_DIR: "/mnt/localdata/users/yupanhuang/cache/huggingface"
128
+
129
+ [04/17 14:10:20 detectron2]: Running with full config:
130
+ AUG:
131
+ DETR: true
132
+ CACHE_DIR: /mnt/localdata/users/yupanhuang/cache/huggingface
133
+ CUDNN_BENCHMARK: false
134
+ DATALOADER:
135
+ ASPECT_RATIO_GROUPING: true
136
+ FILTER_EMPTY_ANNOTATIONS: false
137
+ NUM_WORKERS: 4
138
+ REPEAT_THRESHOLD: 0.0
139
+ SAMPLER_TRAIN: TrainingSampler
140
+ DATASETS:
141
+ PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
142
+ PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
143
+ PROPOSAL_FILES_TEST: []
144
+ PROPOSAL_FILES_TRAIN: []
145
+ TEST:
146
+ - publaynet_val
147
+ TRAIN:
148
+ - publaynet_train
149
+ GLOBAL:
150
+ HACK: 1.0
151
+ ICDAR_DATA_DIR_TEST: ''
152
+ ICDAR_DATA_DIR_TRAIN: ''
153
+ INPUT:
154
+ CROP:
155
+ ENABLED: true
156
+ SIZE:
157
+ - 384
158
+ - 600
159
+ TYPE: absolute_range
160
+ FORMAT: RGB
161
+ MASK_FORMAT: polygon
162
+ MAX_SIZE_TEST: 1333
163
+ MAX_SIZE_TRAIN: 1333
164
+ MIN_SIZE_TEST: 800
165
+ MIN_SIZE_TRAIN:
166
+ - 480
167
+ - 512
168
+ - 544
169
+ - 576
170
+ - 608
171
+ - 640
172
+ - 672
173
+ - 704
174
+ - 736
175
+ - 768
176
+ - 800
177
+ MIN_SIZE_TRAIN_SAMPLING: choice
178
+ RANDOM_FLIP: horizontal
179
+ MODEL:
180
+ ANCHOR_GENERATOR:
181
+ ANGLES:
182
+ - - -90
183
+ - 0
184
+ - 90
185
+ ASPECT_RATIOS:
186
+ - - 0.5
187
+ - 1.0
188
+ - 2.0
189
+ NAME: DefaultAnchorGenerator
190
+ OFFSET: 0.0
191
+ SIZES:
192
+ - - 32
193
+ - - 64
194
+ - - 128
195
+ - - 256
196
+ - - 512
197
+ BACKBONE:
198
+ FREEZE_AT: 2
199
+ NAME: build_vit_fpn_backbone
200
+ CONFIG_PATH: ''
201
+ DEVICE: cuda
202
+ FPN:
203
+ FUSE_TYPE: sum
204
+ IN_FEATURES:
205
+ - layer3
206
+ - layer5
207
+ - layer7
208
+ - layer11
209
+ NORM: ''
210
+ OUT_CHANNELS: 256
211
+ IMAGE_ONLY: true
212
+ KEYPOINT_ON: false
213
+ LOAD_PROPOSALS: false
214
+ MASK_ON: true
215
+ MAX_LENGTH: 510
216
+ META_ARCHITECTURE: VLGeneralizedRCNN
217
+ PANOPTIC_FPN:
218
+ COMBINE:
219
+ ENABLED: true
220
+ INSTANCES_CONFIDENCE_THRESH: 0.5
221
+ OVERLAP_THRESH: 0.5
222
+ STUFF_AREA_LIMIT: 4096
223
+ INSTANCE_LOSS_WEIGHT: 1.0
224
+ PIXEL_MEAN:
225
+ - 127.5
226
+ - 127.5
227
+ - 127.5
228
+ PIXEL_STD:
229
+ - 127.5
230
+ - 127.5
231
+ - 127.5
232
+ PROPOSAL_GENERATOR:
233
+ MIN_SIZE: 0
234
+ NAME: RPN
235
+ RESNETS:
236
+ DEFORM_MODULATED: false
237
+ DEFORM_NUM_GROUPS: 1
238
+ DEFORM_ON_PER_STAGE:
239
+ - false
240
+ - false
241
+ - false
242
+ - false
243
+ DEPTH: 50
244
+ NORM: FrozenBN
245
+ NUM_GROUPS: 1
246
+ OUT_FEATURES:
247
+ - res4
248
+ RES2_OUT_CHANNELS: 256
249
+ RES5_DILATION: 1
250
+ STEM_OUT_CHANNELS: 64
251
+ STRIDE_IN_1X1: true
252
+ WIDTH_PER_GROUP: 64
253
+ RETINANET:
254
+ BBOX_REG_LOSS_TYPE: smooth_l1
255
+ BBOX_REG_WEIGHTS: &id001
256
+ - 1.0
257
+ - 1.0
258
+ - 1.0
259
+ - 1.0
260
+ FOCAL_LOSS_ALPHA: 0.25
261
+ FOCAL_LOSS_GAMMA: 2.0
262
+ IN_FEATURES:
263
+ - p3
264
+ - p4
265
+ - p5
266
+ - p6
267
+ - p7
268
+ IOU_LABELS:
269
+ - 0
270
+ - -1
271
+ - 1
272
+ IOU_THRESHOLDS:
273
+ - 0.4
274
+ - 0.5
275
+ NMS_THRESH_TEST: 0.5
276
+ NORM: ''
277
+ NUM_CLASSES: 80
278
+ NUM_CONVS: 4
279
+ PRIOR_PROB: 0.01
280
+ SCORE_THRESH_TEST: 0.05
281
+ SMOOTH_L1_LOSS_BETA: 0.1
282
+ TOPK_CANDIDATES_TEST: 1000
283
+ ROI_BOX_CASCADE_HEAD:
284
+ BBOX_REG_WEIGHTS:
285
+ - - 10.0
286
+ - 10.0
287
+ - 5.0
288
+ - 5.0
289
+ - - 20.0
290
+ - 20.0
291
+ - 10.0
292
+ - 10.0
293
+ - - 30.0
294
+ - 30.0
295
+ - 15.0
296
+ - 15.0
297
+ IOUS:
298
+ - 0.5
299
+ - 0.6
300
+ - 0.7
301
+ ROI_BOX_HEAD:
302
+ BBOX_REG_LOSS_TYPE: smooth_l1
303
+ BBOX_REG_LOSS_WEIGHT: 1.0
304
+ BBOX_REG_WEIGHTS:
305
+ - 10.0
306
+ - 10.0
307
+ - 5.0
308
+ - 5.0
309
+ CLS_AGNOSTIC_BBOX_REG: true
310
+ CONV_DIM: 256
311
+ FC_DIM: 1024
312
+ NAME: FastRCNNConvFCHead
313
+ NORM: ''
314
+ NUM_CONV: 0
315
+ NUM_FC: 2
316
+ POOLER_RESOLUTION: 7
317
+ POOLER_SAMPLING_RATIO: 0
318
+ POOLER_TYPE: ROIAlignV2
319
+ SMOOTH_L1_BETA: 0.0
320
+ TRAIN_ON_PRED_BOXES: false
321
+ ROI_HEADS:
322
+ BATCH_SIZE_PER_IMAGE: 512
323
+ IN_FEATURES:
324
+ - p2
325
+ - p3
326
+ - p4
327
+ - p5
328
+ IOU_LABELS:
329
+ - 0
330
+ - 1
331
+ IOU_THRESHOLDS:
332
+ - 0.5
333
+ NAME: CascadeROIHeads
334
+ NMS_THRESH_TEST: 0.5
335
+ NUM_CLASSES: 5
336
+ POSITIVE_FRACTION: 0.25
337
+ PROPOSAL_APPEND_GT: true
338
+ SCORE_THRESH_TEST: 0.05
339
+ ROI_KEYPOINT_HEAD:
340
+ CONV_DIMS:
341
+ - 512
342
+ - 512
343
+ - 512
344
+ - 512
345
+ - 512
346
+ - 512
347
+ - 512
348
+ - 512
349
+ LOSS_WEIGHT: 1.0
350
+ MIN_KEYPOINTS_PER_IMAGE: 1
351
+ NAME: KRCNNConvDeconvUpsampleHead
352
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
353
+ NUM_KEYPOINTS: 17
354
+ POOLER_RESOLUTION: 14
355
+ POOLER_SAMPLING_RATIO: 0
356
+ POOLER_TYPE: ROIAlignV2
357
+ ROI_MASK_HEAD:
358
+ CLS_AGNOSTIC_MASK: false
359
+ CONV_DIM: 256
360
+ NAME: MaskRCNNConvUpsampleHead
361
+ NORM: ''
362
+ NUM_CONV: 4
363
+ POOLER_RESOLUTION: 14
364
+ POOLER_SAMPLING_RATIO: 0
365
+ POOLER_TYPE: ROIAlignV2
366
+ RPN:
367
+ BATCH_SIZE_PER_IMAGE: 256
368
+ BBOX_REG_LOSS_TYPE: smooth_l1
369
+ BBOX_REG_LOSS_WEIGHT: 1.0
370
+ BBOX_REG_WEIGHTS: *id001
371
+ BOUNDARY_THRESH: -1
372
+ CONV_DIMS:
373
+ - -1
374
+ HEAD_NAME: StandardRPNHead
375
+ IN_FEATURES:
376
+ - p2
377
+ - p3
378
+ - p4
379
+ - p5
380
+ - p6
381
+ IOU_LABELS:
382
+ - 0
383
+ - -1
384
+ - 1
385
+ IOU_THRESHOLDS:
386
+ - 0.3
387
+ - 0.7
388
+ LOSS_WEIGHT: 1.0
389
+ NMS_THRESH: 0.7
390
+ POSITIVE_FRACTION: 0.5
391
+ POST_NMS_TOPK_TEST: 1000
392
+ POST_NMS_TOPK_TRAIN: 2000
393
+ PRE_NMS_TOPK_TEST: 1000
394
+ PRE_NMS_TOPK_TRAIN: 2000
395
+ SMOOTH_L1_BETA: 0.0
396
+ SEM_SEG_HEAD:
397
+ COMMON_STRIDE: 4
398
+ CONVS_DIM: 128
399
+ IGNORE_VALUE: 255
400
+ IN_FEATURES:
401
+ - p2
402
+ - p3
403
+ - p4
404
+ - p5
405
+ LOSS_WEIGHT: 1.0
406
+ NAME: SemSegFPNHead
407
+ NORM: GN
408
+ NUM_CLASSES: 54
409
+ VIT:
410
+ DROP_PATH: 0.1
411
+ IMG_SIZE:
412
+ - 224
413
+ - 224
414
+ MODEL_KWARGS: '{}'
415
+ NAME: layoutlmv3_base
416
+ OUT_FEATURES:
417
+ - layer3
418
+ - layer5
419
+ - layer7
420
+ - layer11
421
+ POS_TYPE: abs
422
+ WEIGHTS: /mnt/localdata/users/yupanhuang/models/layoutlmv3/fts/publaynet-base/model_final.pth
423
+ OCR_DATA_DIR_TEST: /mnt/localdata/users/yupanhuang/data/PubLayNet/ocr/val
424
+ OCR_DATA_DIR_TRAIN: /mnt/localdata/users/yupanhuang/data/PubLayNet/ocr/train
425
+ OUTPUT_DIR: /mnt/localdata/users/yupanhuang/models/layoutlmv3/fts/publaynet-base/
426
+ PUBLAYNET_DATA_DIR_TEST: /mnt/localdata/users/yupanhuang/data/PubLayNet/publaynet/val
427
+ PUBLAYNET_DATA_DIR_TRAIN: /mnt/localdata/users/yupanhuang/data/PubLayNet/publaynet/train
428
+ SEED: 42
429
+ SOLVER:
430
+ AMP:
431
+ ENABLED: true
432
+ BACKBONE_MULTIPLIER: 1.0
433
+ BASE_LR: 0.0002
434
+ BIAS_LR_FACTOR: 1.0
435
+ CHECKPOINT_PERIOD: 2000
436
+ CLIP_GRADIENTS:
437
+ CLIP_TYPE: full_model
438
+ CLIP_VALUE: 1.0
439
+ ENABLED: true
440
+ NORM_TYPE: 2.0
441
+ GAMMA: 0.1
442
+ GRADIENT_ACCUMULATION_STEPS: 1
443
+ IMS_PER_BATCH: 32
444
+ LR_SCHEDULER_NAME: WarmupCosineLR
445
+ MAX_ITER: 60000
446
+ MOMENTUM: 0.9
447
+ NESTEROV: false
448
+ OPTIMIZER: ADAMW
449
+ REFERENCE_WORLD_SIZE: 0
450
+ STEPS:
451
+ - 30000
452
+ WARMUP_FACTOR: 0.01
453
+ WARMUP_ITERS: 1000
454
+ WARMUP_METHOD: linear
455
+ WEIGHT_DECAY: 0.05
456
+ WEIGHT_DECAY_BIAS: null
457
+ WEIGHT_DECAY_NORM: 0.0
458
+ TEST:
459
+ AUG:
460
+ ENABLED: false
461
+ FLIP: true
462
+ MAX_SIZE: 4000
463
+ MIN_SIZES:
464
+ - 400
465
+ - 500
466
+ - 600
467
+ - 700
468
+ - 800
469
+ - 900
470
+ - 1000
471
+ - 1100
472
+ - 1200
473
+ DETECTIONS_PER_IMAGE: 100
474
+ EVAL_PERIOD: 2000
475
+ EXPECTED_RESULTS: []
476
+ KEYPOINT_OKS_SIGMAS: []
477
+ PRECISE_BN:
478
+ ENABLED: false
479
+ NUM_ITER: 200
480
+ VERSION: 2
481
+ VIS_PERIOD: 0
482
+
483
+ [04/17 14:10:20 detectron2]: Full config saved to /mnt/localdata/users/yupanhuang/models/layoutlmv3/fts/publaynet-base/config.yaml
484
+ [04/17 14:10:21 fvcore.common.checkpoint]: [Checkpointer] Loading from /mnt/localdata/users/yupanhuang/models/layoutlmv3/fts/publaynet-base/model_final.pth ...
485
+ [04/17 14:10:23 d2.data.datasets.coco]: Loading /mnt/localdata/users/yupanhuang/data/PubLayNet/publaynet/val.json takes 1.71 seconds.
486
+ [04/17 14:10:24 d2.data.datasets.coco]: Loaded 11245 images in COCO format from /mnt/localdata/users/yupanhuang/data/PubLayNet/publaynet/val.json
487
+ [04/17 14:10:25 d2.data.build]: Distribution of instances among all 5 categories:
488
+ | category | #instances | category | #instances | category | #instances |
489
+ |:----------:|:-------------|:----------:|:-------------|:----------:|:-------------|
490
+ | text | 88625 | title | 18801 | list | 4239 |
491
+ | table | 4769 | figure | 4327 | | |
492
+ | total | 120761 | | | | |
493
+ [04/17 14:10:25 d2.data.common]: Serializing 11245 elements to byte tensors and concatenating them all ...
494
+ [04/17 14:10:25 d2.data.common]: Serialized dataset takes 55.80 MiB
495
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/detectron2/structures/image_list.py:88: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
496
+ max_size = (max_size + (stride - 1)) // stride * stride
497
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/torch/nn/functional.py:3635: UserWarning: Default upsampling behavior when mode=bicubic is changed to align_corners=False since 0.4.0. Please specify align_corners=True if the old behavior is desired. See the documentation of nn.Upsample for details.
498
+ "See the documentation of nn.Upsample for details.".format(mode)
499
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/torch/functional.py:445: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:2157.)
500
+ return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
501
+ [04/17 14:10:27 d2.evaluation.evaluator]: Start inference on 1406 batches
502
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/detectron2/structures/image_list.py:88: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
503
+ max_size = (max_size + (stride - 1)) // stride * stride
504
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/torch/nn/functional.py:3635: UserWarning: Default upsampling behavior when mode=bicubic is changed to align_corners=False since 0.4.0. Please specify align_corners=True if the old behavior is desired. See the documentation of nn.Upsample for details.
505
+ "See the documentation of nn.Upsample for details.".format(mode)
506
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/torch/functional.py:445: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:2157.)
507
+ return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
508
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/detectron2/structures/image_list.py:88: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
509
+ max_size = (max_size + (stride - 1)) // stride * stride
510
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/detectron2/structures/image_list.py:88: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
511
+ max_size = (max_size + (stride - 1)) // stride * stride
512
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/torch/nn/functional.py:3635: UserWarning: Default upsampling behavior when mode=bicubic is changed to align_corners=False since 0.4.0. Please specify align_corners=True if the old behavior is desired. See the documentation of nn.Upsample for details.
513
+ "See the documentation of nn.Upsample for details.".format(mode)
514
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/torch/nn/functional.py:3635: UserWarning: Default upsampling behavior when mode=bicubic is changed to align_corners=False since 0.4.0. Please specify align_corners=True if the old behavior is desired. See the documentation of nn.Upsample for details.
515
+ "See the documentation of nn.Upsample for details.".format(mode)
516
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/detectron2/structures/image_list.py:88: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
517
+ max_size = (max_size + (stride - 1)) // stride * stride
518
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/torch/functional.py:445: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:2157.)
519
+ return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
520
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/torch/nn/functional.py:3635: UserWarning: Default upsampling behavior when mode=bicubic is changed to align_corners=False since 0.4.0. Please specify align_corners=True if the old behavior is desired. See the documentation of nn.Upsample for details.
521
+ "See the documentation of nn.Upsample for details.".format(mode)
522
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/torch/functional.py:445: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:2157.)
523
+ return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
524
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/detectron2/structures/image_list.py:88: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
525
+ max_size = (max_size + (stride - 1)) // stride * stride
526
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/detectron2/structures/image_list.py:88: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
527
+ max_size = (max_size + (stride - 1)) // stride * stride
528
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/torch/functional.py:445: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:2157.)
529
+ return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
530
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/torch/nn/functional.py:3635: UserWarning: Default upsampling behavior when mode=bicubic is changed to align_corners=False since 0.4.0. Please specify align_corners=True if the old behavior is desired. See the documentation of nn.Upsample for details.
531
+ "See the documentation of nn.Upsample for details.".format(mode)
532
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/torch/nn/functional.py:3635: UserWarning: Default upsampling behavior when mode=bicubic is changed to align_corners=False since 0.4.0. Please specify align_corners=True if the old behavior is desired. See the documentation of nn.Upsample for details.
533
+ "See the documentation of nn.Upsample for details.".format(mode)
534
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/torch/functional.py:445: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:2157.)
535
+ return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
536
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/torch/functional.py:445: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:2157.)
537
+ return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
538
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/detectron2/structures/image_list.py:88: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
539
+ max_size = (max_size + (stride - 1)) // stride * stride
540
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/torch/nn/functional.py:3635: UserWarning: Default upsampling behavior when mode=bicubic is changed to align_corners=False since 0.4.0. Please specify align_corners=True if the old behavior is desired. See the documentation of nn.Upsample for details.
541
+ "See the documentation of nn.Upsample for details.".format(mode)
542
+ /mnt/localdata/users/yupanhuang/Downloads/miniconda3/envs/layoutlmft/lib/python3.7/site-packages/torch/functional.py:445: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:2157.)
543
+ return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
544
+ [04/17 14:10:39 d2.evaluation.evaluator]: Inference done 11/1406. Dataloading: 0.0029 s/iter. Inference: 0.1609 s/iter. Eval: 0.0212 s/iter. Total: 0.1850 s/iter. ETA=0:04:18
545
+ [04/17 14:10:44 d2.evaluation.evaluator]: Inference done 38/1406. Dataloading: 0.0036 s/iter. Inference: 0.1729 s/iter. Eval: 0.0140 s/iter. Total: 0.1909 s/iter. ETA=0:04:21
546
+ [04/17 14:10:50 d2.evaluation.evaluator]: Inference done 66/1406. Dataloading: 0.0027 s/iter. Inference: 0.1703 s/iter. Eval: 0.0149 s/iter. Total: 0.1882 s/iter. ETA=0:04:12
547
+ [04/17 14:10:55 d2.evaluation.evaluator]: Inference done 93/1406. Dataloading: 0.0035 s/iter. Inference: 0.1691 s/iter. Eval: 0.0146 s/iter. Total: 0.1874 s/iter. ETA=0:04:06
548
+ [04/17 14:11:00 d2.evaluation.evaluator]: Inference done 121/1406. Dataloading: 0.0034 s/iter. Inference: 0.1687 s/iter. Eval: 0.0141 s/iter. Total: 0.1864 s/iter. ETA=0:03:59
549
+ [04/17 14:11:05 d2.evaluation.evaluator]: Inference done 149/1406. Dataloading: 0.0031 s/iter. Inference: 0.1684 s/iter. Eval: 0.0137 s/iter. Total: 0.1853 s/iter. ETA=0:03:52
550
+ [04/17 14:11:10 d2.evaluation.evaluator]: Inference done 177/1406. Dataloading: 0.0029 s/iter. Inference: 0.1684 s/iter. Eval: 0.0134 s/iter. Total: 0.1849 s/iter. ETA=0:03:47
551
+ [04/17 14:11:15 d2.evaluation.evaluator]: Inference done 206/1406. Dataloading: 0.0030 s/iter. Inference: 0.1680 s/iter. Eval: 0.0127 s/iter. Total: 0.1838 s/iter. ETA=0:03:40
552
+ [04/17 14:11:20 d2.evaluation.evaluator]: Inference done 234/1406. Dataloading: 0.0032 s/iter. Inference: 0.1676 s/iter. Eval: 0.0125 s/iter. Total: 0.1835 s/iter. ETA=0:03:35
553
+ [04/17 14:11:25 d2.evaluation.evaluator]: Inference done 261/1406. Dataloading: 0.0031 s/iter. Inference: 0.1682 s/iter. Eval: 0.0124 s/iter. Total: 0.1838 s/iter. ETA=0:03:30
554
+ [04/17 14:11:30 d2.evaluation.evaluator]: Inference done 288/1406. Dataloading: 0.0031 s/iter. Inference: 0.1692 s/iter. Eval: 0.0122 s/iter. Total: 0.1846 s/iter. ETA=0:03:26
555
+ [04/17 14:11:35 d2.evaluation.evaluator]: Inference done 315/1406. Dataloading: 0.0030 s/iter. Inference: 0.1694 s/iter. Eval: 0.0121 s/iter. Total: 0.1846 s/iter. ETA=0:03:21
556
+ [04/17 14:11:40 d2.evaluation.evaluator]: Inference done 342/1406. Dataloading: 0.0030 s/iter. Inference: 0.1698 s/iter. Eval: 0.0121 s/iter. Total: 0.1850 s/iter. ETA=0:03:16
557
+ [04/17 14:11:46 d2.evaluation.evaluator]: Inference done 370/1406. Dataloading: 0.0030 s/iter. Inference: 0.1696 s/iter. Eval: 0.0118 s/iter. Total: 0.1846 s/iter. ETA=0:03:11
558
+ [04/17 14:11:51 d2.evaluation.evaluator]: Inference done 396/1406. Dataloading: 0.0030 s/iter. Inference: 0.1704 s/iter. Eval: 0.0117 s/iter. Total: 0.1852 s/iter. ETA=0:03:07
559
+ [04/17 14:11:56 d2.evaluation.evaluator]: Inference done 423/1406. Dataloading: 0.0029 s/iter. Inference: 0.1707 s/iter. Eval: 0.0118 s/iter. Total: 0.1856 s/iter. ETA=0:03:02
560
+ [04/17 14:12:01 d2.evaluation.evaluator]: Inference done 450/1406. Dataloading: 0.0030 s/iter. Inference: 0.1708 s/iter. Eval: 0.0120 s/iter. Total: 0.1859 s/iter. ETA=0:02:57
561
+ [04/17 14:12:06 d2.evaluation.evaluator]: Inference done 476/1406. Dataloading: 0.0029 s/iter. Inference: 0.1713 s/iter. Eval: 0.0120 s/iter. Total: 0.1863 s/iter. ETA=0:02:53
562
+ [04/17 14:12:11 d2.evaluation.evaluator]: Inference done 501/1406. Dataloading: 0.0029 s/iter. Inference: 0.1721 s/iter. Eval: 0.0119 s/iter. Total: 0.1871 s/iter. ETA=0:02:49
563
+ [04/17 14:12:16 d2.evaluation.evaluator]: Inference done 528/1406. Dataloading: 0.0030 s/iter. Inference: 0.1720 s/iter. Eval: 0.0120 s/iter. Total: 0.1871 s/iter. ETA=0:02:44
564
+ [04/17 14:12:21 d2.evaluation.evaluator]: Inference done 555/1406. Dataloading: 0.0030 s/iter. Inference: 0.1721 s/iter. Eval: 0.0121 s/iter. Total: 0.1873 s/iter. ETA=0:02:39
565
+ [04/17 14:12:26 d2.evaluation.evaluator]: Inference done 581/1406. Dataloading: 0.0031 s/iter. Inference: 0.1722 s/iter. Eval: 0.0123 s/iter. Total: 0.1876 s/iter. ETA=0:02:34
566
+ [04/17 14:12:31 d2.evaluation.evaluator]: Inference done 607/1406. Dataloading: 0.0031 s/iter. Inference: 0.1725 s/iter. Eval: 0.0123 s/iter. Total: 0.1880 s/iter. ETA=0:02:30
567
+ [04/17 14:12:36 d2.evaluation.evaluator]: Inference done 633/1406. Dataloading: 0.0031 s/iter. Inference: 0.1728 s/iter. Eval: 0.0122 s/iter. Total: 0.1882 s/iter. ETA=0:02:25
568
+ [04/17 14:12:41 d2.evaluation.evaluator]: Inference done 658/1406. Dataloading: 0.0031 s/iter. Inference: 0.1733 s/iter. Eval: 0.0123 s/iter. Total: 0.1888 s/iter. ETA=0:02:21
569
+ [04/17 14:12:47 d2.evaluation.evaluator]: Inference done 684/1406. Dataloading: 0.0031 s/iter. Inference: 0.1736 s/iter. Eval: 0.0123 s/iter. Total: 0.1891 s/iter. ETA=0:02:16
570
+ [04/17 14:12:52 d2.evaluation.evaluator]: Inference done 710/1406. Dataloading: 0.0031 s/iter. Inference: 0.1738 s/iter. Eval: 0.0124 s/iter. Total: 0.1894 s/iter. ETA=0:02:11
571
+ [04/17 14:12:57 d2.evaluation.evaluator]: Inference done 736/1406. Dataloading: 0.0031 s/iter. Inference: 0.1740 s/iter. Eval: 0.0124 s/iter. Total: 0.1897 s/iter. ETA=0:02:07
572
+ [04/17 14:13:02 d2.evaluation.evaluator]: Inference done 762/1406. Dataloading: 0.0031 s/iter. Inference: 0.1742 s/iter. Eval: 0.0124 s/iter. Total: 0.1898 s/iter. ETA=0:02:02
573
+ [04/17 14:13:07 d2.evaluation.evaluator]: Inference done 787/1406. Dataloading: 0.0031 s/iter. Inference: 0.1743 s/iter. Eval: 0.0126 s/iter. Total: 0.1902 s/iter. ETA=0:01:57
574
+ [04/17 14:13:12 d2.evaluation.evaluator]: Inference done 813/1406. Dataloading: 0.0031 s/iter. Inference: 0.1746 s/iter. Eval: 0.0126 s/iter. Total: 0.1904 s/iter. ETA=0:01:52
575
+ [04/17 14:13:17 d2.evaluation.evaluator]: Inference done 839/1406. Dataloading: 0.0031 s/iter. Inference: 0.1748 s/iter. Eval: 0.0125 s/iter. Total: 0.1905 s/iter. ETA=0:01:48
576
+ [04/17 14:13:22 d2.evaluation.evaluator]: Inference done 865/1406. Dataloading: 0.0031 s/iter. Inference: 0.1750 s/iter. Eval: 0.0125 s/iter. Total: 0.1907 s/iter. ETA=0:01:43
577
+ [04/17 14:13:27 d2.evaluation.evaluator]: Inference done 891/1406. Dataloading: 0.0031 s/iter. Inference: 0.1754 s/iter. Eval: 0.0124 s/iter. Total: 0.1910 s/iter. ETA=0:01:38
578
+ [04/17 14:13:32 d2.evaluation.evaluator]: Inference done 918/1406. Dataloading: 0.0031 s/iter. Inference: 0.1755 s/iter. Eval: 0.0123 s/iter. Total: 0.1910 s/iter. ETA=0:01:33
579
+ [04/17 14:13:37 d2.evaluation.evaluator]: Inference done 943/1406. Dataloading: 0.0030 s/iter. Inference: 0.1759 s/iter. Eval: 0.0121 s/iter. Total: 0.1912 s/iter. ETA=0:01:28
580
+ [04/17 14:13:43 d2.evaluation.evaluator]: Inference done 969/1406. Dataloading: 0.0030 s/iter. Inference: 0.1762 s/iter. Eval: 0.0121 s/iter. Total: 0.1914 s/iter. ETA=0:01:23
581
+ [04/17 14:13:48 d2.evaluation.evaluator]: Inference done 995/1406. Dataloading: 0.0030 s/iter. Inference: 0.1763 s/iter. Eval: 0.0121 s/iter. Total: 0.1915 s/iter. ETA=0:01:18
582
+ [04/17 14:13:53 d2.evaluation.evaluator]: Inference done 1021/1406. Dataloading: 0.0030 s/iter. Inference: 0.1763 s/iter. Eval: 0.0121 s/iter. Total: 0.1916 s/iter. ETA=0:01:13
583
+ [04/17 14:13:58 d2.evaluation.evaluator]: Inference done 1047/1406. Dataloading: 0.0031 s/iter. Inference: 0.1765 s/iter. Eval: 0.0120 s/iter. Total: 0.1917 s/iter. ETA=0:01:08
584
+ [04/17 14:14:03 d2.evaluation.evaluator]: Inference done 1073/1406. Dataloading: 0.0031 s/iter. Inference: 0.1766 s/iter. Eval: 0.0120 s/iter. Total: 0.1918 s/iter. ETA=0:01:03
585
+ [04/17 14:14:08 d2.evaluation.evaluator]: Inference done 1099/1406. Dataloading: 0.0031 s/iter. Inference: 0.1767 s/iter. Eval: 0.0120 s/iter. Total: 0.1919 s/iter. ETA=0:00:58
586
+ [04/17 14:14:13 d2.evaluation.evaluator]: Inference done 1125/1406. Dataloading: 0.0031 s/iter. Inference: 0.1768 s/iter. Eval: 0.0120 s/iter. Total: 0.1919 s/iter. ETA=0:00:53
587
+ [04/17 14:14:18 d2.evaluation.evaluator]: Inference done 1151/1406. Dataloading: 0.0031 s/iter. Inference: 0.1768 s/iter. Eval: 0.0120 s/iter. Total: 0.1920 s/iter. ETA=0:00:48
588
+ [04/17 14:14:23 d2.evaluation.evaluator]: Inference done 1177/1406. Dataloading: 0.0031 s/iter. Inference: 0.1769 s/iter. Eval: 0.0119 s/iter. Total: 0.1920 s/iter. ETA=0:00:43
589
+ [04/17 14:14:28 d2.evaluation.evaluator]: Inference done 1203/1406. Dataloading: 0.0031 s/iter. Inference: 0.1769 s/iter. Eval: 0.0120 s/iter. Total: 0.1921 s/iter. ETA=0:00:39
590
+ [04/17 14:14:33 d2.evaluation.evaluator]: Inference done 1228/1406. Dataloading: 0.0031 s/iter. Inference: 0.1770 s/iter. Eval: 0.0121 s/iter. Total: 0.1923 s/iter. ETA=0:00:34
591
+ [04/17 14:14:38 d2.evaluation.evaluator]: Inference done 1254/1406. Dataloading: 0.0031 s/iter. Inference: 0.1769 s/iter. Eval: 0.0122 s/iter. Total: 0.1924 s/iter. ETA=0:00:29
592
+ [04/17 14:14:43 d2.evaluation.evaluator]: Inference done 1279/1406. Dataloading: 0.0032 s/iter. Inference: 0.1770 s/iter. Eval: 0.0123 s/iter. Total: 0.1926 s/iter. ETA=0:00:24
593
+ [04/17 14:14:48 d2.evaluation.evaluator]: Inference done 1305/1406. Dataloading: 0.0031 s/iter. Inference: 0.1769 s/iter. Eval: 0.0124 s/iter. Total: 0.1926 s/iter. ETA=0:00:19
594
+ [04/17 14:14:54 d2.evaluation.evaluator]: Inference done 1331/1406. Dataloading: 0.0031 s/iter. Inference: 0.1770 s/iter. Eval: 0.0124 s/iter. Total: 0.1926 s/iter. ETA=0:00:14
595
+ [04/17 14:14:59 d2.evaluation.evaluator]: Inference done 1357/1406. Dataloading: 0.0031 s/iter. Inference: 0.1769 s/iter. Eval: 0.0126 s/iter. Total: 0.1927 s/iter. ETA=0:00:09
596
+ [04/17 14:15:04 d2.evaluation.evaluator]: Inference done 1385/1406. Dataloading: 0.0031 s/iter. Inference: 0.1767 s/iter. Eval: 0.0125 s/iter. Total: 0.1924 s/iter. ETA=0:00:04
597
+ [04/17 14:15:08 d2.evaluation.evaluator]: Total inference time: 0:04:29.845715 (0.192609 s / iter per device, on 8 devices)
598
+ [04/17 14:15:08 d2.evaluation.evaluator]: Total inference pure compute time: 0:04:07 (0.176466 s / iter per device, on 8 devices)
599
+ [04/17 14:15:17 d2.evaluation.coco_evaluation]: Preparing results for COCO format ...
600
+ [04/17 14:15:17 d2.evaluation.coco_evaluation]: Saving results to /mnt/localdata/users/yupanhuang/models/layoutlmv3/fts/publaynet-base/inference/coco_instances_results.json
601
+ [04/17 14:15:18 d2.evaluation.coco_evaluation]: Evaluating predictions with unofficial COCO API...
602
+ Loading and preparing results...
603
+ DONE (t=0.12s)
604
+ creating index...
605
+ index created!
606
+ [04/17 14:15:19 d2.evaluation.fast_eval_api]: Evaluate annotation type *bbox*
607
+ [04/17 14:15:22 d2.evaluation.fast_eval_api]: COCOeval_opt.evaluate() finished in 3.39 seconds.
608
+ [04/17 14:15:22 d2.evaluation.fast_eval_api]: Accumulating evaluation results...
609
+ [04/17 14:15:23 d2.evaluation.fast_eval_api]: COCOeval_opt.accumulate() finished in 0.40 seconds.
610
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.951
611
+ Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.981
612
+ Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.969
613
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.468
614
+ Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.856
615
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.976
616
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.543
617
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.953
618
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.964
619
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.607
620
+ Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.897
621
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.986
622
+ [04/17 14:15:23 d2.evaluation.coco_evaluation]: Evaluation results for bbox:
623
+ | AP | AP50 | AP75 | APs | APm | APl |
624
+ |:------:|:------:|:------:|:------:|:------:|:------:|
625
+ | 95.088 | 98.066 | 96.933 | 46.800 | 85.592 | 97.626 |
626
+ [04/17 14:15:23 d2.evaluation.coco_evaluation]: Per-category bbox AP:
627
+ | category | AP | category | AP | category | AP |
628
+ |:-----------|:-------|:-----------|:-------|:-----------|:-------|
629
+ | text | 94.466 | title | 90.569 | list | 95.522 |
630
+ | table | 97.883 | figure | 97.001 | | |
631
+ Loading and preparing results...
632
+ DONE (t=2.05s)
633
+ creating index...
634
+ index created!
635
+ [04/17 14:15:28 d2.evaluation.fast_eval_api]: Evaluate annotation type *segm*
636
+ [04/17 14:15:38 d2.evaluation.fast_eval_api]: COCOeval_opt.evaluate() finished in 10.92 seconds.
637
+ [04/17 14:15:39 d2.evaluation.fast_eval_api]: Accumulating evaluation results...
638
+ [04/17 14:15:39 d2.evaluation.fast_eval_api]: COCOeval_opt.accumulate() finished in 0.43 seconds.
639
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.928
640
+ Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.981
641
+ Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.967
642
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.506
643
+ Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.824
644
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.959
645
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.535
646
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.938
647
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.949
648
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.632
649
+ Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.879
650
+ Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.973
651
+ [04/17 14:15:39 d2.evaluation.coco_evaluation]: Evaluation results for segm:
652
+ | AP | AP50 | AP75 | APs | APm | APl |
653
+ |:------:|:------:|:------:|:------:|:------:|:------:|
654
+ | 92.819 | 98.070 | 96.719 | 50.628 | 82.397 | 95.917 |
655
+ [04/17 14:15:39 d2.evaluation.coco_evaluation]: Per-category segm AP:
656
+ | category | AP | category | AP | category | AP |
657
+ |:-----------|:-------|:-----------|:-------|:-----------|:-------|
658
+ | text | 93.433 | title | 87.009 | list | 88.864 |
659
+ | table | 97.799 | figure | 96.989 | | |
660
+ [04/17 14:15:40 d2.evaluation.testing]: copypaste: Task: bbox
661
+ [04/17 14:15:40 d2.evaluation.testing]: copypaste: AP,AP50,AP75,APs,APm,APl
662
+ [04/17 14:15:40 d2.evaluation.testing]: copypaste: 95.0883,98.0662,96.9331,46.8005,85.5919,97.6258
663
+ [04/17 14:15:40 d2.evaluation.testing]: copypaste: Task: segm
664
+ [04/17 14:15:40 d2.evaluation.testing]: copypaste: AP,AP50,AP75,APs,APm,APl
665
+ [04/17 14:15:40 d2.evaluation.testing]: copypaste: 92.8187,98.0704,96.7191,50.6278,82.3972,95.9172
666
+
667
+ Process finished with exit code 0
668
+
model_final.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a00f69371203a19a5815896ab849e61956bea1fa8f3bdc831ee560dd0c2ce2b
3
+ size 563985959