Files changed (1) hide show
  1. config.yml +492 -0
config.yml ADDED
@@ -0,0 +1,492 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ AUG:
2
+ AA_TYPE: rand-m9-mstd0.5-inc1
3
+ COLOR_JITTER: 0.4
4
+ ENABLE: false
5
+ GEN_MASK_LOADER: false
6
+ INTERPOLATION: bicubic
7
+ MASK_FRAMES: false
8
+ MASK_RATIO: 0.0
9
+ MASK_TUBE: false
10
+ MASK_WINDOW_SIZE:
11
+ - 8
12
+ - 7
13
+ - 7
14
+ MAX_MASK_PATCHES_PER_BLOCK: null
15
+ NUM_SAMPLE: 1
16
+ RE_COUNT: 1
17
+ RE_MODE: pixel
18
+ RE_PROB: 0.25
19
+ RE_SPLIT: false
20
+ AVA:
21
+ ANNOTATION_DIR: /mnt/vol/gfsai-flash3-east/ai-group/users/haoqifan/ava/frame_list/
22
+ BGR: false
23
+ DETECTION_SCORE_THRESH: 0.9
24
+ EXCLUSION_FILE: ava_val_excluded_timestamps_v2.2.csv
25
+ FRAME_DIR: /mnt/fair-flash3-east/ava_trainval_frames.img/
26
+ FRAME_LIST_DIR: /mnt/vol/gfsai-flash3-east/ai-group/users/haoqifan/ava/frame_list/
27
+ FULL_TEST_ON_VAL: false
28
+ GROUNDTRUTH_FILE: ava_val_v2.2.csv
29
+ IMG_PROC_BACKEND: cv2
30
+ LABEL_MAP_FILE: ava_action_list_v2.2_for_activitynet_2019.pbtxt
31
+ TEST_FORCE_FLIP: false
32
+ TEST_LISTS:
33
+ - val.csv
34
+ TEST_PREDICT_BOX_LISTS:
35
+ - ava_val_predicted_boxes.csv
36
+ TRAIN_GT_BOX_LISTS:
37
+ - ava_train_v2.2.csv
38
+ TRAIN_LISTS:
39
+ - train.csv
40
+ TRAIN_PCA_JITTER_ONLY: true
41
+ TRAIN_PREDICT_BOX_LISTS: []
42
+ TRAIN_USE_COLOR_AUGMENTATION: false
43
+ BENCHMARK:
44
+ LOG_PERIOD: 100
45
+ NUM_EPOCHS: 5
46
+ SHUFFLE: true
47
+ BN:
48
+ GLOBAL_SYNC: false
49
+ NORM_TYPE: sync_batchnorm
50
+ NUM_BATCHES_PRECISE: 200
51
+ NUM_SPLITS: 1
52
+ NUM_SYNC_DEVICES: 1
53
+ USE_PRECISE_STATS: true
54
+ WEIGHT_DECAY: 0.0
55
+ CONTRASTIVE:
56
+ BN_MLP: false
57
+ BN_SYNC_MLP: false
58
+ DELTA_CLIPS_MAX: .inf
59
+ DELTA_CLIPS_MIN: -.inf
60
+ DIM: 128
61
+ INTERP_MEMORY: false
62
+ KNN_ON: true
63
+ LENGTH: 239975
64
+ LOCAL_SHUFFLE_BN: true
65
+ MEM_TYPE: 1d
66
+ MLP_DIM: 2048
67
+ MOCO_MULTI_VIEW_QUEUE: false
68
+ MOMENTUM: 0.5
69
+ MOMENTUM_ANNEALING: false
70
+ NUM_CLASSES_DOWNSTREAM: 400
71
+ NUM_MLP_LAYERS: 1
72
+ PREDICTOR_DEPTHS: []
73
+ QUEUE_LEN: 65536
74
+ SEQUENTIAL: false
75
+ SIMCLR_DIST_ON: true
76
+ SWAV_QEUE_LEN: 0
77
+ T: 0.07
78
+ TYPE: mem
79
+ DATA:
80
+ COLOR_RND_GRAYSCALE: 0.0
81
+ DECODING_BACKEND: torchvision
82
+ DECODING_SHORT_SIZE: 256
83
+ DUMMY_LOAD: false
84
+ ENSEMBLE_METHOD: max
85
+ IN22K_TRAINVAL: false
86
+ IN22k_VAL_IN1K: ''
87
+ INPUT_CHANNEL_NUM:
88
+ - 3
89
+ INV_UNIFORM_SAMPLE: true
90
+ IN_VAL_CROP_RATIO: 0.875
91
+ LOADER_CHUNK_OVERALL_SIZE: 0
92
+ LOADER_CHUNK_SIZE: 0
93
+ MEAN:
94
+ - 0.45
95
+ - 0.45
96
+ - 0.45
97
+ MULTI_LABEL: true
98
+ NUM_FRAMES: 16
99
+ PATH_LABEL_SEPARATOR: ' '
100
+ PATH_PREFIX: kabr/KABR/dataset/image
101
+ PATH_TO_DATA_DIR: kabr/KABR/annotation
102
+ PATH_TO_PRELOAD_IMDB: ''
103
+ RANDOM_FLIP: true
104
+ REVERSE_INPUT_CHANNEL: true
105
+ SAMPLING_RATE: 5
106
+ SKIP_ROWS: 0
107
+ SSL_BLUR_SIGMA_MAX:
108
+ - 0.0
109
+ - 2.0
110
+ SSL_BLUR_SIGMA_MIN:
111
+ - 0.0
112
+ - 0.1
113
+ SSL_COLOR_BRI_CON_SAT:
114
+ - 0.2
115
+ - 0.2
116
+ - 0.2
117
+ SSL_COLOR_HUE: 0.1
118
+ SSL_COLOR_JITTER: true
119
+ SSL_MOCOV2_AUG: false
120
+ STD:
121
+ - 0.225
122
+ - 0.225
123
+ - 0.225
124
+ TARGET_FPS: 30
125
+ TEST_CROP_SIZE: 300
126
+ TIME_DIFF_PROB: 0.0
127
+ TRAIN_CROP_NUM_SPATIAL: 1
128
+ TRAIN_CROP_NUM_TEMPORAL: 1
129
+ TRAIN_CROP_SIZE: 300
130
+ TRAIN_JITTER_ASPECT_RELATIVE: []
131
+ TRAIN_JITTER_FPS: 0.0
132
+ TRAIN_JITTER_MOTION_SHIFT: false
133
+ TRAIN_JITTER_SCALES:
134
+ - 300
135
+ - 400
136
+ TRAIN_JITTER_SCALES_RELATIVE: []
137
+ TRAIN_PCA_EIGVAL:
138
+ - 0.225
139
+ - 0.224
140
+ - 0.229
141
+ TRAIN_PCA_EIGVEC:
142
+ - - -0.5675
143
+ - 0.7192
144
+ - 0.4009
145
+ - - -0.5808
146
+ - -0.0045
147
+ - -0.814
148
+ - - -0.5836
149
+ - -0.6948
150
+ - 0.4203
151
+ USE_OFFSET_SAMPLING: false
152
+ DATA_LOADER:
153
+ ENABLE_MULTI_THREAD_DECODE: false
154
+ NUM_WORKERS: 8
155
+ PIN_MEMORY: true
156
+ DEMO:
157
+ BUFFER_SIZE: 0
158
+ CLIP_VIS_SIZE: 10
159
+ COMMON_CLASS_NAMES:
160
+ - watch (a person)
161
+ - talk to (e.g., self, a person, a group)
162
+ - listen to (a person)
163
+ - touch (an object)
164
+ - carry/hold (an object)
165
+ - walk
166
+ - sit
167
+ - lie/sleep
168
+ - bend/bow (at the waist)
169
+ COMMON_CLASS_THRES: 0.7
170
+ DETECTRON2_CFG: COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml
171
+ DETECTRON2_THRESH: 0.9
172
+ DETECTRON2_WEIGHTS: detectron2://COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl
173
+ DISPLAY_HEIGHT: 0
174
+ DISPLAY_WIDTH: 0
175
+ ENABLE: false
176
+ FPS: 30
177
+ GT_BOXES: ''
178
+ INPUT_FORMAT: BGR
179
+ INPUT_VIDEO: kabr/KABR/dataset/video/G0103.mp4
180
+ LABEL_FILE_PATH: kabr/KABR/annotation/classes.json
181
+ NUM_CLIPS_SKIP: 1
182
+ NUM_VIS_INSTANCES: 1
183
+ OUTPUT_FILE: kabr/KABR/dataset/predict/G0103.mp4
184
+ OUTPUT_FPS: -1
185
+ PREDS_BOXES: ''
186
+ SLOWMO: 1
187
+ STARTING_SECOND: 900
188
+ THREAD_ENABLE: false
189
+ UNCOMMON_CLASS_THRES: 0.3
190
+ VIS_MODE: thres
191
+ WEBCAM: -1
192
+ DETECTION:
193
+ ALIGNED: true
194
+ ENABLE: false
195
+ ROI_XFORM_RESOLUTION: 7
196
+ SPATIAL_SCALE_FACTOR: 16
197
+ DIST_BACKEND: nccl
198
+ LOG_MODEL_INFO: true
199
+ LOG_PERIOD: 10
200
+ MASK:
201
+ DECODER_DEPTH: 0
202
+ DECODER_EMBED_DIM: 512
203
+ DECODER_SEP_POS_EMBED: false
204
+ DEC_KV_KERNEL: []
205
+ DEC_KV_STRIDE: []
206
+ ENABLE: false
207
+ HEAD_TYPE: separate
208
+ MAE_ON: false
209
+ MAE_RND_MASK: false
210
+ NORM_PRED_PIXEL: true
211
+ PER_FRAME_MASKING: false
212
+ PRED_HOG: false
213
+ PRETRAIN_DEPTH:
214
+ - 15
215
+ SCALE_INIT_BY_DEPTH: false
216
+ TIME_STRIDE_LOSS: true
217
+ MIXUP:
218
+ ALPHA: 0.8
219
+ CUTMIX_ALPHA: 1.0
220
+ ENABLE: false
221
+ LABEL_SMOOTH_VALUE: 0.1
222
+ PROB: 1.0
223
+ SWITCH_PROB: 0.5
224
+ MODEL:
225
+ ACT_CHECKPOINT: false
226
+ ARCH: x3d
227
+ DETACH_FINAL_FC: false
228
+ DROPCONNECT_RATE: 0.0
229
+ DROPOUT_RATE: 0.5
230
+ FC_INIT_STD: 0.01
231
+ FP16_ALLREDUCE: false
232
+ FROZEN_BN: false
233
+ HEAD_ACT: sigmoid
234
+ LOSS_FUNC: EQL
235
+ MODEL_NAME: X3D
236
+ MULTI_PATHWAY_ARCH:
237
+ - slowfast
238
+ NUM_CLASSES: 8
239
+ SINGLE_PATHWAY_ARCH:
240
+ - 2d
241
+ - c2d
242
+ - i3d
243
+ - slow
244
+ - x3d
245
+ - mvit
246
+ - maskmvit
247
+ MULTIGRID:
248
+ BN_BASE_SIZE: 8
249
+ DEFAULT_B: 0
250
+ DEFAULT_S: 0
251
+ DEFAULT_T: 0
252
+ EPOCH_FACTOR: 1.5
253
+ EVAL_FREQ: 3
254
+ LONG_CYCLE: false
255
+ LONG_CYCLE_FACTORS:
256
+ - - 0.25
257
+ - 0.7071067811865476
258
+ - - 0.5
259
+ - 0.7071067811865476
260
+ - - 0.5
261
+ - 1
262
+ - - 1
263
+ - 1
264
+ LONG_CYCLE_SAMPLING_RATE: 0
265
+ SHORT_CYCLE: false
266
+ SHORT_CYCLE_FACTORS:
267
+ - 0.5
268
+ - 0.7071067811865476
269
+ MVIT:
270
+ CLS_EMBED_ON: true
271
+ DEPTH: 16
272
+ DIM_MUL: []
273
+ DIM_MUL_IN_ATT: false
274
+ DROPOUT_RATE: 0.0
275
+ DROPPATH_RATE: 0.1
276
+ EMBED_DIM: 96
277
+ HEAD_INIT_SCALE: 1.0
278
+ HEAD_MUL: []
279
+ LAYER_SCALE_INIT_VALUE: 0.0
280
+ MLP_RATIO: 4.0
281
+ MODE: conv
282
+ NORM: layernorm
283
+ NORM_STEM: false
284
+ NUM_HEADS: 1
285
+ PATCH_2D: false
286
+ PATCH_KERNEL:
287
+ - 3
288
+ - 7
289
+ - 7
290
+ PATCH_PADDING:
291
+ - 2
292
+ - 4
293
+ - 4
294
+ PATCH_STRIDE:
295
+ - 2
296
+ - 4
297
+ - 4
298
+ POOL_FIRST: false
299
+ POOL_KVQ_KERNEL: null
300
+ POOL_KV_STRIDE: []
301
+ POOL_KV_STRIDE_ADAPTIVE: null
302
+ POOL_Q_STRIDE: []
303
+ QKV_BIAS: true
304
+ REL_POS_SPATIAL: false
305
+ REL_POS_TEMPORAL: false
306
+ REL_POS_ZERO_INIT: false
307
+ RESIDUAL_POOLING: false
308
+ REV:
309
+ BUFFER_LAYERS: []
310
+ ENABLE: false
311
+ PRE_Q_FUSION: avg
312
+ RESPATH_FUSE: concat
313
+ RES_PATH: conv
314
+ SEPARATE_QKV: false
315
+ SEP_POS_EMBED: false
316
+ USE_ABS_POS: true
317
+ USE_FIXED_SINCOS_POS: false
318
+ USE_MEAN_POOLING: false
319
+ ZERO_DECAY_POS_CLS: true
320
+ NONLOCAL:
321
+ GROUP:
322
+ - - 1
323
+ - - 1
324
+ - - 1
325
+ - - 1
326
+ INSTANTIATION: dot_product
327
+ LOCATION:
328
+ - - []
329
+ - - []
330
+ - - []
331
+ - - []
332
+ POOL:
333
+ - - - 1
334
+ - 2
335
+ - 2
336
+ - - 1
337
+ - 2
338
+ - 2
339
+ - - - 1
340
+ - 2
341
+ - 2
342
+ - - 1
343
+ - 2
344
+ - 2
345
+ - - - 1
346
+ - 2
347
+ - 2
348
+ - - 1
349
+ - 2
350
+ - 2
351
+ - - - 1
352
+ - 2
353
+ - 2
354
+ - - 1
355
+ - 2
356
+ - 2
357
+ NUM_GPUS: 8
358
+ NUM_SHARDS: 1
359
+ OUTPUT_DIR: kabr/KABR/logs/x3d-l-kabr
360
+ RESNET:
361
+ DEPTH: 50
362
+ INPLACE_RELU: true
363
+ NUM_BLOCK_TEMP_KERNEL:
364
+ - - 3
365
+ - - 4
366
+ - - 6
367
+ - - 3
368
+ NUM_GROUPS: 1
369
+ SPATIAL_DILATIONS:
370
+ - - 1
371
+ - - 1
372
+ - - 1
373
+ - - 1
374
+ SPATIAL_STRIDES:
375
+ - - 1
376
+ - - 2
377
+ - - 2
378
+ - - 2
379
+ STRIDE_1X1: false
380
+ TRANS_FUNC: x3d_transform
381
+ WIDTH_PER_GROUP: 64
382
+ ZERO_INIT_FINAL_BN: true
383
+ ZERO_INIT_FINAL_CONV: false
384
+ RNG_SEED: 0
385
+ SHARD_ID: 0
386
+ SLOWFAST:
387
+ ALPHA: 8
388
+ BETA_INV: 8
389
+ FUSION_CONV_CHANNEL_RATIO: 2
390
+ FUSION_KERNEL_SZ: 5
391
+ SOLVER:
392
+ BASE_LR: 0.05
393
+ BASE_LR_SCALE_NUM_SHARDS: true
394
+ BETAS:
395
+ - 0.9
396
+ - 0.999
397
+ CLIP_GRAD_L2NORM: null
398
+ CLIP_GRAD_VAL: null
399
+ COSINE_AFTER_WARMUP: false
400
+ COSINE_END_LR: 0.0
401
+ DAMPENING: 0.0
402
+ GAMMA: 0.1
403
+ LARS_ON: false
404
+ LAYER_DECAY: 1.0
405
+ LRS: []
406
+ LR_POLICY: cosine
407
+ MAX_EPOCH: 120
408
+ MOMENTUM: 0.9
409
+ NESTEROV: true
410
+ OPTIMIZING_METHOD: sgd
411
+ STEPS: []
412
+ STEP_SIZE: 1
413
+ WARMUP_EPOCHS: 35.0
414
+ WARMUP_FACTOR: 0.1
415
+ WARMUP_START_LR: 0.01
416
+ WEIGHT_DECAY: 5.0e-05
417
+ ZERO_WD_1D_PARAM: false
418
+ TASK: ''
419
+ TENSORBOARD:
420
+ CATEGORIES_PATH: ''
421
+ CLASS_NAMES_PATH: kabr/KABR/annotation/classes.json
422
+ CONFUSION_MATRIX:
423
+ ENABLE: true
424
+ FIGSIZE:
425
+ - 8
426
+ - 8
427
+ SUBSET_PATH: kabr/KABR/annotation/classes.txt
428
+ ENABLE: true
429
+ HISTOGRAM:
430
+ ENABLE: true
431
+ FIGSIZE:
432
+ - 8
433
+ - 8
434
+ SUBSET_PATH: kabr/KABR/annotation/classes.txt
435
+ TOPK: 3
436
+ LOG_DIR: ''
437
+ MODEL_VIS:
438
+ ACTIVATIONS: true
439
+ COLORMAP: Pastel2
440
+ ENABLE: true
441
+ GRAD_CAM:
442
+ COLORMAP: viridis
443
+ ENABLE: true
444
+ LAYER_LIST:
445
+ - s5/pathway0_res14
446
+ USE_TRUE_LABEL: false
447
+ INPUT_VIDEO: true
448
+ LAYER_LIST:
449
+ - s5/pathway0_res14
450
+ MODEL_WEIGHTS: true
451
+ TOPK_PREDS: 1
452
+ PREDICTIONS_PATH: ''
453
+ WRONG_PRED_VIS:
454
+ ENABLE: false
455
+ SUBSET_PATH: ''
456
+ TAG: Incorrectly classified videos.
457
+ TEST:
458
+ BATCH_SIZE: 64
459
+ CHECKPOINT_FILE_PATH: ''
460
+ CHECKPOINT_TYPE: pytorch
461
+ DATASET: charades
462
+ ENABLE: false
463
+ NUM_ENSEMBLE_VIEWS: 2
464
+ NUM_SPATIAL_CROPS: 1
465
+ NUM_TEMPORAL_CLIPS: []
466
+ SAVE_RESULTS_PATH: kabr/KABR/logs/x3d-l-kabr/results.txt
467
+ TRAIN:
468
+ AUTO_RESUME: true
469
+ BATCH_SIZE: 64
470
+ CHECKPOINT_CLEAR_NAME_PATTERN: []
471
+ CHECKPOINT_EPOCH_RESET: true
472
+ CHECKPOINT_FILE_PATH: slowfast/projects/x3d/x3d_l.pyth
473
+ CHECKPOINT_INFLATE: false
474
+ CHECKPOINT_IN_INIT: false
475
+ CHECKPOINT_PERIOD: 5
476
+ CHECKPOINT_TYPE: pytorch
477
+ DATASET: charades
478
+ ENABLE: true
479
+ EVAL_PERIOD: 5
480
+ KILL_LOSS_EXPLOSION_FACTOR: 0.0
481
+ MIXED_PRECISION: false
482
+ VIS_MASK:
483
+ ENABLE: false
484
+ X3D:
485
+ BN_LIN5: false
486
+ BOTTLENECK_FACTOR: 2.25
487
+ CHANNELWISE_3x3x3: true
488
+ DEPTH_FACTOR: 5.0
489
+ DIM_C1: 12
490
+ DIM_C5: 2048
491
+ SCALE_RES2: false
492
+ WIDTH_FACTOR: 2.0