diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..901061412743b3f85fa1dce42444ccc6932e0a34
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,12 @@
+.DS_Store
+__pycache__/
+.idea/
+.tmp/
+.vscode/
+bdd/
+runs/
+inference/
+*.pth
+*.pt
+*.tar
+*.tar.gz
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..46e89ae9c33188a6435a43d01387d5b28863fadc
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 Hust Visual Learning Team
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
index 4de009a6228d46abcca4170bb53b063225b9636d..9a0788dbfcbccb952cff4a849d7e9de16c8fb5e3 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,289 @@
-YOLOP
\ No newline at end of file
+
+
+## You Only :eyes: Once for Panoptic :car: Perception
+> [**You Only Look at Once for Panoptic driving Perception**](https://arxiv.org/abs/2108.11250)
+>
+> by Dong Wu, Manwen Liao, Weitian Zhang, [Xinggang Wang](https://xinggangw.info/)
:email: [*School of EIC, HUST*](http://eic.hust.edu.cn/English/Home.htm)
+>
+> (
:email:) corresponding author.
+>
+> *arXiv technical report ([arXiv 2108.11250](https://arxiv.org/abs/2108.11250))*
+
+---
+
+### The Illustration of YOLOP
+
+![yolop](pictures/yolop.png)
+
+### Contributions
+
+* We put forward an efficient multi-task network that can jointly handle three crucial tasks in autonomous driving: object detection, drivable area segmentation and lane detection to save computational costs, reduce inference time as well as improve the performance of each task. Our work is the first to reach real-time on embedded devices while maintaining state-of-the-art level performance on the `BDD100K `dataset.
+
+* We design the ablative experiments to verify the effectiveness of our multi-tasking scheme. It is proved that the three tasks can be learned jointly without tedious alternating optimization.
+
+
+
+### Results
+
+#### Traffic Object Detection Result
+
+| Model | Recall(%) | mAP50(%) | Speed(fps) |
+| -------------- | --------- | -------- | ---------- |
+| `Multinet` | 81.3 | 60.2 | 8.6 |
+| `DLT-Net` | 89.4 | 68.4 | 9.3 |
+| `Faster R-CNN` | 77.2 | 55.6 | 5.3 |
+| `YOLOv5s` | 86.8 | 77.2 | 82 |
+| `YOLOP(ours)` | 89.2 | 76.5 | 41 |
+#### Drivable Area Segmentation Result
+
+| Model | mIOU(%) | Speed(fps) |
+| ------------- | ------- | ---------- |
+| `Multinet` | 71.6 | 8.6 |
+| `DLT-Net` | 71.3 | 9.3 |
+| `PSPNet` | 89.6 | 11.1 |
+| `YOLOP(ours)` | 91.5 | 41 |
+
+#### Lane Detection Result:
+
+| Model | mIOU(%) | IOU(%) |
+| ------------- | ------- | ------ |
+| `ENet` | 34.12 | 14.64 |
+| `SCNN` | 35.79 | 15.84 |
+| `ENet-SAD` | 36.56 | 16.02 |
+| `YOLOP(ours)` | 70.50 | 26.20 |
+
+#### Ablation Studies 1: End-to-end v.s. Step-by-step:
+
+| Training_method | Recall(%) | AP(%) | mIoU(%) | Accuracy(%) | IoU(%) |
+| --------------- | --------- | ----- | ------- | ----------- | ------ |
+| `ES-W` | 87.0 | 75.3 | 90.4 | 66.8 | 26.2 |
+| `ED-W` | 87.3 | 76.0 | 91.6 | 71.2 | 26.1 |
+| `ES-D-W` | 87.0 | 75.1 | 91.7 | 68.6 | 27.0 |
+| `ED-S-W` | 87.5 | 76.1 | 91.6 | 68.0 | 26.8 |
+| `End-to-end` | 89.2 | 76.5 | 91.5 | 70.5 | 26.2 |
+
+#### Ablation Studies 2: Multi-task v.s. Single task:
+
+| Training_method | Recall(%) | AP(%) | mIoU(%) | Accuracy(%) | IoU(%) | Speed(ms/frame) |
+| --------------- | --------- | ----- | ------- | ----------- | ------ | --------------- |
+| `Det(only)` | 88.2 | 76.9 | - | - | - | 15.7 |
+| `Da-Seg(only)` | - | - | 92.0 | - | - | 14.8 |
+| `Ll-Seg(only)` | - | - | - | 79.6 | 27.9 | 14.8 |
+| `Multitask` | 89.2 | 76.5 | 91.5 | 70.5 | 26.2 | 24.4 |
+
+**Notes**:
+
+- The works we has use for reference including `Multinet` ([paper](https://arxiv.org/pdf/1612.07695.pdf?utm_campaign=affiliate-ir-Optimise%20media%28%20South%20East%20Asia%29%20Pte.%20ltd._156_-99_national_R_all_ACQ_cpa_en&utm_content=&utm_source=%20388939),[code](https://github.com/MarvinTeichmann/MultiNet)),`DLT-Net` ([paper](https://ieeexplore.ieee.org/abstract/document/8937825)),`Faster R-CNN` ([paper](https://proceedings.neurips.cc/paper/2015/file/14bfa6bb14875e45bba028a21ed38046-Paper.pdf),[code](https://github.com/ShaoqingRen/faster_rcnn)),`YOLOv5s`([code](https://github.com/ultralytics/yolov5)) ,`PSPNet`([paper](https://openaccess.thecvf.com/content_cvpr_2017/papers/Zhao_Pyramid_Scene_Parsing_CVPR_2017_paper.pdf),[code](https://github.com/hszhao/PSPNet)) ,`ENet`([paper](https://arxiv.org/pdf/1606.02147.pdf),[code](https://github.com/osmr/imgclsmob)) `SCNN`([paper](https://www.aaai.org/ocs/index.php/AAAI/AAAI18/paper/download/16802/16322),[code](https://github.com/XingangPan/SCNN)) `SAD-ENet`([paper](https://openaccess.thecvf.com/content_ICCV_2019/papers/Hou_Learning_Lightweight_Lane_Detection_CNNs_by_Self_Attention_Distillation_ICCV_2019_paper.pdf),[code](https://github.com/cardwing/Codes-for-Lane-Detection)). Thanks for their wonderful works.
+- In table 4, E, D, S and W refer to Encoder, Detect head, two Segment heads and whole network. So the Algorithm (First, we only train Encoder and Detect head. Then we freeze the Encoder and Detect head as well as train two Segmentation heads. Finally, the entire network is trained jointly for all three tasks.) can be marked as ED-S-W, and the same for others.
+
+---
+
+### Visualization
+
+#### Traffic Object Detection Result
+
+![detect result](pictures/detect.png)
+
+#### Drivable Area Segmentation Result
+
+![](pictures/da.png)
+
+#### Lane Detection Result
+
+![](pictures/ll.png)
+
+**Notes**:
+
+- The visualization of lane detection result has been post processed by quadratic fitting.
+
+---
+
+### Project Structure
+
+```python
+├─inference
+│ ├─images # inference images
+│ ├─output # inference result
+├─lib
+│ ├─config/default # configuration of training and validation
+│ ├─core
+│ │ ├─activations.py # activation function
+│ │ ├─evaluate.py # calculation of metric
+│ │ ├─function.py # training and validation of model
+│ │ ├─general.py #calculation of metric、nms、conversion of data-format、visualization
+│ │ ├─loss.py # loss function
+│ │ ├─postprocess.py # postprocess(refine da-seg and ll-seg, unrelated to paper)
+│ ├─dataset
+│ │ ├─AutoDriveDataset.py # Superclass dataset,general function
+│ │ ├─bdd.py # Subclass dataset,specific function
+│ │ ├─hust.py # Subclass dataset(Campus scene, unrelated to paper)
+│ │ ├─convect.py
+│ │ ├─DemoDataset.py # demo dataset(image, video and stream)
+│ ├─models
+│ │ ├─YOLOP.py # Setup and Configuration of model
+│ │ ├─light.py # Model lightweight(unrelated to paper, zwt)
+│ │ ├─commom.py # calculation module
+│ ├─utils
+│ │ ├─augmentations.py # data augumentation
+│ │ ├─autoanchor.py # auto anchor(k-means)
+│ │ ├─split_dataset.py # (Campus scene, unrelated to paper)
+│ │ ├─utils.py # logging、device_select、time_measure、optimizer_select、model_save&initialize 、Distributed training
+│ ├─run
+│ │ ├─dataset/training time # Visualization, logging and model_save
+├─tools
+│ │ ├─demo.py # demo(folder、camera)
+│ │ ├─test.py
+│ │ ├─train.py
+├─toolkits
+│ │ ├─depoly # Deployment of model
+├─weights # Pretraining model
+```
+
+---
+
+### Requirement
+
+This codebase has been developed with python version 3.7, PyTorch 1.7+ and torchvision 0.8+:
+
+```
+conda install pytorch==1.7.0 torchvision==0.8.0 cudatoolkit=10.2 -c pytorch
+```
+
+See `requirements.txt` for additional dependencies and version requirements.
+
+```setup
+pip install -r requirements.txt
+```
+
+### Data preparation
+
+#### Download
+
+- Download the images from [images](https://bdd-data.berkeley.edu/).
+
+- Download the annotations of detection from [det_annotations](https://drive.google.com/file/d/1Ge-R8NTxG1eqd4zbryFo-1Uonuh0Nxyl/view?usp=sharing).
+- Download the annotations of drivable area segmentation from [da_seg_annotations](https://drive.google.com/file/d/1xy_DhUZRHR8yrZG3OwTQAHhYTnXn7URv/view?usp=sharing).
+- Download the annotations of lane line segmentation from [ll_seg_annotations](https://drive.google.com/file/d/1lDNTPIQj_YLNZVkksKM25CvCHuquJ8AP/view?usp=sharing).
+
+We recommend the dataset directory structure to be the following:
+
+```
+# The id represent the correspondence relation
+├─dataset root
+│ ├─images
+│ │ ├─train
+│ │ ├─val
+│ ├─det_annotations
+│ │ ├─train
+│ │ ├─val
+│ ├─da_seg_annotations
+│ │ ├─train
+│ │ ├─val
+│ ├─ll_seg_annotations
+│ │ ├─train
+│ │ ├─val
+```
+
+Update the your dataset path in the `./lib/config/default.py`.
+
+### Training
+
+You can set the training configuration in the `./lib/config/default.py`. (Including: the loading of preliminary model, loss, data augmentation, optimizer, warm-up and cosine annealing, auto-anchor, training epochs, batch_size).
+
+If you want try alternating optimization or train model for single task, please modify the corresponding configuration in `./lib/config/default.py` to `True`. (As following, all configurations is `False`, which means training multiple tasks end to end).
+
+```python
+# Alternating optimization
+_C.TRAIN.SEG_ONLY = False # Only train two segmentation branchs
+_C.TRAIN.DET_ONLY = False # Only train detection branch
+_C.TRAIN.ENC_SEG_ONLY = False # Only train encoder and two segmentation branchs
+_C.TRAIN.ENC_DET_ONLY = False # Only train encoder and detection branch
+
+# Single task
+_C.TRAIN.DRIVABLE_ONLY = False # Only train da_segmentation task
+_C.TRAIN.LANE_ONLY = False # Only train ll_segmentation task
+_C.TRAIN.DET_ONLY = False # Only train detection task
+```
+
+Start training:
+
+```shell
+python tools/train.py
+```
+
+
+
+### Evaluation
+
+You can set the evaluation configuration in the `./lib/config/default.py`. (Including: batch_size and threshold value for nms).
+
+Start evaluating:
+
+```shell
+python tools/test.py --weights weights/End-to-end.pth
+```
+
+
+
+### Demo Test
+
+We provide two testing method.
+
+#### Folder
+
+You can store the image or video in `--source`, and then save the reasoning result to `--save-dir`
+
+```shell
+python tools/demo --source inference/images
+```
+
+
+
+#### Camera
+
+If there are any camera connected to your computer, you can set the `source` as the camera number(The default is 0).
+
+```shell
+python tools/demo --source 0
+```
+
+
+
+#### Demonstration
+
+
+
+ input |
+ output |
+
+
+ |
+ |
+
+
+ |
+ |
+
+
+
+
+
+### Deployment
+
+Our model can reason in real-time on `Jetson Tx2`, with `Zed Camera` to capture image. We use `TensorRT` tool for speeding up. We provide code for deployment and reasoning of model in `./tools/deploy`.
+
+
+
+## Citation
+
+If you find our paper and code useful for your research, please consider giving a star :star: and citation :pencil: :
+
+```BibTeX
+@misc{2108.11250,
+Author = {Dong Wu and Manwen Liao and Weitian Zhang and Xinggang Wang},
+Title = {YOLOP: You Only Look Once for Panoptic Driving Perception},
+Year = {2021},
+Eprint = {arXiv:2108.11250},
+}
+```
+
diff --git a/lib/__init__.py b/lib/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/lib/config/__init__.py b/lib/config/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..34052907947a5431f48239e16ec45820a06bc441
--- /dev/null
+++ b/lib/config/__init__.py
@@ -0,0 +1,2 @@
+from .default import _C as cfg
+from .default import update_config
\ No newline at end of file
diff --git a/lib/config/default.py b/lib/config/default.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1f044e55f4aea100484fb50028a17cd82ff80e2
--- /dev/null
+++ b/lib/config/default.py
@@ -0,0 +1,157 @@
+import os
+from yacs.config import CfgNode as CN
+
+
+_C = CN()
+
+_C.LOG_DIR = 'runs/'
+_C.GPUS = (0,1)
+_C.WORKERS = 8
+_C.PIN_MEMORY = False
+_C.PRINT_FREQ = 20
+_C.AUTO_RESUME =False # Resume from the last training interrupt
+_C.NEED_AUTOANCHOR = False # Re-select the prior anchor(k-means) When training from scratch (epoch=0), set it to be ture!
+_C.DEBUG = False
+_C.num_seg_class = 2
+
+# Cudnn related params
+_C.CUDNN = CN()
+_C.CUDNN.BENCHMARK = True
+_C.CUDNN.DETERMINISTIC = False
+_C.CUDNN.ENABLED = True
+
+
+# common params for NETWORK
+_C.MODEL = CN(new_allowed=True)
+_C.MODEL.NAME = ''
+_C.MODEL.STRU_WITHSHARE = False #add share_block to segbranch
+_C.MODEL.HEADS_NAME = ['']
+_C.MODEL.PRETRAINED = ""
+_C.MODEL.PRETRAINED_DET = ""
+_C.MODEL.IMAGE_SIZE = [640, 640] # width * height, ex: 192 * 256
+_C.MODEL.EXTRA = CN(new_allowed=True)
+
+
+# loss params
+_C.LOSS = CN(new_allowed=True)
+_C.LOSS.LOSS_NAME = ''
+_C.LOSS.MULTI_HEAD_LAMBDA = None
+_C.LOSS.FL_GAMMA = 0.0 # focal loss gamma
+_C.LOSS.CLS_POS_WEIGHT = 1.0 # classification loss positive weights
+_C.LOSS.OBJ_POS_WEIGHT = 1.0 # object loss positive weights
+_C.LOSS.SEG_POS_WEIGHT = 1.0 # segmentation loss positive weights
+_C.LOSS.BOX_GAIN = 0.05 # box loss gain
+_C.LOSS.CLS_GAIN = 0.5 # classification loss gain
+_C.LOSS.OBJ_GAIN = 1.0 # object loss gain
+_C.LOSS.DA_SEG_GAIN = 0.2 # driving area segmentation loss gain
+_C.LOSS.LL_SEG_GAIN = 0.2 # lane line segmentation loss gain
+_C.LOSS.LL_IOU_GAIN = 0.2 # lane line iou loss gain
+
+
+# DATASET related params
+_C.DATASET = CN(new_allowed=True)
+_C.DATASET.DATAROOT = '/home/zwt/bdd/bdd100k/images/100k' # the path of images folder
+_C.DATASET.LABELROOT = '/home/zwt/bdd/bdd100k/labels/100k' # the path of det_annotations folder
+_C.DATASET.MASKROOT = '/home/zwt/bdd/bdd_seg_gt' # the path of da_seg_annotations folder
+_C.DATASET.LANEROOT = '/home/zwt/bdd/bdd_lane_gt' # the path of ll_seg_annotations folder
+_C.DATASET.DATASET = 'BddDataset'
+_C.DATASET.TRAIN_SET = 'train'
+_C.DATASET.TEST_SET = 'val'
+_C.DATASET.DATA_FORMAT = 'jpg'
+_C.DATASET.SELECT_DATA = False
+_C.DATASET.ORG_IMG_SIZE = [720, 1280]
+
+# training data augmentation
+_C.DATASET.FLIP = True
+_C.DATASET.SCALE_FACTOR = 0.25
+_C.DATASET.ROT_FACTOR = 10
+_C.DATASET.TRANSLATE = 0.1
+_C.DATASET.SHEAR = 0.0
+_C.DATASET.COLOR_RGB = False
+_C.DATASET.HSV_H = 0.015 # image HSV-Hue augmentation (fraction)
+_C.DATASET.HSV_S = 0.7 # image HSV-Saturation augmentation (fraction)
+_C.DATASET.HSV_V = 0.4 # image HSV-Value augmentation (fraction)
+# TODO: more augmet params to add
+
+
+# train
+_C.TRAIN = CN(new_allowed=True)
+_C.TRAIN.LR0 = 0.001 # initial learning rate (SGD=1E-2, Adam=1E-3)
+_C.TRAIN.LRF = 0.2 # final OneCycleLR learning rate (lr0 * lrf)
+_C.TRAIN.WARMUP_EPOCHS = 3.0
+_C.TRAIN.WARMUP_BIASE_LR = 0.1
+_C.TRAIN.WARMUP_MOMENTUM = 0.8
+
+_C.TRAIN.OPTIMIZER = 'adam'
+_C.TRAIN.MOMENTUM = 0.937
+_C.TRAIN.WD = 0.0005
+_C.TRAIN.NESTEROV = True
+_C.TRAIN.GAMMA1 = 0.99
+_C.TRAIN.GAMMA2 = 0.0
+
+_C.TRAIN.BEGIN_EPOCH = 0
+_C.TRAIN.END_EPOCH = 240
+
+_C.TRAIN.VAL_FREQ = 1
+_C.TRAIN.BATCH_SIZE_PER_GPU =24
+_C.TRAIN.SHUFFLE = True
+
+_C.TRAIN.IOU_THRESHOLD = 0.2
+_C.TRAIN.ANCHOR_THRESHOLD = 4.0
+
+# if training 3 tasks end-to-end, set all parameters as True
+# Alternating optimization
+_C.TRAIN.SEG_ONLY = False # Only train two segmentation branchs
+_C.TRAIN.DET_ONLY = False # Only train detection branch
+_C.TRAIN.ENC_SEG_ONLY = False # Only train encoder and two segmentation branchs
+_C.TRAIN.ENC_DET_ONLY = False # Only train encoder and detection branch
+
+# Single task
+_C.TRAIN.DRIVABLE_ONLY = False # Only train da_segmentation task
+_C.TRAIN.LANE_ONLY = False # Only train ll_segmentation task
+_C.TRAIN.DET_ONLY = False # Only train detection task
+
+
+
+
+_C.TRAIN.PLOT = True #
+
+# testing
+_C.TEST = CN(new_allowed=True)
+_C.TEST.BATCH_SIZE_PER_GPU = 24
+_C.TEST.MODEL_FILE = ''
+_C.TEST.SAVE_JSON = False
+_C.TEST.SAVE_TXT = False
+_C.TEST.PLOTS = True
+_C.TEST.NMS_CONF_THRESHOLD = 0.001
+_C.TEST.NMS_IOU_THRESHOLD = 0.6
+
+
+def update_config(cfg, args):
+ cfg.defrost()
+ # cfg.merge_from_file(args.cfg)
+
+ if args.modelDir:
+ cfg.OUTPUT_DIR = args.modelDir
+
+ if args.logDir:
+ cfg.LOG_DIR = args.logDir
+
+ # if args.conf_thres:
+ # cfg.TEST.NMS_CONF_THRESHOLD = args.conf_thres
+
+ # if args.iou_thres:
+ # cfg.TEST.NMS_IOU_THRESHOLD = args.iou_thres
+
+
+
+ # cfg.MODEL.PRETRAINED = os.path.join(
+ # cfg.DATA_DIR, cfg.MODEL.PRETRAINED
+ # )
+ #
+ # if cfg.TEST.MODEL_FILE:
+ # cfg.TEST.MODEL_FILE = os.path.join(
+ # cfg.DATA_DIR, cfg.TEST.MODEL_FILE
+ # )
+
+ cfg.freeze()
diff --git a/lib/core/__init__.py b/lib/core/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8baaf0be623aaa200324da599152ce234f40d21
--- /dev/null
+++ b/lib/core/__init__.py
@@ -0,0 +1 @@
+from .function import AverageMeter
\ No newline at end of file
diff --git a/lib/core/activations.py b/lib/core/activations.py
new file mode 100644
index 0000000000000000000000000000000000000000..24f5a30f021fdf0b639116a8c32d6135a2d16750
--- /dev/null
+++ b/lib/core/activations.py
@@ -0,0 +1,72 @@
+# Activation functions
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+# Swish https://arxiv.org/pdf/1905.02244.pdf ---------------------------------------------------------------------------
+class Swish(nn.Module): #
+ @staticmethod
+ def forward(x):
+ return x * torch.sigmoid(x)
+
+
+class Hardswish(nn.Module): # export-friendly version of nn.Hardswish()
+ @staticmethod
+ def forward(x):
+ # return x * F.hardsigmoid(x) # for torchscript and CoreML
+ return x * F.hardtanh(x + 3, 0., 6.) / 6. # for torchscript, CoreML and ONNX
+
+
+class MemoryEfficientSwish(nn.Module):
+ class F(torch.autograd.Function):
+ @staticmethod
+ def forward(ctx, x):
+ ctx.save_for_backward(x)
+ return x * torch.sigmoid(x)
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ x = ctx.saved_tensors[0]
+ sx = torch.sigmoid(x)
+ return grad_output * (sx * (1 + x * (1 - sx)))
+
+ def forward(self, x):
+ return self.F.apply(x)
+
+
+# Mish https://github.com/digantamisra98/Mish --------------------------------------------------------------------------
+class Mish(nn.Module):
+ @staticmethod
+ def forward(x):
+ return x * F.softplus(x).tanh()
+
+
+class MemoryEfficientMish(nn.Module):
+ class F(torch.autograd.Function):
+ @staticmethod
+ def forward(ctx, x):
+ ctx.save_for_backward(x)
+ return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x)))
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ x = ctx.saved_tensors[0]
+ sx = torch.sigmoid(x)
+ fx = F.softplus(x).tanh()
+ return grad_output * (fx + x * sx * (1 - fx * fx))
+
+ def forward(self, x):
+ return self.F.apply(x)
+
+
+# FReLU https://arxiv.org/abs/2007.11824 -------------------------------------------------------------------------------
+class FReLU(nn.Module):
+ def __init__(self, c1, k=3): # ch_in, kernel
+ super().__init__()
+ self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False)
+ self.bn = nn.BatchNorm2d(c1)
+
+ def forward(self, x):
+ return torch.max(x, self.bn(self.conv(x)))
diff --git a/lib/core/evaluate.py b/lib/core/evaluate.py
new file mode 100644
index 0000000000000000000000000000000000000000..a5812c68aec58898b3eaa9e8a66dc7e9d27bf85c
--- /dev/null
+++ b/lib/core/evaluate.py
@@ -0,0 +1,278 @@
+# Model validation metrics
+
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+
+from . import general
+
+
+def fitness(x):
+ # Model fitness as a weighted combination of metrics
+ w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
+ return (x[:, :4] * w).sum(1)
+
+
+def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='precision-recall_curve.png', names=[]):
+ """ Compute the average precision, given the recall and precision curves.
+ Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
+ # Arguments
+ tp: True positives (nparray, nx1 or nx10).
+ conf: Objectness value from 0-1 (nparray).
+ pred_cls: Predicted object classes (nparray).
+ target_cls: True object classes (nparray).
+ plot: Plot precision-recall curve at mAP@0.5
+ save_dir: Plot save directory
+ # Returns
+ The average precision as computed in py-faster-rcnn.
+ """
+
+ # Sort by objectness
+ i = np.argsort(-conf) # sorted index from big to small
+ tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
+
+ # Find unique classes, each number just showed up once
+ unique_classes = np.unique(target_cls)
+
+ # Create Precision-Recall curve and compute AP for each class
+ px, py = np.linspace(0, 1, 1000), [] # for plotting
+ pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898
+ s = [unique_classes.shape[0], tp.shape[1]] # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95)
+ ap, p, r = np.zeros(s), np.zeros((unique_classes.shape[0], 1000)), np.zeros((unique_classes.shape[0], 1000))
+ for ci, c in enumerate(unique_classes):
+ i = pred_cls == c
+ n_l = (target_cls == c).sum() # number of labels
+ n_p = i.sum() # number of predictions
+
+ if n_p == 0 or n_l == 0:
+ continue
+ else:
+ # Accumulate FPs and TPs
+ fpc = (1 - tp[i]).cumsum(0)
+ tpc = tp[i].cumsum(0)
+
+ # Recall
+ recall = tpc / (n_l + 1e-16) # recall curve
+ r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # r at pr_score, negative x, xp because xp decreases
+
+ # Precision
+ precision = tpc / (tpc + fpc) # precision curve
+ p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score
+
+ # AP from recall-precision curve
+ for j in range(tp.shape[1]):
+ ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
+ if plot and (j == 0):
+ py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5
+
+ # Compute F1 score (harmonic mean of precision and recall)
+ f1 = 2 * p * r / (p + r + 1e-16)
+ i = r.mean(0).argmax()
+
+ if plot:
+ plot_pr_curve(px, py, ap, save_dir, names)
+
+ return p[:, i], r[:, i], ap, f1, unique_classes.astype('int32')
+
+
+def compute_ap(recall, precision):
+ """ Compute the average precision, given the recall and precision curves
+ # Arguments
+ recall: The recall curve (list)
+ precision: The precision curve (list)
+ # Returns
+ Average precision, precision curve, recall curve
+ """
+
+ # Append sentinel values to beginning and end
+ mrec = np.concatenate(([0.], recall, [recall[-1] + 0.01]))
+ mpre = np.concatenate(([1.], precision, [0.]))
+
+ # Compute the precision envelope
+ mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))
+
+ # Integrate area under curve
+ method = 'interp' # methods: 'continuous', 'interp'
+ if method == 'interp':
+ x = np.linspace(0, 1, 101) # 101-point interp (COCO)
+ ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate
+ else: # 'continuous'
+ i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes
+ ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve
+
+ return ap, mpre, mrec
+
+
+class ConfusionMatrix:
+ # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix
+ def __init__(self, nc, conf=0.25, iou_thres=0.45):
+ self.matrix = np.zeros((nc + 1, nc + 1))
+ self.nc = nc # number of classes
+ self.conf = conf
+ self.iou_thres = iou_thres
+
+ def process_batch(self, detections, labels):
+ """
+ Return intersection-over-union (Jaccard index) of boxes.
+ Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
+ Arguments:
+ detections (Array[N, 6]), x1, y1, x2, y2, conf, class
+ labels (Array[M, 5]), class, x1, y1, x2, y2
+ Returns:
+ None, updates confusion matrix accordingly
+ """
+ detections = detections[detections[:, 4] > self.conf]
+ gt_classes = labels[:, 0].int()
+ detection_classes = detections[:, 5].int()
+ iou = general.box_iou(labels[:, 1:], detections[:, :4])
+
+ x = torch.where(iou > self.iou_thres)
+ if x[0].shape[0]:
+ matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()
+ if x[0].shape[0] > 1:
+ matches = matches[matches[:, 2].argsort()[::-1]]
+ matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
+ matches = matches[matches[:, 2].argsort()[::-1]]
+ matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
+ else:
+ matches = np.zeros((0, 3))
+
+ n = matches.shape[0] > 0
+ m0, m1, _ = matches.transpose().astype(np.int16)
+ for i, gc in enumerate(gt_classes):
+ j = m0 == i
+ if n and sum(j) == 1:
+ self.matrix[gc, detection_classes[m1[j]]] += 1 # correct
+ else:
+ self.matrix[gc, self.nc] += 1 # background FP
+
+ if n:
+ for i, dc in enumerate(detection_classes):
+ if not any(m1 == i):
+ self.matrix[self.nc, dc] += 1 # background FN
+
+ def matrix(self):
+ return self.matrix
+
+ def plot(self, save_dir='', names=()):
+ try:
+ import seaborn as sn
+
+ array = self.matrix / (self.matrix.sum(0).reshape(1, self.nc + 1) + 1E-6) # normalize
+ array[array < 0.005] = np.nan # don't annotate (would appear as 0.00)
+
+ fig = plt.figure(figsize=(12, 9), tight_layout=True)
+ sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size
+ labels = (0 < len(names) < 99) and len(names) == self.nc # apply names to ticklabels
+ sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True,
+ xticklabels=names + ['background FN'] if labels else "auto",
+ yticklabels=names + ['background FP'] if labels else "auto").set_facecolor((1, 1, 1))
+ fig.axes[0].set_xlabel('True')
+ fig.axes[0].set_ylabel('Predicted')
+ fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250)
+ except Exception as e:
+ pass
+
+ def print(self):
+ for i in range(self.nc + 1):
+ print(' '.join(map(str, self.matrix[i])))
+
+class SegmentationMetric(object):
+ '''
+ imgLabel [batch_size, height(144), width(256)]
+ confusionMatrix [[0(TN),1(FP)],
+ [2(FN),3(TP)]]
+ '''
+ def __init__(self, numClass):
+ self.numClass = numClass
+ self.confusionMatrix = np.zeros((self.numClass,)*2)
+
+ def pixelAccuracy(self):
+ # return all class overall pixel accuracy
+ # acc = (TP + TN) / (TP + TN + FP + TN)
+ acc = np.diag(self.confusionMatrix).sum() / self.confusionMatrix.sum()
+ return acc
+
+ def lineAccuracy(self):
+ Acc = np.diag(self.confusionMatrix) / (self.confusionMatrix.sum(axis=1) + 1e-12)
+ return Acc[1]
+
+ def classPixelAccuracy(self):
+ # return each category pixel accuracy(A more accurate way to call it precision)
+ # acc = (TP) / TP + FP
+ classAcc = np.diag(self.confusionMatrix) / (self.confusionMatrix.sum(axis=0) + 1e-12)
+ return classAcc
+
+ def meanPixelAccuracy(self):
+ classAcc = self.classPixelAccuracy()
+ meanAcc = np.nanmean(classAcc)
+ return meanAcc
+
+ def meanIntersectionOverUnion(self):
+ # Intersection = TP Union = TP + FP + FN
+ # IoU = TP / (TP + FP + FN)
+ intersection = np.diag(self.confusionMatrix)
+ union = np.sum(self.confusionMatrix, axis=1) + np.sum(self.confusionMatrix, axis=0) - np.diag(self.confusionMatrix)
+ IoU = intersection / union
+ IoU[np.isnan(IoU)] = 0
+ mIoU = np.nanmean(IoU)
+ return mIoU
+
+ def IntersectionOverUnion(self):
+ intersection = np.diag(self.confusionMatrix)
+ union = np.sum(self.confusionMatrix, axis=1) + np.sum(self.confusionMatrix, axis=0) - np.diag(self.confusionMatrix)
+ IoU = intersection / union
+ IoU[np.isnan(IoU)] = 0
+ return IoU[1]
+
+ def genConfusionMatrix(self, imgPredict, imgLabel):
+ # remove classes from unlabeled pixels in gt image and predict
+ # print(imgLabel.shape)
+ mask = (imgLabel >= 0) & (imgLabel < self.numClass)
+ label = self.numClass * imgLabel[mask] + imgPredict[mask]
+ count = np.bincount(label, minlength=self.numClass**2)
+ confusionMatrix = count.reshape(self.numClass, self.numClass)
+ return confusionMatrix
+
+ def Frequency_Weighted_Intersection_over_Union(self):
+ # FWIOU = [(TP+FN)/(TP+FP+TN+FN)] *[TP / (TP + FP + FN)]
+ freq = np.sum(self.confusionMatrix, axis=1) / np.sum(self.confusionMatrix)
+ iu = np.diag(self.confusionMatrix) / (
+ np.sum(self.confusionMatrix, axis=1) + np.sum(self.confusionMatrix, axis=0) -
+ np.diag(self.confusionMatrix))
+ FWIoU = (freq[freq > 0] * iu[freq > 0]).sum()
+ return FWIoU
+
+
+ def addBatch(self, imgPredict, imgLabel):
+ assert imgPredict.shape == imgLabel.shape
+ self.confusionMatrix += self.genConfusionMatrix(imgPredict, imgLabel)
+
+ def reset(self):
+ self.confusionMatrix = np.zeros((self.numClass, self.numClass))
+
+
+
+
+
+# Plots ----------------------------------------------------------------------------------------------------------------
+
+def plot_pr_curve(px, py, ap, save_dir='.', names=()):
+ fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
+ py = np.stack(py, axis=1)
+
+ if 0 < len(names) < 21: # show mAP in legend if < 10 classes
+ for i, y in enumerate(py.T):
+ ax.plot(px, y, linewidth=1, label=f'{names[i]} %.3f' % ap[i, 0]) # plot(recall, precision)
+ else:
+ ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision)
+
+ ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean())
+ ax.set_xlabel('Recall')
+ ax.set_ylabel('Precision')
+ ax.set_xlim(0, 1)
+ ax.set_ylim(0, 1)
+ plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
+ fig.savefig(Path(save_dir) / 'precision_recall_curve.png', dpi=250)
diff --git a/lib/core/function.py b/lib/core/function.py
new file mode 100644
index 0000000000000000000000000000000000000000..554b1fb388c1368daa496e0e6f0a2350367adb21
--- /dev/null
+++ b/lib/core/function.py
@@ -0,0 +1,510 @@
+import time
+from lib.core.evaluate import ConfusionMatrix,SegmentationMetric
+from lib.core.general import non_max_suppression,check_img_size,scale_coords,xyxy2xywh,xywh2xyxy,box_iou,coco80_to_coco91_class,plot_images,ap_per_class,output_to_target
+from lib.utils.utils import time_synchronized
+from lib.utils import plot_img_and_mask,plot_one_box,show_seg_result
+import torch
+from threading import Thread
+import numpy as np
+from PIL import Image
+from torchvision import transforms
+from pathlib import Path
+import json
+import random
+import cv2
+import os
+import math
+from torch.cuda import amp
+from tqdm import tqdm
+
+
+def train(cfg, train_loader, model, criterion, optimizer, scaler, epoch, num_batch, num_warmup,
+ writer_dict, logger, device, rank=-1):
+ """
+ train for one epoch
+
+ Inputs:
+ - config: configurations
+ - train_loader: loder for data
+ - model:
+ - criterion: (function) calculate all the loss, return total_loss, head_losses
+ - writer_dict:
+ outputs(2,)
+ output[0] len:3, [1,3,32,32,85], [1,3,16,16,85], [1,3,8,8,85]
+ output[1] len:1, [2,256,256]
+ output[2] len:1, [2,256,256]
+ target(2,)
+ target[0] [1,n,5]
+ target[1] [2,256,256]
+ target[2] [2,256,256]
+ Returns:
+ None
+
+ """
+ batch_time = AverageMeter()
+ data_time = AverageMeter()
+ losses = AverageMeter()
+
+ # switch to train mode
+ model.train()
+ start = time.time()
+ for i, (input, target, paths, shapes) in enumerate(train_loader):
+ intermediate = time.time()
+ #print('tims:{}'.format(intermediate-start))
+ num_iter = i + num_batch * (epoch - 1)
+
+ if num_iter < num_warmup:
+ # warm up
+ lf = lambda x: ((1 + math.cos(x * math.pi / cfg.TRAIN.END_EPOCH)) / 2) * \
+ (1 - cfg.TRAIN.LRF) + cfg.TRAIN.LRF # cosine
+ xi = [0, num_warmup]
+ # model.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou)
+ for j, x in enumerate(optimizer.param_groups):
+ # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
+ x['lr'] = np.interp(num_iter, xi, [cfg.TRAIN.WARMUP_BIASE_LR if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
+ if 'momentum' in x:
+ x['momentum'] = np.interp(num_iter, xi, [cfg.TRAIN.WARMUP_MOMENTUM, cfg.TRAIN.MOMENTUM])
+
+ data_time.update(time.time() - start)
+ if not cfg.DEBUG:
+ input = input.to(device, non_blocking=True)
+ assign_target = []
+ for tgt in target:
+ assign_target.append(tgt.to(device))
+ target = assign_target
+ with amp.autocast(enabled=device.type != 'cpu'):
+ outputs = model(input)
+ total_loss, head_losses = criterion(outputs, target, shapes,model)
+ # print(head_losses)
+
+ # compute gradient and do update step
+ optimizer.zero_grad()
+ scaler.scale(total_loss).backward()
+ scaler.step(optimizer)
+ scaler.update()
+
+ if rank in [-1, 0]:
+ # measure accuracy and record loss
+ losses.update(total_loss.item(), input.size(0))
+
+ # _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(),
+ # target.detach().cpu().numpy())
+ # acc.update(avg_acc, cnt)
+
+ # measure elapsed time
+ batch_time.update(time.time() - start)
+ end = time.time()
+ if i % cfg.PRINT_FREQ == 0:
+ msg = 'Epoch: [{0}][{1}/{2}]\t' \
+ 'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
+ 'Speed {speed:.1f} samples/s\t' \
+ 'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
+ 'Loss {loss.val:.5f} ({loss.avg:.5f})'.format(
+ epoch, i, len(train_loader), batch_time=batch_time,
+ speed=input.size(0)/batch_time.val,
+ data_time=data_time, loss=losses)
+ logger.info(msg)
+
+ writer = writer_dict['writer']
+ global_steps = writer_dict['train_global_steps']
+ writer.add_scalar('train_loss', losses.val, global_steps)
+ # writer.add_scalar('train_acc', acc.val, global_steps)
+ writer_dict['train_global_steps'] = global_steps + 1
+
+
+def validate(epoch,config, val_loader, val_dataset, model, criterion, output_dir,
+ tb_log_dir, writer_dict=None, logger=None, device='cpu', rank=-1):
+ """
+ validata
+
+ Inputs:
+ - config: configurations
+ - train_loader: loder for data
+ - model:
+ - criterion: (function) calculate all the loss, return
+ - writer_dict:
+
+ Return:
+ None
+ """
+ # setting
+ max_stride = 32
+ weights = None
+
+ save_dir = output_dir + os.path.sep + 'visualization'
+ if not os.path.exists(save_dir):
+ os.mkdir(save_dir)
+
+ # print(save_dir)
+ _, imgsz = [check_img_size(x, s=max_stride) for x in config.MODEL.IMAGE_SIZE] #imgsz is multiple of max_stride
+ batch_size = config.TRAIN.BATCH_SIZE_PER_GPU * len(config.GPUS)
+ test_batch_size = config.TEST.BATCH_SIZE_PER_GPU * len(config.GPUS)
+ training = False
+ is_coco = False #is coco dataset
+ save_conf=False # save auto-label confidences
+ verbose=False
+ save_hybrid=False
+ log_imgs,wandb = min(16,100), None
+
+ nc = 1
+ iouv = torch.linspace(0.5,0.95,10).to(device) #iou vector for mAP@0.5:0.95
+ niou = iouv.numel()
+
+ try:
+ import wandb
+ except ImportError:
+ wandb = None
+ log_imgs = 0
+
+ seen = 0
+ confusion_matrix = ConfusionMatrix(nc=model.nc) #detector confusion matrix
+ da_metric = SegmentationMetric(config.num_seg_class) #segment confusion matrix
+ ll_metric = SegmentationMetric(2) #segment confusion matrix
+
+ names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
+ colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
+ coco91class = coco80_to_coco91_class()
+
+ s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
+ p, r, f1, mp, mr, map50, map, t_inf, t_nms = 0., 0., 0., 0., 0., 0., 0., 0., 0.
+
+ losses = AverageMeter()
+
+ da_acc_seg = AverageMeter()
+ da_IoU_seg = AverageMeter()
+ da_mIoU_seg = AverageMeter()
+
+ ll_acc_seg = AverageMeter()
+ ll_IoU_seg = AverageMeter()
+ ll_mIoU_seg = AverageMeter()
+
+ T_inf = AverageMeter()
+ T_nms = AverageMeter()
+
+ # switch to train mode
+ model.eval()
+ jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
+
+ for batch_i, (img, target, paths, shapes) in tqdm(enumerate(val_loader), total=len(val_loader)):
+ if not config.DEBUG:
+ img = img.to(device, non_blocking=True)
+ assign_target = []
+ for tgt in target:
+ assign_target.append(tgt.to(device))
+ target = assign_target
+ nb, _, height, width = img.shape #batch size, channel, height, width
+
+ with torch.no_grad():
+ pad_w, pad_h = shapes[0][1][1]
+ pad_w = int(pad_w)
+ pad_h = int(pad_h)
+ ratio = shapes[0][1][0][0]
+
+ t = time_synchronized()
+ det_out, da_seg_out, ll_seg_out= model(img)
+ t_inf = time_synchronized() - t
+ if batch_i > 0:
+ T_inf.update(t_inf/img.size(0),img.size(0))
+
+ inf_out,train_out = det_out
+
+ #driving area segment evaluation
+ _,da_predict=torch.max(da_seg_out, 1)
+ _,da_gt=torch.max(target[1], 1)
+ da_predict = da_predict[:, pad_h:height-pad_h, pad_w:width-pad_w]
+ da_gt = da_gt[:, pad_h:height-pad_h, pad_w:width-pad_w]
+
+ da_metric.reset()
+ da_metric.addBatch(da_predict.cpu(), da_gt.cpu())
+ da_acc = da_metric.pixelAccuracy()
+ da_IoU = da_metric.IntersectionOverUnion()
+ da_mIoU = da_metric.meanIntersectionOverUnion()
+
+ da_acc_seg.update(da_acc,img.size(0))
+ da_IoU_seg.update(da_IoU,img.size(0))
+ da_mIoU_seg.update(da_mIoU,img.size(0))
+
+ #lane line segment evaluation
+ _,ll_predict=torch.max(ll_seg_out, 1)
+ _,ll_gt=torch.max(target[2], 1)
+ ll_predict = ll_predict[:, pad_h:height-pad_h, pad_w:width-pad_w]
+ ll_gt = ll_gt[:, pad_h:height-pad_h, pad_w:width-pad_w]
+
+ ll_metric.reset()
+ ll_metric.addBatch(ll_predict.cpu(), ll_gt.cpu())
+ ll_acc = ll_metric.lineAccuracy()
+ ll_IoU = ll_metric.IntersectionOverUnion()
+ ll_mIoU = ll_metric.meanIntersectionOverUnion()
+
+ ll_acc_seg.update(ll_acc,img.size(0))
+ ll_IoU_seg.update(ll_IoU,img.size(0))
+ ll_mIoU_seg.update(ll_mIoU,img.size(0))
+
+ total_loss, head_losses = criterion((train_out,da_seg_out, ll_seg_out), target, shapes,model) #Compute loss
+ losses.update(total_loss.item(), img.size(0))
+
+ #NMS
+ t = time_synchronized()
+ target[0][:, 2:] *= torch.Tensor([width, height, width, height]).to(device) # to pixels
+ lb = [target[0][target[0][:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling
+ output = non_max_suppression(inf_out, conf_thres= config.TEST.NMS_CONF_THRESHOLD, iou_thres=config.TEST.NMS_IOU_THRESHOLD, labels=lb)
+ #output = non_max_suppression(inf_out, conf_thres=0.001, iou_thres=0.6)
+ #output = non_max_suppression(inf_out, conf_thres=config.TEST.NMS_CONF_THRES, iou_thres=config.TEST.NMS_IOU_THRES)
+ t_nms = time_synchronized() - t
+ if batch_i > 0:
+ T_nms.update(t_nms/img.size(0),img.size(0))
+
+ if config.TEST.PLOTS:
+ if batch_i == 0:
+ for i in range(test_batch_size):
+ img_test = cv2.imread(paths[i])
+ da_seg_mask = da_seg_out[i][:, pad_h:height-pad_h, pad_w:width-pad_w].unsqueeze(0)
+ da_seg_mask = torch.nn.functional.interpolate(da_seg_mask, scale_factor=int(1/ratio), mode='bilinear')
+ _, da_seg_mask = torch.max(da_seg_mask, 1)
+
+ da_gt_mask = target[1][i][:, pad_h:height-pad_h, pad_w:width-pad_w].unsqueeze(0)
+ da_gt_mask = torch.nn.functional.interpolate(da_gt_mask, scale_factor=int(1/ratio), mode='bilinear')
+ _, da_gt_mask = torch.max(da_gt_mask, 1)
+
+ da_seg_mask = da_seg_mask.int().squeeze().cpu().numpy()
+ da_gt_mask = da_gt_mask.int().squeeze().cpu().numpy()
+ # seg_mask = seg_mask > 0.5
+ # plot_img_and_mask(img_test, seg_mask, i,epoch,save_dir)
+ img_test1 = img_test.copy()
+ _ = show_seg_result(img_test, da_seg_mask, i,epoch,save_dir)
+ _ = show_seg_result(img_test1, da_gt_mask, i, epoch, save_dir, is_gt=True)
+
+ img_ll = cv2.imread(paths[i])
+ ll_seg_mask = ll_seg_out[i][:, pad_h:height-pad_h, pad_w:width-pad_w].unsqueeze(0)
+ ll_seg_mask = torch.nn.functional.interpolate(ll_seg_mask, scale_factor=int(1/ratio), mode='bilinear')
+ _, ll_seg_mask = torch.max(ll_seg_mask, 1)
+
+ ll_gt_mask = target[2][i][:, pad_h:height-pad_h, pad_w:width-pad_w].unsqueeze(0)
+ ll_gt_mask = torch.nn.functional.interpolate(ll_gt_mask, scale_factor=int(1/ratio), mode='bilinear')
+ _, ll_gt_mask = torch.max(ll_gt_mask, 1)
+
+ ll_seg_mask = ll_seg_mask.int().squeeze().cpu().numpy()
+ ll_gt_mask = ll_gt_mask.int().squeeze().cpu().numpy()
+ # seg_mask = seg_mask > 0.5
+ # plot_img_and_mask(img_test, seg_mask, i,epoch,save_dir)
+ img_ll1 = img_ll.copy()
+ _ = show_seg_result(img_ll, ll_seg_mask, i,epoch,save_dir, is_ll=True)
+ _ = show_seg_result(img_ll1, ll_gt_mask, i, epoch, save_dir, is_ll=True, is_gt=True)
+
+ img_det = cv2.imread(paths[i])
+ img_gt = img_det.copy()
+ det = output[i].clone()
+ if len(det):
+ det[:,:4] = scale_coords(img[i].shape[1:],det[:,:4],img_det.shape).round()
+ for *xyxy,conf,cls in reversed(det):
+ #print(cls)
+ label_det_pred = f'{names[int(cls)]} {conf:.2f}'
+ plot_one_box(xyxy, img_det , label=label_det_pred, color=colors[int(cls)], line_thickness=3)
+ cv2.imwrite(save_dir+"/batch_{}_{}_det_pred.png".format(epoch,i),img_det)
+
+ labels = target[0][target[0][:, 0] == i, 1:]
+ # print(labels)
+ labels[:,1:5]=xywh2xyxy(labels[:,1:5])
+ if len(labels):
+ labels[:,1:5]=scale_coords(img[i].shape[1:],labels[:,1:5],img_gt.shape).round()
+ for cls,x1,y1,x2,y2 in labels:
+ #print(names)
+ #print(cls)
+ label_det_gt = f'{names[int(cls)]}'
+ xyxy = (x1,y1,x2,y2)
+ plot_one_box(xyxy, img_gt , label=label_det_gt, color=colors[int(cls)], line_thickness=3)
+ cv2.imwrite(save_dir+"/batch_{}_{}_det_gt.png".format(epoch,i),img_gt)
+
+ # Statistics per image
+ # output([xyxy,conf,cls])
+ # target[0] ([img_id,cls,xyxy])
+ for si, pred in enumerate(output):
+ labels = target[0][target[0][:, 0] == si, 1:] #all object in one image
+ nl = len(labels) # num of object
+ tcls = labels[:, 0].tolist() if nl else [] # target class
+ path = Path(paths[si])
+ seen += 1
+
+ if len(pred) == 0:
+ if nl:
+ stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
+ continue
+
+ # Predictions
+ predn = pred.clone()
+ scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1]) # native-space pred
+
+ # Append to text file
+ if config.TEST.SAVE_TXT:
+ gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]] # normalization gain whwh
+ for *xyxy, conf, cls in predn.tolist():
+ xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
+ line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format
+ with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f:
+ f.write(('%g ' * len(line)).rstrip() % line + '\n')
+
+ # W&B logging
+ if config.TEST.PLOTS and len(wandb_images) < log_imgs:
+ box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
+ "class_id": int(cls),
+ "box_caption": "%s %.3f" % (names[cls], conf),
+ "scores": {"class_score": conf},
+ "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()]
+ boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space
+ wandb_images.append(wandb.Image(img[si], boxes=boxes, caption=path.name))
+
+ # Append to pycocotools JSON dictionary
+ if config.TEST.SAVE_JSON:
+ # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
+ image_id = int(path.stem) if path.stem.isnumeric() else path.stem
+ box = xyxy2xywh(predn[:, :4]) # xywh
+ box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
+ for p, b in zip(pred.tolist(), box.tolist()):
+ jdict.append({'image_id': image_id,
+ 'category_id': coco91class[int(p[5])] if is_coco else int(p[5]),
+ 'bbox': [round(x, 3) for x in b],
+ 'score': round(p[4], 5)})
+
+
+ # Assign all predictions as incorrect
+ correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
+ if nl:
+ detected = [] # target indices
+ tcls_tensor = labels[:, 0]
+
+ # target boxes
+ tbox = xywh2xyxy(labels[:, 1:5])
+ scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1]) # native-space labels
+ if config.TEST.PLOTS:
+ confusion_matrix.process_batch(pred, torch.cat((labels[:, 0:1], tbox), 1))
+
+ # Per target class
+ for cls in torch.unique(tcls_tensor):
+ ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1) # prediction indices
+ pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1) # target indices
+
+ # Search for detections
+ if pi.shape[0]:
+ # Prediction to target ious
+ # n*m n:pred m:label
+ ious, i = box_iou(predn[pi, :4], tbox[ti]).max(1) # best ious, indices
+ # Append detections
+ detected_set = set()
+ for j in (ious > iouv[0]).nonzero(as_tuple=False):
+ d = ti[i[j]] # detected target
+ if d.item() not in detected_set:
+ detected_set.add(d.item())
+ detected.append(d)
+ correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn
+ if len(detected) == nl: # all targets already located in image
+ break
+
+ # Append statistics (correct, conf, pcls, tcls)
+ stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
+
+ if config.TEST.PLOTS and batch_i < 3:
+ f = save_dir +'/'+ f'test_batch{batch_i}_labels.jpg' # labels
+ #Thread(target=plot_images, args=(img, target[0], paths, f, names), daemon=True).start()
+ f = save_dir +'/'+ f'test_batch{batch_i}_pred.jpg' # predictions
+ #Thread(target=plot_images, args=(img, output_to_target(output), paths, f, names), daemon=True).start()
+
+ # Compute statistics
+ # stats : [[all_img_correct]...[all_img_tcls]]
+ stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy zip(*) :unzip
+
+ map70 = None
+ map75 = None
+ if len(stats) and stats[0].any():
+ p, r, ap, f1, ap_class = ap_per_class(*stats, plot=False, save_dir=save_dir, names=names)
+ ap50, ap70, ap75,ap = ap[:, 0], ap[:,4], ap[:,5],ap.mean(1) # [P, R, AP@0.5, AP@0.5:0.95]
+ mp, mr, map50, map70, map75, map = p.mean(), r.mean(), ap50.mean(), ap70.mean(),ap75.mean(),ap.mean()
+ nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class
+ else:
+ nt = torch.zeros(1)
+
+ # Print results
+ pf = '%20s' + '%12.3g' * 6 # print format
+ print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))
+ #print(map70)
+ #print(map75)
+
+ # Print results per class
+ if (verbose or (nc <= 20 and not training)) and nc > 1 and len(stats):
+ for i, c in enumerate(ap_class):
+ print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
+
+ # Print speeds
+ t = tuple(x / seen * 1E3 for x in (t_inf, t_nms, t_inf + t_nms)) + (imgsz, imgsz, batch_size) # tuple
+ if not training:
+ print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)
+
+ # Plots
+ if config.TEST.PLOTS:
+ confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
+ if wandb and wandb.run:
+ wandb.log({"Images": wandb_images})
+ wandb.log({"Validation": [wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob('test*.jpg'))]})
+
+ # Save JSON
+ if config.TEST.SAVE_JSON and len(jdict):
+ w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights
+ anno_json = '../coco/annotations/instances_val2017.json' # annotations json
+ pred_json = str(save_dir / f"{w}_predictions.json") # predictions json
+ print('\nEvaluating pycocotools mAP... saving %s...' % pred_json)
+ with open(pred_json, 'w') as f:
+ json.dump(jdict, f)
+
+ try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
+ from pycocotools.coco import COCO
+ from pycocotools.cocoeval import COCOeval
+
+ anno = COCO(anno_json) # init annotations api
+ pred = anno.loadRes(pred_json) # init predictions api
+ eval = COCOeval(anno, pred, 'bbox')
+ if is_coco:
+ eval.params.imgIds = [int(Path(x).stem) for x in val_loader.dataset.img_files] # image IDs to evaluate
+ eval.evaluate()
+ eval.accumulate()
+ eval.summarize()
+ map, map50 = eval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5)
+ except Exception as e:
+ print(f'pycocotools unable to run: {e}')
+
+ # Return results
+ if not training:
+ s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if config.TEST.SAVE_TXT else ''
+ print(f"Results saved to {save_dir}{s}")
+ model.float() # for training
+ maps = np.zeros(nc) + map
+ for i, c in enumerate(ap_class):
+ maps[c] = ap[i]
+
+ da_segment_result = (da_acc_seg.avg,da_IoU_seg.avg,da_mIoU_seg.avg)
+ ll_segment_result = (ll_acc_seg.avg,ll_IoU_seg.avg,ll_mIoU_seg.avg)
+
+ # print(da_segment_result)
+ # print(ll_segment_result)
+ detect_result = np.asarray([mp, mr, map50, map])
+ # print('mp:{},mr:{},map50:{},map:{}'.format(mp, mr, map50, map))
+ #print segmet_result
+ t = [T_inf.avg, T_nms.avg]
+ return da_segment_result, ll_segment_result, detect_result, losses.avg, maps, t
+
+
+
+class AverageMeter(object):
+ """Computes and stores the average and current value"""
+ def __init__(self):
+ self.reset()
+
+ def reset(self):
+ self.val = 0
+ self.avg = 0
+ self.sum = 0
+ self.count = 0
+
+ def update(self, val, n=1):
+ self.val = val
+ self.sum += val * n
+ self.count += n
+ self.avg = self.sum / self.count if self.count != 0 else 0
\ No newline at end of file
diff --git a/lib/core/general.py b/lib/core/general.py
new file mode 100644
index 0000000000000000000000000000000000000000..4834490cdf298454ca542caded1ea5c09985e3cc
--- /dev/null
+++ b/lib/core/general.py
@@ -0,0 +1,466 @@
+import glob
+import logging
+import os
+import platform
+import random
+import re
+import shutil
+import subprocess
+import time
+import torchvision
+from contextlib import contextmanager
+from copy import copy
+from pathlib import Path
+
+import cv2
+import math
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+import torch.nn as nn
+import yaml
+from PIL import Image
+from scipy.cluster.vq import kmeans
+from scipy.signal import butter, filtfilt
+from tqdm import tqdm
+
+
+def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-9):
+ # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
+ box2 = box2.T
+
+ # Get the coordinates of bounding boxes
+ if x1y1x2y2: # x1, y1, x2, y2 = box1
+ b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
+ b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
+ else: # transform from xywh to xyxy
+ b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
+ b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
+ b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
+ b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
+
+ # Intersection area
+ inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
+ (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
+
+ # Union Area
+ w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
+ w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
+ union = w1 * h1 + w2 * h2 - inter + eps
+
+ iou = inter / union
+ if GIoU or DIoU or CIoU:
+ cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width
+ ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height
+ if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
+ c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared
+ rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
+ (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared
+ if DIoU:
+ return iou - rho2 / c2 # DIoU
+ elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
+ v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
+ with torch.no_grad():
+ alpha = v / ((1 + eps) - iou + v)
+ return iou - (rho2 / c2 + v * alpha) # CIoU
+ else: # GIoU https://arxiv.org/pdf/1902.09630.pdf
+ c_area = cw * ch + eps # convex area
+ return iou - (c_area - union) / c_area # GIoU
+ else:
+ return iou # IoU
+
+
+def box_iou(box1, box2):
+ # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
+ """
+ Return intersection-over-union (Jaccard index) of boxes.
+ Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
+ Arguments:
+ box1 (Tensor[N, 4])
+ box2 (Tensor[M, 4])
+ Returns:
+ iou (Tensor[N, M]): the NxM matrix containing the pairwise
+ IoU values for every element in boxes1 and boxes2
+ """
+
+ def box_area(box):
+ # box = 4xn
+ return (box[2] - box[0]) * (box[3] - box[1]) #(x2-x1)*(y2-y1)
+
+ area1 = box_area(box1.T)
+ area2 = box_area(box2.T)
+
+ # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
+ inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
+ return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter)
+
+def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, labels=()):
+ """Performs Non-Maximum Suppression (NMS) on inference results
+
+ Returns:
+ detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
+ """
+
+ nc = prediction.shape[2] - 5 # number of classes
+ xc = prediction[..., 4] > conf_thres # candidates
+
+ # Settings
+ min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height
+ max_det = 300 # maximum number of detections per image
+ max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
+ time_limit = 10.0 # seconds to quit after
+ redundant = True # require redundant detections
+ multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img)
+ merge = False # use merge-NMS
+
+ t = time.time()
+ output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
+ for xi, x in enumerate(prediction): # image index, image inference
+ # Apply constraints
+ # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height
+ x = x[xc[xi]] # confidence
+
+ # Cat apriori labels if autolabelling
+ if labels and len(labels[xi]):
+ l = labels[xi]
+ v = torch.zeros((len(l), nc + 5), device=x.device)
+ v[:, :4] = l[:, 1:5] # box
+ v[:, 4] = 1.0 # conf
+ v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls
+ x = torch.cat((x, v), 0)
+
+ # If none remain process next image
+ if not x.shape[0]:
+ continue
+
+ # Compute conf
+ x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
+
+ # Box (center x, center y, width, height) to (x1, y1, x2, y2)
+ box = xywh2xyxy(x[:, :4])
+
+ # Detections matrix nx6 (xyxy, conf, cls)
+ if multi_label:
+ i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
+ x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
+ else: # best class only
+ conf, j = x[:, 5:].max(1, keepdim=True)
+ x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
+
+ # Filter by class
+ if classes is not None:
+ x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
+
+ # Apply finite constraint
+ # if not torch.isfinite(x).all():
+ # x = x[torch.isfinite(x).all(1)]
+
+ # Check shape
+ n = x.shape[0] # number of boxes
+ if not n: # no boxes
+ continue
+ elif n > max_nms: # excess boxes
+ x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
+
+ # Batched NMS
+ c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
+ boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
+ i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
+ if i.shape[0] > max_det: # limit detections
+ i = i[:max_det]
+ if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
+ # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
+ iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
+ weights = iou * scores[None] # box weights
+ x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
+ if redundant:
+ i = i[iou.sum(1) > 1] # require redundancy
+
+ output[xi] = x[i]
+ if (time.time() - t) > time_limit:
+ print(f'WARNING: NMS time limit {time_limit}s exceeded')
+ break # time limit exceeded
+
+ return output
+
+
+def xywh2xyxy(x):
+ # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
+ y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
+ y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
+ y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
+ y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
+ y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
+ return y
+
+def fitness(x):
+ # Returns fitness (for use with results.txt or evolve.txt)
+ w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
+ return (x[:, :4] * w).sum(1)
+
+def check_img_size(img_size, s=32):
+ # Verify img_size is a multiple of stride s
+ new_size = make_divisible(img_size, int(s)) # ceil gs-multiple
+ if new_size != img_size:
+ print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size))
+ return new_size
+
+def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
+ # Rescale coords (xyxy) from img1_shape to img0_shape
+ if ratio_pad is None: # calculate from img0_shape
+ gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
+ pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
+ else:
+ gain = ratio_pad[0][0]
+ pad = ratio_pad[1]
+
+ coords[:, [0, 2]] -= pad[0] # x padding
+ coords[:, [1, 3]] -= pad[1] # y padding
+ coords[:, :4] /= gain
+ clip_coords(coords, img0_shape)
+ return coords
+
+def clip_coords(boxes, img_shape):
+ # Clip bounding xyxy bounding boxes to image shape (height, width)
+ boxes[:, 0].clamp_(0, img_shape[1]) # x1
+ boxes[:, 1].clamp_(0, img_shape[0]) # y1
+ boxes[:, 2].clamp_(0, img_shape[1]) # x2
+ boxes[:, 3].clamp_(0, img_shape[0]) # y2
+
+def make_divisible(x, divisor):
+ # Returns x evenly divisible by divisor
+ return math.ceil(x / divisor) * divisor
+
+def xyxy2xywh(x):
+ # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
+ y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
+ y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
+ y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
+ y[:, 2] = x[:, 2] - x[:, 0] # width
+ y[:, 3] = x[:, 3] - x[:, 1] # height
+ return y
+
+def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=640, max_subplots=16):
+ # Plot image grid with labels
+
+ if isinstance(images, torch.Tensor):
+ images = images.cpu().float().numpy()
+ if isinstance(targets, torch.Tensor):
+ targets = targets.cpu().numpy()
+
+ # un-normalise
+ if np.max(images[0]) <= 1:
+ images *= 255
+
+ tl = 3 # line thickness
+ tf = max(tl - 1, 1) # font thickness
+ bs, _, h, w = images.shape # batch size, _, height, width
+ bs = min(bs, max_subplots) # limit plot images
+ ns = np.ceil(bs ** 0.5) # number of subplots (square)
+
+ # Check if we should resize
+ scale_factor = max_size / max(h, w)
+ if scale_factor < 1:
+ h = math.ceil(scale_factor * h)
+ w = math.ceil(scale_factor * w)
+
+ colors = color_list() # list of colors
+ mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init
+ for i, img in enumerate(images):
+ if i == max_subplots: # if last batch has fewer images than we expect
+ break
+
+ block_x = int(w * (i // ns))
+ block_y = int(h * (i % ns))
+
+ img = img.transpose(1, 2, 0)
+ if scale_factor < 1:
+ img = cv2.resize(img, (w, h))
+
+ mosaic[block_y:block_y + h, block_x:block_x + w, :] = img
+ if len(targets) > 0:
+ image_targets = targets[targets[:, 0] == i]
+ boxes = xywh2xyxy(image_targets[:, 2:6]).T
+ classes = image_targets[:, 1].astype('int')
+ labels = image_targets.shape[1] == 6 # labels if no conf column
+ conf = None if labels else image_targets[:, 6] # check for confidence presence (label vs pred)
+
+ if boxes.shape[1]:
+ if boxes.max() <= 1.01: # if normalized with tolerance 0.01
+ boxes[[0, 2]] *= w # scale to pixels
+ boxes[[1, 3]] *= h
+ elif scale_factor < 1: # absolute coords need scale if image scales
+ boxes *= scale_factor
+ boxes[[0, 2]] += block_x
+ boxes[[1, 3]] += block_y
+ for j, box in enumerate(boxes.T):
+ cls = int(classes[j])
+ color = colors[cls % len(colors)]
+ cls = names[cls] if names else cls
+ if labels or conf[j] > 0.25: # 0.25 conf thresh
+ label = '%s' % cls if labels else '%s %.1f' % (cls, conf[j])
+ plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl)
+
+ # Draw image filename labels
+ if paths:
+ label = Path(paths[i]).name[:40] # trim to 40 char
+ t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
+ cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf,
+ lineType=cv2.LINE_AA)
+
+ # Image border
+ cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3)
+
+ if fname:
+ r = min(1280. / max(h, w) / ns, 1.0) # ratio to limit image size
+ mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA)
+ # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save
+ Image.fromarray(mosaic).save(fname) # PIL save
+ return mosaic
+
+def plot_one_box(x, img, color=None, label=None, line_thickness=None):
+ # Plots one bounding box on image img
+ tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness
+ color = color or [random.randint(0, 255) for _ in range(3)]
+ c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
+ cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
+ if label:
+ tf = max(tl - 1, 1) # font thickness
+ t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
+ c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
+ cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled
+ cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
+
+def color_list():
+ # Return first 10 plt colors as (r,g,b) https://stackoverflow.com/questions/51350872/python-from-color-name-to-rgb
+ def hex2rgb(h):
+ return tuple(int(str(h[1 + i:1 + i + 2]), 16) for i in (0, 2, 4))
+
+ return [hex2rgb(h) for h in plt.rcParams['axes.prop_cycle'].by_key()['color']]
+
+def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='precision-recall_curve.png', names=[]):
+ """ Compute the average precision, given the recall and precision curves.
+ Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
+ # Arguments
+ tp: True positives (nparray, nx1 or nx10).
+ conf: Objectness value from 0-1 (nparray).
+ pred_cls: Predicted object classes (nparray).
+ target_cls: True object classes (nparray).
+ plot: Plot precision-recall curve at mAP@0.5
+ save_dir: Plot save directory
+ # Returns
+ The average precision as computed in py-faster-rcnn.
+ """
+
+ # Sort by objectness
+ i = np.argsort(-conf)
+ tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
+
+ # Find unique classes
+ unique_classes = np.unique(target_cls)
+
+ # Create Precision-Recall curve and compute AP for each class
+ px, py = np.linspace(0, 1, 1000), [] # for plotting
+ pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898
+ s = [unique_classes.shape[0], tp.shape[1]] # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95)
+ ap, p, r = np.zeros(s), np.zeros((unique_classes.shape[0], 1000)), np.zeros((unique_classes.shape[0], 1000))
+ for ci, c in enumerate(unique_classes):
+ i = pred_cls == c
+ n_l = (target_cls == c).sum() # number of labels
+ n_p = i.sum() # number of predictions
+
+ if n_p == 0 or n_l == 0:
+ continue
+ else:
+ # Accumulate FPs and TPs
+ fpc = (1 - tp[i]).cumsum(0)
+ tpc = tp[i].cumsum(0)
+
+ # Recall
+ recall = tpc / (n_l + 1e-16) # recall curve
+ r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases
+
+ # Precision
+ precision = tpc / (tpc + fpc) # precision curve
+ p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score
+ # AP from recall-precision curve
+ for j in range(tp.shape[1]):
+ ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
+ if plot and (j == 0):
+ py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5
+
+ # Compute F1 score (harmonic mean of precision and recall)
+ f1 = 2 * p * r / (p + r + 1e-16)
+ i=r.mean(0).argmax()
+
+ if plot:
+ plot_pr_curve(px, py, ap, save_dir, names)
+
+ return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype('int32')
+
+def compute_ap(recall, precision):
+ """ Compute the average precision, given the recall and precision curves.
+ Source: https://github.com/rbgirshick/py-faster-rcnn.
+ # Arguments
+ recall: The recall curve (list).
+ precision: The precision curve (list).
+ # Returns
+ The average precision as computed in py-faster-rcnn.
+ """
+
+ # Append sentinel values to beginning and end
+ mrec = np.concatenate(([0.], recall, [recall[-1] + 1E-3]))
+ mpre = np.concatenate(([1.], precision, [0.]))
+
+ # Compute the precision envelope
+ mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))
+
+ # Integrate area under curve
+ method = 'interp' # methods: 'continuous', 'interp'
+ if method == 'interp':
+ x = np.linspace(0, 1, 101) # 101-point interp (COCO)
+ ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate
+
+ else: # 'continuous'
+ i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes
+ ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve
+
+ return ap, mpre, mrec
+
+def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper)
+ # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
+ # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
+ # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
+ # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco
+ # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet
+ x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
+ 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
+ return x
+
+def output_to_target(output):
+ # Convert model output to target format [batch_id, class_id, x, y, w, h, conf]
+ targets = []
+ for i, o in enumerate(output):
+ for *box, conf, cls in o.cpu().numpy():
+ targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf])
+ return np.array(targets)
+
+def plot_pr_curve(px, py, ap, save_dir='.', names=()):
+ fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
+ py = np.stack(py, axis=1)
+
+ if 0 < len(names) < 21: # show mAP in legend if < 10 classes
+ for i, y in enumerate(py.T):
+ ax.plot(px, y, linewidth=1, label=f'{names[i]} %.3f' % ap[i, 0]) # plot(recall, precision)
+ else:
+ ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision)
+
+ ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean())
+ ax.set_xlabel('Recall')
+ ax.set_ylabel('Precision')
+ ax.set_xlim(0, 1)
+ ax.set_ylim(0, 1)
+ plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
+ fig.savefig(Path(save_dir) / 'precision_recall_curve.png', dpi=250)
\ No newline at end of file
diff --git a/lib/core/loss.py b/lib/core/loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..7dbeaa925f94ba92c8547931a4e989c4e34caf56
--- /dev/null
+++ b/lib/core/loss.py
@@ -0,0 +1,237 @@
+import torch.nn as nn
+import torch
+from .general import bbox_iou
+from .postprocess import build_targets
+from lib.core.evaluate import SegmentationMetric
+
+class MultiHeadLoss(nn.Module):
+ """
+ collect all the loss we need
+ """
+ def __init__(self, losses, cfg, lambdas=None):
+ """
+ Inputs:
+ - losses: (list)[nn.Module, nn.Module, ...]
+ - cfg: config object
+ - lambdas: (list) + IoU loss, weight for each loss
+ """
+ super().__init__()
+ # lambdas: [cls, obj, iou, la_seg, ll_seg, ll_iou]
+ if not lambdas:
+ lambdas = [1.0 for _ in range(len(losses) + 3)]
+ assert all(lam >= 0.0 for lam in lambdas)
+
+ self.losses = nn.ModuleList(losses)
+ self.lambdas = lambdas
+ self.cfg = cfg
+
+ def forward(self, head_fields, head_targets, shapes, model):
+ """
+ Inputs:
+ - head_fields: (list) output from each task head
+ - head_targets: (list) ground-truth for each task head
+ - model:
+
+ Returns:
+ - total_loss: sum of all the loss
+ - head_losses: (tuple) contain all loss[loss1, loss2, ...]
+
+ """
+ # head_losses = [ll
+ # for l, f, t in zip(self.losses, head_fields, head_targets)
+ # for ll in l(f, t)]
+ #
+ # assert len(self.lambdas) == len(head_losses)
+ # loss_values = [lam * l
+ # for lam, l in zip(self.lambdas, head_losses)
+ # if l is not None]
+ # total_loss = sum(loss_values) if loss_values else None
+ # print(model.nc)
+ total_loss, head_losses = self._forward_impl(head_fields, head_targets, shapes, model)
+
+ return total_loss, head_losses
+
+ def _forward_impl(self, predictions, targets, shapes, model):
+ """
+
+ Args:
+ predictions: predicts of [[det_head1, det_head2, det_head3], drive_area_seg_head, lane_line_seg_head]
+ targets: gts [det_targets, segment_targets, lane_targets]
+ model:
+
+ Returns:
+ total_loss: sum of all the loss
+ head_losses: list containing losses
+
+ """
+ cfg = self.cfg
+ device = targets[0].device
+ lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)
+ tcls, tbox, indices, anchors = build_targets(cfg, predictions[0], targets[0], model) # targets
+
+ # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
+ cp, cn = smooth_BCE(eps=0.0)
+
+ BCEcls, BCEobj, BCEseg = self.losses
+
+ # Calculate Losses
+ nt = 0 # number of targets
+ no = len(predictions[0]) # number of outputs
+ balance = [4.0, 1.0, 0.4] if no == 3 else [4.0, 1.0, 0.4, 0.1] # P3-5 or P3-6
+
+ # calculate detection loss
+ for i, pi in enumerate(predictions[0]): # layer index, layer predictions
+ b, a, gj, gi = indices[i] # image, anchor, gridy, gridx
+ tobj = torch.zeros_like(pi[..., 0], device=device) # target obj
+
+ n = b.shape[0] # number of targets
+ if n:
+ nt += n # cumulative targets
+ ps = pi[b, a, gj, gi] # prediction subset corresponding to targets
+
+ # Regression
+ pxy = ps[:, :2].sigmoid() * 2. - 0.5
+ pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
+ pbox = torch.cat((pxy, pwh), 1).to(device) # predicted box
+ iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target)
+ lbox += (1.0 - iou).mean() # iou loss
+
+ # Objectness
+ tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio
+
+ # Classification
+ # print(model.nc)
+ if model.nc > 1: # cls loss (only if multiple classes)
+ t = torch.full_like(ps[:, 5:], cn, device=device) # targets
+ t[range(n), tcls[i]] = cp
+ lcls += BCEcls(ps[:, 5:], t) # BCE
+ lobj += BCEobj(pi[..., 4], tobj) * balance[i] # obj loss
+
+ drive_area_seg_predicts = predictions[1].view(-1)
+ drive_area_seg_targets = targets[1].view(-1)
+ lseg_da = BCEseg(drive_area_seg_predicts, drive_area_seg_targets)
+
+ lane_line_seg_predicts = predictions[2].view(-1)
+ lane_line_seg_targets = targets[2].view(-1)
+ lseg_ll = BCEseg(lane_line_seg_predicts, lane_line_seg_targets)
+
+ metric = SegmentationMetric(2)
+ nb, _, height, width = targets[1].shape
+ pad_w, pad_h = shapes[0][1][1]
+ pad_w = int(pad_w)
+ pad_h = int(pad_h)
+ _,lane_line_pred=torch.max(predictions[2], 1)
+ _,lane_line_gt=torch.max(targets[2], 1)
+ lane_line_pred = lane_line_pred[:, pad_h:height-pad_h, pad_w:width-pad_w]
+ lane_line_gt = lane_line_gt[:, pad_h:height-pad_h, pad_w:width-pad_w]
+ metric.reset()
+ metric.addBatch(lane_line_pred.cpu(), lane_line_gt.cpu())
+ IoU = metric.IntersectionOverUnion()
+ liou_ll = 1 - IoU
+
+ s = 3 / no # output count scaling
+ lcls *= cfg.LOSS.CLS_GAIN * s * self.lambdas[0]
+ lobj *= cfg.LOSS.OBJ_GAIN * s * (1.4 if no == 4 else 1.) * self.lambdas[1]
+ lbox *= cfg.LOSS.BOX_GAIN * s * self.lambdas[2]
+
+ lseg_da *= cfg.LOSS.DA_SEG_GAIN * self.lambdas[3]
+ lseg_ll *= cfg.LOSS.LL_SEG_GAIN * self.lambdas[4]
+ liou_ll *= cfg.LOSS.LL_IOU_GAIN * self.lambdas[5]
+
+
+ if cfg.TRAIN.DET_ONLY or cfg.TRAIN.ENC_DET_ONLY or cfg.TRAIN.DET_ONLY:
+ lseg_da = 0 * lseg_da
+ lseg_ll = 0 * lseg_ll
+ liou_ll = 0 * liou_ll
+
+ if cfg.TRAIN.SEG_ONLY or cfg.TRAIN.ENC_SEG_ONLY:
+ lcls = 0 * lcls
+ lobj = 0 * lobj
+ lbox = 0 * lbox
+
+ if cfg.TRAIN.LANE_ONLY:
+ lcls = 0 * lcls
+ lobj = 0 * lobj
+ lbox = 0 * lbox
+ lseg_da = 0 * lseg_da
+
+ if cfg.TRAIN.DRIVABLE_ONLY:
+ lcls = 0 * lcls
+ lobj = 0 * lobj
+ lbox = 0 * lbox
+ lseg_ll = 0 * lseg_ll
+ liou_ll = 0 * liou_ll
+
+ loss = lbox + lobj + lcls + lseg_da + lseg_ll + liou_ll
+ # loss = lseg
+ # return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach()
+ return loss, (lbox.item(), lobj.item(), lcls.item(), lseg_da.item(), lseg_ll.item(), liou_ll.item(), loss.item())
+
+
+def get_loss(cfg, device):
+ """
+ get MultiHeadLoss
+
+ Inputs:
+ -cfg: configuration use the loss_name part or
+ function part(like regression classification)
+ -device: cpu or gpu device
+
+ Returns:
+ -loss: (MultiHeadLoss)
+
+ """
+ # class loss criteria
+ BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([cfg.LOSS.CLS_POS_WEIGHT])).to(device)
+ # object loss criteria
+ BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([cfg.LOSS.OBJ_POS_WEIGHT])).to(device)
+ # segmentation loss criteria
+ BCEseg = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([cfg.LOSS.SEG_POS_WEIGHT])).to(device)
+ # Focal loss
+ gamma = cfg.LOSS.FL_GAMMA # focal loss gamma
+ if gamma > 0:
+ BCEcls, BCEobj = FocalLoss(BCEcls, gamma), FocalLoss(BCEobj, gamma)
+
+ loss_list = [BCEcls, BCEobj, BCEseg]
+ loss = MultiHeadLoss(loss_list, cfg=cfg, lambdas=cfg.LOSS.MULTI_HEAD_LAMBDA)
+ return loss
+
+# example
+# class L1_Loss(nn.Module)
+
+
+def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
+ # return positive, negative label smoothing BCE targets
+ return 1.0 - 0.5 * eps, 0.5 * eps
+
+
+class FocalLoss(nn.Module):
+ # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
+ def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
+ # alpha balance positive & negative samples
+ # gamma focus on difficult samples
+ super(FocalLoss, self).__init__()
+ self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss()
+ self.gamma = gamma
+ self.alpha = alpha
+ self.reduction = loss_fcn.reduction
+ self.loss_fcn.reduction = 'none' # required to apply FL to each element
+
+ def forward(self, pred, true):
+ loss = self.loss_fcn(pred, true)
+ # p_t = torch.exp(-loss)
+ # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability
+
+ # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
+ pred_prob = torch.sigmoid(pred) # prob from logits
+ p_t = true * pred_prob + (1 - true) * (1 - pred_prob)
+ alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
+ modulating_factor = (1.0 - p_t) ** self.gamma
+ loss *= alpha_factor * modulating_factor
+
+ if self.reduction == 'mean':
+ return loss.mean()
+ elif self.reduction == 'sum':
+ return loss.sum()
+ else: # 'none'
+ return loss
diff --git a/lib/core/postprocess.py b/lib/core/postprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..3050f48bf3d28434d88e20cd9fdda6273bb95f76
--- /dev/null
+++ b/lib/core/postprocess.py
@@ -0,0 +1,244 @@
+import torch
+from lib.utils import is_parallel
+import numpy as np
+np.set_printoptions(threshold=np.inf)
+import cv2
+from sklearn.cluster import DBSCAN
+
+
+def build_targets(cfg, predictions, targets, model):
+ '''
+ predictions
+ [16, 3, 32, 32, 85]
+ [16, 3, 16, 16, 85]
+ [16, 3, 8, 8, 85]
+ torch.tensor(predictions[i].shape)[[3, 2, 3, 2]]
+ [32,32,32,32]
+ [16,16,16,16]
+ [8,8,8,8]
+ targets[3,x,7]
+ t [index, class, x, y, w, h, head_index]
+ '''
+ # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
+ det = model.module.model[model.module.detector_index] if is_parallel(model) \
+ else model.model[model.detector_index] # Detect() module
+ # print(type(model))
+ # det = model.model[model.detector_index]
+ # print(type(det))
+ na, nt = det.na, targets.shape[0] # number of anchors, targets
+ tcls, tbox, indices, anch = [], [], [], []
+ gain = torch.ones(7, device=targets.device) # normalized to gridspace gain
+ ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt)
+ targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices
+
+ g = 0.5 # bias
+ off = torch.tensor([[0, 0],
+ [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m
+ # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm
+ ], device=targets.device).float() * g # offsets
+
+ for i in range(det.nl):
+ anchors = det.anchors[i] #[3,2]
+ gain[2:6] = torch.tensor(predictions[i].shape)[[3, 2, 3, 2]] # xyxy gain
+ # Match targets to anchors
+ t = targets * gain
+
+ if nt:
+ # Matches
+ r = t[:, :, 4:6] / anchors[:, None] # wh ratio
+ j = torch.max(r, 1. / r).max(2)[0] < cfg.TRAIN.ANCHOR_THRESHOLD # compare
+ # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
+ t = t[j] # filter
+
+ # Offsets
+ gxy = t[:, 2:4] # grid xy
+ gxi = gain[[2, 3]] - gxy # inverse
+ j, k = ((gxy % 1. < g) & (gxy > 1.)).T
+ l, m = ((gxi % 1. < g) & (gxi > 1.)).T
+ j = torch.stack((torch.ones_like(j), j, k, l, m))
+ t = t.repeat((5, 1, 1))[j]
+ offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
+ else:
+ t = targets[0]
+ offsets = 0
+
+ # Define
+ b, c = t[:, :2].long().T # image, class
+ gxy = t[:, 2:4] # grid xy
+ gwh = t[:, 4:6] # grid wh
+ gij = (gxy - offsets).long()
+ gi, gj = gij.T # grid xy indices
+
+ # Append
+ a = t[:, 6].long() # anchor indices
+ indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices
+ tbox.append(torch.cat((gxy - gij, gwh), 1)) # box
+ anch.append(anchors[a]) # anchors
+ tcls.append(c) # class
+
+ return tcls, tbox, indices, anch
+
+def morphological_process(image, kernel_size=5, func_type=cv2.MORPH_CLOSE):
+ """
+ morphological process to fill the hole in the binary segmentation result
+ :param image:
+ :param kernel_size:
+ :return:
+ """
+ if len(image.shape) == 3:
+ raise ValueError('Binary segmentation result image should be a single channel image')
+
+ if image.dtype is not np.uint8:
+ image = np.array(image, np.uint8)
+
+ kernel = cv2.getStructuringElement(shape=cv2.MORPH_ELLIPSE, ksize=(kernel_size, kernel_size))
+
+ # close operation fille hole
+ closing = cv2.morphologyEx(image, func_type, kernel, iterations=1)
+
+ return closing
+
+def connect_components_analysis(image):
+ """
+ connect components analysis to remove the small components
+ :param image:
+ :return:
+ """
+ if len(image.shape) == 3:
+ gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+ else:
+ gray_image = image
+ # print(gray_image.dtype)
+ return cv2.connectedComponentsWithStats(gray_image, connectivity=8, ltype=cv2.CV_32S)
+
+# def if_y(samples_x):
+# for sample_x in samples_x:
+# if len(sample_x):
+# if len(sample_x) != (sample_x[-1] - sample_x[0] + 1):
+# return False
+# return True
+
+
+# def fitlane(mask, sel_labels, labels, stats):
+# for label_group in sel_labels:
+# states = [stats[k] for k in label_group]
+# x_max, y_max, w_max, h_max, _ = np.amax(np.array(states), axis=0)
+# x_min, y_min, w_min, h_min, _ = np.amin(np.array(states), axis=0)
+# # print(np.array(states))
+# x = x_min; y = y_min; w = w_max; h = h_max
+# if len(label_group) > 1:
+# # print(label_group)
+# for m in range(len(label_group)-1):
+# # print(label_group[m+1])
+# # print(label_group[0])
+# labels[labels == label_group[m+1]] = label_group[0]
+# t = label_group[0]
+# if (y_max + h - 1) > 720:
+# samples_y = np.linspace(y, 720-1, 20)
+# else:
+# samples_y = np.linspace(y, y_max+h-1, 20)
+
+# samples_x = [np.where(labels[int(sample_y)]==t)[0] for sample_y in samples_y]
+
+# if if_y(samples_x):
+# # print('in y')
+# samples_x = [int(np.mean(sample_x)) if len(sample_x) else -1 for sample_x in samples_x]
+# samples_x = np.array(samples_x)
+# samples_y = np.array(samples_y)
+# samples_y = samples_y[samples_x != -1]
+# samples_x = samples_x[samples_x != -1]
+# func = np.polyfit(samples_y, samples_x, 2)
+# # x_limits = np.polyval(func, 0)
+# # if x_limits < 0 or x_limits > 1280:
+# # if (y_max + h - 1) > 720:
+# draw_y = np.linspace(y, 720-1, 720-y)
+# # else:
+# # draw_y = np.linspace(y, y_max+h-1, y_max+h-y)
+# # draw_y = np.linspace(y, 720-1, 720-y)
+# draw_x = np.polyval(func, draw_y)
+# draw_y = draw_y[draw_x < 1280]
+# draw_x = draw_x[draw_x < 1280]
+
+# draw_points = (np.asarray([draw_x, draw_y]).T).astype(np.int32)
+# cv2.polylines(mask, [draw_points], False, 1, thickness=15)
+# else:
+# # print('in x')
+# if (x_max + w - 1) > 1280:
+# samples_x = np.linspace(x, 1280-1, 20)
+# else:
+# samples_x = np.linspace(x, x_max+w-1, 20)
+# samples_y = [np.where(labels[:, int(sample_x)]==t)[0] for sample_x in samples_x]
+# samples_y = [int(np.mean(sample_y)) if len(sample_y) else -1 for sample_y in samples_y]
+# samples_x = np.array(samples_x)
+# samples_y = np.array(samples_y)
+# samples_x = samples_x[samples_y != -1]
+# samples_y = samples_y[samples_y != -1]
+# func = np.polyfit(samples_x, samples_y, 2)
+# # y_limits = np.polyval(func, 0)
+# # if y_limits > 720 or y_limits < 0:
+# # if (x_max + w - 1) > 1280:
+# draw_x = np.linspace(x, 1280-1, 1280-x)
+# # else:
+# # y_limits = np.polyval(func, 0)
+# # if y_limits > 720 or y_limits < 0:
+# # draw_x = np.linspace(x, x_max+w-1, w+x_max-x)
+# # else:
+# # if x_max+w-1 < 640:
+# # draw_x = np.linspace(0, x_max+w-1, w+x_max-x)
+# # else:
+# # draw_x = np.linspace(x, 1280-1, 1280-x)
+# draw_y = np.polyval(func, draw_x)
+# draw_x = draw_x[draw_y < 720]
+# draw_y = draw_y[draw_y < 720]
+# draw_points = (np.asarray([draw_x, draw_y]).T).astype(np.int32)
+# cv2.polylines(mask, [draw_points], False, 1, thickness=15)
+# return mask
+
+# def connect_lane(image):
+# if len(image.shape) == 3:
+# gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+# else:
+# gray_image = image
+
+# mask = np.zeros((image.shape[0], image.shape[1]), np.uint8)
+# # print(gray_image.dtype)
+# num_labels, labels, stats, centers = cv2.connectedComponentsWithStats(gray_image, connectivity=8, ltype=cv2.CV_32S)
+# ratios = []
+# selected_label = []
+
+# for t in range(1, num_labels, 1):
+# x, y, w, h, area = stats[t]
+# center = centers[t]
+# if area > 400:
+# samples_y = [y, y+h-1]
+# selected_label.append(t)
+# samples_x = [np.where(labels[int(m)]==t)[0] for m in samples_y]
+# samples_x = [int(np.median(sample_x)) for sample_x in samples_x]
+# delta_x = samples_x[1] - samples_x[0]
+# if center[0]/1280 > 0.5:
+# ratios.append([0.7 * h / delta_x , h / w, 1.])
+# else:
+# ratios.append([0.7 * h / delta_x , h / w, 0.])
+
+# clustering = DBSCAN(eps=0.3, min_samples=1).fit(ratios)
+# # print(clustering.labels_)
+# split_labels = []
+# selected_label = np.array(selected_label)
+# for k in range(len(set(clustering.labels_))):
+# index = np.where(clustering.labels_==k)[0]
+# split_labels.append(selected_label[index])
+
+# # for i in range(1, num_labels, 1):
+# # if i not in set(selected_label):
+# # labels[labels == i] = 0
+# # print(split_labels)
+# mask_post = fitlane(mask, split_labels, labels, stats)
+# return mask_post
+
+
+
+
+
+
+
+
diff --git a/lib/dataset/AutoDriveDataset.py b/lib/dataset/AutoDriveDataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..e2e7f95a8a451182c66a5b0474fddd33da09edfe
--- /dev/null
+++ b/lib/dataset/AutoDriveDataset.py
@@ -0,0 +1,264 @@
+import cv2
+import numpy as np
+# np.set_printoptions(threshold=np.inf)
+import random
+import torch
+import torchvision.transforms as transforms
+# from visualization import plot_img_and_mask,plot_one_box,show_seg_result
+from pathlib import Path
+from PIL import Image
+from torch.utils.data import Dataset
+from ..utils import letterbox, augment_hsv, random_perspective, xyxy2xywh, cutout
+
+
+class AutoDriveDataset(Dataset):
+ """
+ A general Dataset for some common function
+ """
+ def __init__(self, cfg, is_train, inputsize=640, transform=None):
+ """
+ initial all the characteristic
+
+ Inputs:
+ -cfg: configurations
+ -is_train(bool): whether train set or not
+ -transform: ToTensor and Normalize
+
+ Returns:
+ None
+ """
+ self.is_train = is_train
+ self.cfg = cfg
+ self.transform = transform
+ self.inputsize = inputsize
+ self.Tensor = transforms.ToTensor()
+ img_root = Path(cfg.DATASET.DATAROOT)
+ label_root = Path(cfg.DATASET.LABELROOT)
+ mask_root = Path(cfg.DATASET.MASKROOT)
+ lane_root = Path(cfg.DATASET.LANEROOT)
+ if is_train:
+ indicator = cfg.DATASET.TRAIN_SET
+ else:
+ indicator = cfg.DATASET.TEST_SET
+ self.img_root = img_root / indicator
+ self.label_root = label_root / indicator
+ self.mask_root = mask_root / indicator
+ self.lane_root = lane_root / indicator
+ # self.label_list = self.label_root.iterdir()
+ self.mask_list = self.mask_root.iterdir()
+
+ self.db = []
+
+ self.data_format = cfg.DATASET.DATA_FORMAT
+
+ self.scale_factor = cfg.DATASET.SCALE_FACTOR
+ self.rotation_factor = cfg.DATASET.ROT_FACTOR
+ self.flip = cfg.DATASET.FLIP
+ self.color_rgb = cfg.DATASET.COLOR_RGB
+
+ # self.target_type = cfg.MODEL.TARGET_TYPE
+ self.shapes = np.array(cfg.DATASET.ORG_IMG_SIZE)
+
+ def _get_db(self):
+ """
+ finished on children Dataset(for dataset which is not in Bdd100k format, rewrite children Dataset)
+ """
+ raise NotImplementedError
+
+ def evaluate(self, cfg, preds, output_dir):
+ """
+ finished on children dataset
+ """
+ raise NotImplementedError
+
+ def __len__(self,):
+ """
+ number of objects in the dataset
+ """
+ return len(self.db)
+
+ def __getitem__(self, idx):
+ """
+ Get input and groud-truth from database & add data augmentation on input
+
+ Inputs:
+ -idx: the index of image in self.db(database)(list)
+ self.db(list) [a,b,c,...]
+ a: (dictionary){'image':, 'information':}
+
+ Returns:
+ -image: transformed image, first passed the data augmentation in __getitem__ function(type:numpy), then apply self.transform
+ -target: ground truth(det_gt,seg_gt)
+
+ function maybe useful
+ cv2.imread
+ cv2.cvtColor(data, cv2.COLOR_BGR2RGB)
+ cv2.warpAffine
+ """
+ data = self.db[idx]
+ img = cv2.imread(data["image"], cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+ # seg_label = cv2.imread(data["mask"], 0)
+ if self.cfg.num_seg_class == 3:
+ seg_label = cv2.imread(data["mask"])
+ else:
+ seg_label = cv2.imread(data["mask"], 0)
+ lane_label = cv2.imread(data["lane"], 0)
+ #print(lane_label.shape)
+ # print(seg_label.shape)
+ # print(lane_label.shape)
+ # print(seg_label.shape)
+ resized_shape = self.inputsize
+ if isinstance(resized_shape, list):
+ resized_shape = max(resized_shape)
+ h0, w0 = img.shape[:2] # orig hw
+ r = resized_shape / max(h0, w0) # resize image to img_size
+ if r != 1: # always resize down, only resize up if training with augmentation
+ interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR
+ img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
+ seg_label = cv2.resize(seg_label, (int(w0 * r), int(h0 * r)), interpolation=interp)
+ lane_label = cv2.resize(lane_label, (int(w0 * r), int(h0 * r)), interpolation=interp)
+ h, w = img.shape[:2]
+
+ (img, seg_label, lane_label), ratio, pad = letterbox((img, seg_label, lane_label), resized_shape, auto=True, scaleup=self.is_train)
+ shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
+ # ratio = (w / w0, h / h0)
+ # print(resized_shape)
+
+ det_label = data["label"]
+ labels=[]
+
+ if det_label.size > 0:
+ # Normalized xywh to pixel xyxy format
+ labels = det_label.copy()
+ labels[:, 1] = ratio[0] * w * (det_label[:, 1] - det_label[:, 3] / 2) + pad[0] # pad width
+ labels[:, 2] = ratio[1] * h * (det_label[:, 2] - det_label[:, 4] / 2) + pad[1] # pad height
+ labels[:, 3] = ratio[0] * w * (det_label[:, 1] + det_label[:, 3] / 2) + pad[0]
+ labels[:, 4] = ratio[1] * h * (det_label[:, 2] + det_label[:, 4] / 2) + pad[1]
+
+ if self.is_train:
+ combination = (img, seg_label, lane_label)
+ (img, seg_label, lane_label), labels = random_perspective(
+ combination=combination,
+ targets=labels,
+ degrees=self.cfg.DATASET.ROT_FACTOR,
+ translate=self.cfg.DATASET.TRANSLATE,
+ scale=self.cfg.DATASET.SCALE_FACTOR,
+ shear=self.cfg.DATASET.SHEAR
+ )
+ #print(labels.shape)
+ augment_hsv(img, hgain=self.cfg.DATASET.HSV_H, sgain=self.cfg.DATASET.HSV_S, vgain=self.cfg.DATASET.HSV_V)
+ # img, seg_label, labels = cutout(combination=combination, labels=labels)
+
+ if len(labels):
+ # convert xyxy to xywh
+ labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
+
+ # Normalize coordinates 0 - 1
+ labels[:, [2, 4]] /= img.shape[0] # height
+ labels[:, [1, 3]] /= img.shape[1] # width
+
+ # if self.is_train:
+ # random left-right flip
+ lr_flip = True
+ if lr_flip and random.random() < 0.5:
+ img = np.fliplr(img)
+ seg_label = np.fliplr(seg_label)
+ lane_label = np.fliplr(lane_label)
+ if len(labels):
+ labels[:, 1] = 1 - labels[:, 1]
+
+ # random up-down flip
+ ud_flip = False
+ if ud_flip and random.random() < 0.5:
+ img = np.flipud(img)
+ seg_label = np.filpud(seg_label)
+ lane_label = np.filpud(lane_label)
+ if len(labels):
+ labels[:, 2] = 1 - labels[:, 2]
+
+ else:
+ if len(labels):
+ # convert xyxy to xywh
+ labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
+
+ # Normalize coordinates 0 - 1
+ labels[:, [2, 4]] /= img.shape[0] # height
+ labels[:, [1, 3]] /= img.shape[1] # width
+
+ labels_out = torch.zeros((len(labels), 6))
+ if len(labels):
+ labels_out[:, 1:] = torch.from_numpy(labels)
+ # Convert
+ # img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
+ # img = img.transpose(2, 0, 1)
+ img = np.ascontiguousarray(img)
+ # seg_label = np.ascontiguousarray(seg_label)
+ # if idx == 0:
+ # print(seg_label[:,:,0])
+
+ if self.cfg.num_seg_class == 3:
+ _,seg0 = cv2.threshold(seg_label[:,:,0],128,255,cv2.THRESH_BINARY)
+ _,seg1 = cv2.threshold(seg_label[:,:,1],1,255,cv2.THRESH_BINARY)
+ _,seg2 = cv2.threshold(seg_label[:,:,2],1,255,cv2.THRESH_BINARY)
+ else:
+ _,seg1 = cv2.threshold(seg_label,1,255,cv2.THRESH_BINARY)
+ _,seg2 = cv2.threshold(seg_label,1,255,cv2.THRESH_BINARY_INV)
+ _,lane1 = cv2.threshold(lane_label,1,255,cv2.THRESH_BINARY)
+ _,lane2 = cv2.threshold(lane_label,1,255,cv2.THRESH_BINARY_INV)
+# _,seg2 = cv2.threshold(seg_label[:,:,2],1,255,cv2.THRESH_BINARY)
+ # # seg1[cutout_mask] = 0
+ # # seg2[cutout_mask] = 0
+
+ # seg_label /= 255
+ # seg0 = self.Tensor(seg0)
+ if self.cfg.num_seg_class == 3:
+ seg0 = self.Tensor(seg0)
+ seg1 = self.Tensor(seg1)
+ seg2 = self.Tensor(seg2)
+ # seg1 = self.Tensor(seg1)
+ # seg2 = self.Tensor(seg2)
+ lane1 = self.Tensor(lane1)
+ lane2 = self.Tensor(lane2)
+
+ # seg_label = torch.stack((seg2[0], seg1[0]),0)
+ if self.cfg.num_seg_class == 3:
+ seg_label = torch.stack((seg0[0],seg1[0],seg2[0]),0)
+ else:
+ seg_label = torch.stack((seg2[0], seg1[0]),0)
+
+ lane_label = torch.stack((lane2[0], lane1[0]),0)
+ # _, gt_mask = torch.max(seg_label, 0)
+ # _ = show_seg_result(img, gt_mask, idx, 0, save_dir='debug', is_gt=True)
+
+
+ target = [labels_out, seg_label, lane_label]
+ img = self.transform(img)
+
+ return img, target, data["image"], shapes
+
+ def select_data(self, db):
+ """
+ You can use this function to filter useless images in the dataset
+
+ Inputs:
+ -db: (list)database
+
+ Returns:
+ -db_selected: (list)filtered dataset
+ """
+ db_selected = ...
+ return db_selected
+
+ @staticmethod
+ def collate_fn(batch):
+ img, label, paths, shapes= zip(*batch)
+ label_det, label_seg, label_lane = [], [], []
+ for i, l in enumerate(label):
+ l_det, l_seg, l_lane = l
+ l_det[:, 0] = i # add target image index for build_targets()
+ label_det.append(l_det)
+ label_seg.append(l_seg)
+ label_lane.append(l_lane)
+ return torch.stack(img, 0), [torch.cat(label_det, 0), torch.stack(label_seg, 0), torch.stack(label_lane, 0)], paths, shapes
+
diff --git a/lib/dataset/DemoDataset.py b/lib/dataset/DemoDataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a7f39bcfd9e19985a3c87f5c2498a4b982b9a71
--- /dev/null
+++ b/lib/dataset/DemoDataset.py
@@ -0,0 +1,188 @@
+import glob
+import os
+import random
+import shutil
+import time
+from pathlib import Path
+from threading import Thread
+
+import cv2
+import math
+import numpy as np
+import torch
+from PIL import Image, ExifTags
+from torch.utils.data import Dataset
+from tqdm import tqdm
+
+from ..utils import letterbox_for_img, clean_str
+
+img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.dng']
+vid_formats = ['.mov', '.avi', '.mp4', '.mpg', '.mpeg', '.m4v', '.wmv', '.mkv']
+
+class LoadImages: # for inference
+ def __init__(self, path, img_size=640):
+ p = str(Path(path)) # os-agnostic
+ p = os.path.abspath(p) # absolute path
+ if '*' in p:
+ files = sorted(glob.glob(p, recursive=True)) # glob
+ elif os.path.isdir(p):
+ files = sorted(glob.glob(os.path.join(p, '*.*'))) # dir
+ elif os.path.isfile(p):
+ files = [p] # files
+ else:
+ raise Exception('ERROR: %s does not exist' % p)
+
+ images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
+ videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
+ ni, nv = len(images), len(videos)
+
+ self.img_size = img_size
+ self.files = images + videos
+ self.nf = ni + nv # number of files
+ self.video_flag = [False] * ni + [True] * nv
+ self.mode = 'images'
+ if any(videos):
+ self.new_video(videos[0]) # new video
+ else:
+ self.cap = None
+ assert self.nf > 0, 'No images or videos found in %s. Supported formats are:\nimages: %s\nvideos: %s' % \
+ (p, img_formats, vid_formats)
+
+ def __iter__(self):
+ self.count = 0
+ return self
+
+ def __next__(self):
+ if self.count == self.nf:
+ raise StopIteration
+ path = self.files[self.count]
+
+ if self.video_flag[self.count]:
+ # Read video
+ self.mode = 'video'
+ ret_val, img0 = self.cap.read()
+ if not ret_val:
+ self.count += 1
+ self.cap.release()
+ if self.count == self.nf: # last video
+ raise StopIteration
+ else:
+ path = self.files[self.count]
+ self.new_video(path)
+ ret_val, img0 = self.cap.read()
+ h0, w0 = img0.shape[:2]
+
+ self.frame += 1
+ print('\n video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nf, self.frame, self.nframes, path), end='')
+
+ else:
+ # Read image
+ self.count += 1
+ img0 = cv2.imread(path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) # BGR
+ #img0 = cv2.cvtColor(img0, cv2.COLOR_BGR2RGB)
+ assert img0 is not None, 'Image Not Found ' + path
+ print('image %g/%g %s: \n' % (self.count, self.nf, path), end='')
+ h0, w0 = img0.shape[:2]
+
+ # Padded resize
+ img, ratio, pad = letterbox_for_img(img0, new_shape=self.img_size, auto=True)
+ h, w = img.shape[:2]
+ shapes = (h0, w0), ((h / h0, w / w0), pad)
+
+ # Convert
+ #img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
+ img = np.ascontiguousarray(img)
+
+
+ # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image
+ return path, img, img0, self.cap, shapes
+
+ def new_video(self, path):
+ self.frame = 0
+ self.cap = cv2.VideoCapture(path)
+ self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+ def __len__(self):
+ return self.nf # number of files
+
+
+
+class LoadStreams: # multiple IP or RTSP cameras
+ def __init__(self, sources='streams.txt', img_size=640, auto=True):
+ self.mode = 'stream'
+ self.img_size = img_size
+
+ if os.path.isfile(sources):
+ with open(sources, 'r') as f:
+ sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())]
+ else:
+ sources = [sources]
+
+ n = len(sources)
+ self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n
+ self.sources = [clean_str(x) for x in sources] # clean source names for later
+ self.auto = auto
+ for i, s in enumerate(sources): # index, source
+ # Start thread to read frames from video stream
+ print(f'{i + 1}/{n}: {s}... ', end='')
+ s = eval(s) if s.isnumeric() else s # i.e. s = '0' local webcam
+ cap = cv2.VideoCapture(s)
+ assert cap.isOpened(), f'Failed to open {s}'
+ w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+ h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+ self.fps[i] = max(cap.get(cv2.CAP_PROP_FPS) % 100, 0) or 30.0 # 30 FPS fallback
+ self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf') # infinite stream fallback
+
+ _, self.imgs[i] = cap.read() # guarantee first frame
+ self.threads[i] = Thread(target=self.update, args=([i, cap]), daemon=True)
+ print(f" success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)")
+ self.threads[i].start()
+ print('') # newline
+
+ # check for common shapes
+
+ s = np.stack([letterbox_for_img(x, self.img_size, auto=self.auto)[0].shape for x in self.imgs], 0) # shapes
+ self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
+ if not self.rect:
+ print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
+
+ def update(self, i, cap):
+ # Read stream `i` frames in daemon thread
+ n, f, read = 0, self.frames[i], 1 # frame number, frame array, inference every 'read' frame
+ while cap.isOpened() and n < f:
+ n += 1
+ # _, self.imgs[index] = cap.read()
+ cap.grab()
+ if n % read == 0:
+ success, im = cap.retrieve()
+ self.imgs[i] = im if success else self.imgs[i] * 0
+ time.sleep(1 / self.fps[i]) # wait time
+
+ def __iter__(self):
+ self.count = -1
+ return self
+
+ def __next__(self):
+ self.count += 1
+ if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'): # q to quit
+ cv2.destroyAllWindows()
+ raise StopIteration
+
+ # Letterbox
+ img0 = self.imgs.copy()
+
+ h0, w0 = img0[0].shape[:2]
+ img, _, pad = letterbox_for_img(img0[0], self.img_size, auto=self.rect and self.auto)
+
+ # Stack
+ h, w = img.shape[:2]
+ shapes = (h0, w0), ((h / h0, w / w0), pad)
+
+ # Convert
+ #img = img[..., ::-1].transpose((0, 3, 1, 2)) # BGR to RGB, BHWC to BCHW
+ img = np.ascontiguousarray(img)
+
+ return self.sources, img, img0[0], None, shapes
+
+ def __len__(self):
+ return len(self.sources) # 1E12 frames = 32 streams at 30 FPS for 30 years
\ No newline at end of file
diff --git a/lib/dataset/__init__.py b/lib/dataset/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9d39e5d658297f283a6c49b6a63c1f143442bc9
--- /dev/null
+++ b/lib/dataset/__init__.py
@@ -0,0 +1,3 @@
+from .bdd import BddDataset
+from .AutoDriveDataset import AutoDriveDataset
+from .DemoDataset import LoadImages, LoadStreams
diff --git a/lib/dataset/bdd.py b/lib/dataset/bdd.py
new file mode 100644
index 0000000000000000000000000000000000000000..538ab4d5cae57fbfac6d9a3ba07c8f72e083e8c5
--- /dev/null
+++ b/lib/dataset/bdd.py
@@ -0,0 +1,85 @@
+import numpy as np
+import json
+
+from .AutoDriveDataset import AutoDriveDataset
+from .convert import convert, id_dict, id_dict_single
+from tqdm import tqdm
+
+single_cls = True # just detect vehicle
+
+class BddDataset(AutoDriveDataset):
+ def __init__(self, cfg, is_train, inputsize, transform=None):
+ super().__init__(cfg, is_train, inputsize, transform)
+ self.db = self._get_db()
+ self.cfg = cfg
+
+ def _get_db(self):
+ """
+ get database from the annotation file
+
+ Inputs:
+
+ Returns:
+ gt_db: (list)database [a,b,c,...]
+ a: (dictionary){'image':, 'information':, ......}
+ image: image path
+ mask: path of the segmetation label
+ label: [cls_id, center_x//256, center_y//256, w//256, h//256] 256=IMAGE_SIZE
+ """
+ print('building database...')
+ gt_db = []
+ height, width = self.shapes
+ for mask in tqdm(list(self.mask_list)):
+ mask_path = str(mask)
+ label_path = mask_path.replace(str(self.mask_root), str(self.label_root)).replace(".png", ".json")
+ image_path = mask_path.replace(str(self.mask_root), str(self.img_root)).replace(".png", ".jpg")
+ lane_path = mask_path.replace(str(self.mask_root), str(self.lane_root))
+ with open(label_path, 'r') as f:
+ label = json.load(f)
+ data = label['frames'][0]['objects']
+ data = self.filter_data(data)
+ gt = np.zeros((len(data), 5))
+ for idx, obj in enumerate(data):
+ category = obj['category']
+ if category == "traffic light":
+ color = obj['attributes']['trafficLightColor']
+ category = "tl_" + color
+ if category in id_dict.keys():
+ x1 = float(obj['box2d']['x1'])
+ y1 = float(obj['box2d']['y1'])
+ x2 = float(obj['box2d']['x2'])
+ y2 = float(obj['box2d']['y2'])
+ cls_id = id_dict[category]
+ if single_cls:
+ cls_id=0
+ gt[idx][0] = cls_id
+ box = convert((width, height), (x1, x2, y1, y2))
+ gt[idx][1:] = list(box)
+
+
+ rec = [{
+ 'image': image_path,
+ 'label': gt,
+ 'mask': mask_path,
+ 'lane': lane_path
+ }]
+
+ gt_db += rec
+ print('database build finish')
+ return gt_db
+
+ def filter_data(self, data):
+ remain = []
+ for obj in data:
+ if 'box2d' in obj.keys(): # obj.has_key('box2d'):
+ if single_cls:
+ if obj['category'] in id_dict_single.keys():
+ remain.append(obj)
+ else:
+ remain.append(obj)
+ return remain
+
+ def evaluate(self, cfg, preds, output_dir, *args, **kwargs):
+ """
+ """
+ pass
diff --git a/lib/dataset/convert.py b/lib/dataset/convert.py
new file mode 100644
index 0000000000000000000000000000000000000000..da92c95717e8e9554e110e2792a9189ad12d7c10
--- /dev/null
+++ b/lib/dataset/convert.py
@@ -0,0 +1,31 @@
+# bdd_labels = {
+# 'unlabeled':0, 'dynamic': 1, 'ego vehicle': 2, 'ground': 3,
+# 'static': 4, 'parking': 5, 'rail track': 6, 'road': 7,
+# 'sidewalk': 8, 'bridge': 9, 'building': 10, 'fence': 11,
+# 'garage': 12, 'guard rail': 13, 'tunnel': 14, 'wall': 15,
+# 'banner': 16, 'billboard': 17, 'lane divider': 18,'parking sign': 19,
+# 'pole': 20, 'polegroup': 21, 'street light': 22, 'traffic cone': 23,
+# 'traffic device': 24, 'traffic light': 25, 'traffic sign': 26, 'traffic sign frame': 27,
+# 'terrain': 28, 'vegetation': 29, 'sky': 30, 'person': 31,
+# 'rider': 32, 'bicycle': 33, 'bus': 34, 'car': 35,
+# 'caravan': 36, 'motorcycle': 37, 'trailer': 38, 'train': 39,
+# 'truck': 40
+# }
+id_dict = {'person': 0, 'rider': 1, 'car': 2, 'bus': 3, 'truck': 4,
+'bike': 5, 'motor': 6, 'tl_green': 7, 'tl_red': 8,
+'tl_yellow': 9, 'tl_none': 10, 'traffic sign': 11, 'train': 12}
+id_dict_single = {'car': 0, 'bus': 1, 'truck': 2,'train': 3}
+# id_dict = {'car': 0, 'bus': 1, 'truck': 2}
+
+def convert(size, box):
+ dw = 1./(size[0])
+ dh = 1./(size[1])
+ x = (box[0] + box[1])/2.0
+ y = (box[2] + box[3])/2.0
+ w = box[1] - box[0]
+ h = box[3] - box[2]
+ x = x*dw
+ w = w*dw
+ y = y*dh
+ h = h*dh
+ return (x,y,w,h)
diff --git a/lib/dataset/hust.py b/lib/dataset/hust.py
new file mode 100644
index 0000000000000000000000000000000000000000..145f931cf90c9d12a00c91ffa971ca742a7a91bf
--- /dev/null
+++ b/lib/dataset/hust.py
@@ -0,0 +1,87 @@
+import numpy as np
+import json
+
+from .AutoDriveDataset import AutoDriveDataset
+from .convert import convert, id_dict, id_dict_single
+from tqdm import tqdm
+import os
+
+single_cls = False # just detect vehicle
+
+class HustDataset(AutoDriveDataset):
+ def __init__(self, cfg, is_train, inputsize, transform=None):
+ super().__init__(cfg, is_train, inputsize, transform)
+ self.db = self._get_db()
+ self.cfg = cfg
+
+ def _get_db(self):
+ """
+ get database from the annotation file
+
+ Inputs:
+
+ Returns:
+ gt_db: (list)database [a,b,c,...]
+ a: (dictionary){'image':, 'information':, ......}
+ image: image path
+ mask: path of the segmetation label
+ label: [cls_id, center_x//256, center_y//256, w//256, h//256] 256=IMAGE_SIZE
+ """
+ print('building database...')
+ gt_db = []
+ height, width = self.shapes
+ for mask in tqdm(list(self.mask_list)):
+ mask_path = str(mask)
+ label_path = self.label_root
+ # label_path = mask_path.replace(str(self.mask_root), str(self.label_root)).replace(".png", ".json")
+ image_path = mask_path.replace(str(self.mask_root), str(self.img_root)).replace(".png", ".jpg")
+ lane_path = mask_path.replace(str(self.mask_root), str(self.lane_root))
+ with open(label_path, 'r') as f:
+ label = json.load(f)
+ data = label[int(os.path.basename(image_path)[:-4])]["labels"]
+ data = self.filter_data(data)
+ gt = np.zeros((len(data), 5))
+ for idx, obj in enumerate(data):
+ category = obj['category']
+ if category == "traffic light":
+ color = obj['attributes']['Traffic Light Color'][0]
+ category = "tl_" + color
+ if category in id_dict.keys():
+ x1 = float(obj['box2d']['x1'])
+ y1 = float(obj['box2d']['y1'])
+ x2 = float(obj['box2d']['x2'])
+ y2 = float(obj['box2d']['y2'])
+ cls_id = id_dict[category]
+ if single_cls:
+ cls_id=0
+ gt[idx][0] = cls_id
+ box = convert((width, height), (x1, x2, y1, y2))
+ gt[idx][1:] = list(box)
+
+
+ rec = [{
+ 'image': image_path,
+ 'label': gt,
+ 'mask': mask_path,
+ 'lane': lane_path
+ }]
+
+ gt_db += rec
+ print('database build finish')
+ return gt_db
+
+ def filter_data(self, data):
+ remain = []
+ for obj in data:
+ if 'box2d' in obj.keys(): # obj.has_key('box2d'):
+ if single_cls:
+ if obj['category'] in id_dict_single.keys():
+ remain.append(obj)
+ else:
+ remain.append(obj)
+ return remain
+
+ def evaluate(self, cfg, preds, output_dir, *args, **kwargs):
+ """
+ """
+ pass
diff --git a/lib/models/YOLOP.py b/lib/models/YOLOP.py
new file mode 100644
index 0000000000000000000000000000000000000000..d08e2e065ab6b8533b169b1e28408f1404ac4ff8
--- /dev/null
+++ b/lib/models/YOLOP.py
@@ -0,0 +1,596 @@
+import torch
+from torch import tensor
+import torch.nn as nn
+import sys,os
+import math
+import sys
+sys.path.append(os.getcwd())
+#sys.path.append("lib/models")
+#sys.path.append("lib/utils")
+#sys.path.append("/workspace/wh/projects/DaChuang")
+from lib.utils import initialize_weights
+# from lib.models.common2 import DepthSeperabelConv2d as Conv
+# from lib.models.common2 import SPP, Bottleneck, BottleneckCSP, Focus, Concat, Detect
+from lib.models.common import Conv, SPP, Bottleneck, BottleneckCSP, Focus, Concat, Detect, SharpenConv
+from torch.nn import Upsample
+from lib.utils import check_anchor_order
+from lib.core.evaluate import SegmentationMetric
+from lib.utils.utils import time_synchronized
+
+"""
+MCnet_SPP = [
+[ -1, Focus, [3, 32, 3]],
+[ -1, Conv, [32, 64, 3, 2]],
+[ -1, BottleneckCSP, [64, 64, 1]],
+[ -1, Conv, [64, 128, 3, 2]],
+[ -1, BottleneckCSP, [128, 128, 3]],
+[ -1, Conv, [128, 256, 3, 2]],
+[ -1, BottleneckCSP, [256, 256, 3]],
+[ -1, Conv, [256, 512, 3, 2]],
+[ -1, SPP, [512, 512, [5, 9, 13]]],
+[ -1, BottleneckCSP, [512, 512, 1, False]],
+[ -1, Conv,[512, 256, 1, 1]],
+[ -1, Upsample, [None, 2, 'nearest']],
+[ [-1, 6], Concat, [1]],
+[ -1, BottleneckCSP, [512, 256, 1, False]],
+[ -1, Conv, [256, 128, 1, 1]],
+[ -1, Upsample, [None, 2, 'nearest']],
+[ [-1,4], Concat, [1]],
+[ -1, BottleneckCSP, [256, 128, 1, False]],
+[ -1, Conv, [128, 128, 3, 2]],
+[ [-1, 14], Concat, [1]],
+[ -1, BottleneckCSP, [256, 256, 1, False]],
+[ -1, Conv, [256, 256, 3, 2]],
+[ [-1, 10], Concat, [1]],
+[ -1, BottleneckCSP, [512, 512, 1, False]],
+# [ [17, 20, 23], Detect, [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]],
+[ [17, 20, 23], Detect, [13, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]],
+[ 17, Conv, [128, 64, 3, 1]],
+[ -1, Upsample, [None, 2, 'nearest']],
+[ [-1,2], Concat, [1]],
+[ -1, BottleneckCSP, [128, 64, 1, False]],
+[ -1, Conv, [64, 32, 3, 1]],
+[ -1, Upsample, [None, 2, 'nearest']],
+[ -1, Conv, [32, 16, 3, 1]],
+[ -1, BottleneckCSP, [16, 8, 1, False]],
+[ -1, Upsample, [None, 2, 'nearest']],
+[ -1, SPP, [8, 2, [5, 9, 13]]] #segmentation output
+]
+# [2,6,3,9,5,13], [7,19,11,26,17,39], [28,64,44,103,61,183]
+
+MCnet_0 = [
+[ -1, Focus, [3, 32, 3]],
+[ -1, Conv, [32, 64, 3, 2]],
+[ -1, BottleneckCSP, [64, 64, 1]],
+[ -1, Conv, [64, 128, 3, 2]],
+[ -1, BottleneckCSP, [128, 128, 3]],
+[ -1, Conv, [128, 256, 3, 2]],
+[ -1, BottleneckCSP, [256, 256, 3]],
+[ -1, Conv, [256, 512, 3, 2]],
+[ -1, SPP, [512, 512, [5, 9, 13]]],
+[ -1, BottleneckCSP, [512, 512, 1, False]],
+[ -1, Conv,[512, 256, 1, 1]],
+[ -1, Upsample, [None, 2, 'nearest']],
+[ [-1, 6], Concat, [1]],
+[ -1, BottleneckCSP, [512, 256, 1, False]],
+[ -1, Conv, [256, 128, 1, 1]],
+[ -1, Upsample, [None, 2, 'nearest']],
+[ [-1,4], Concat, [1]],
+[ -1, BottleneckCSP, [256, 128, 1, False]],
+[ -1, Conv, [128, 128, 3, 2]],
+[ [-1, 14], Concat, [1]],
+[ -1, BottleneckCSP, [256, 256, 1, False]],
+[ -1, Conv, [256, 256, 3, 2]],
+[ [-1, 10], Concat, [1]],
+[ -1, BottleneckCSP, [512, 512, 1, False]],
+[ [17, 20, 23], Detect, [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]], #Detect output 24
+
+[ 16, Conv, [128, 64, 3, 1]],
+[ -1, Upsample, [None, 2, 'nearest']],
+[ [-1,2], Concat, [1]],
+[ -1, BottleneckCSP, [128, 64, 1, False]],
+[ -1, Conv, [64, 32, 3, 1]],
+[ -1, Upsample, [None, 2, 'nearest']],
+[ -1, Conv, [32, 16, 3, 1]],
+[ -1, BottleneckCSP, [16, 8, 1, False]],
+[ -1, Upsample, [None, 2, 'nearest']],
+[ -1, Conv, [8, 2, 3, 1]], #Driving area segmentation output
+
+[ 16, Conv, [128, 64, 3, 1]],
+[ -1, Upsample, [None, 2, 'nearest']],
+[ [-1,2], Concat, [1]],
+[ -1, BottleneckCSP, [128, 64, 1, False]],
+[ -1, Conv, [64, 32, 3, 1]],
+[ -1, Upsample, [None, 2, 'nearest']],
+[ -1, Conv, [32, 16, 3, 1]],
+[ -1, BottleneckCSP, [16, 8, 1, False]],
+[ -1, Upsample, [None, 2, 'nearest']],
+[ -1, Conv, [8, 2, 3, 1]], #Lane line segmentation output
+]
+
+
+# The lane line and the driving area segment branches share information with each other
+MCnet_share = [
+[ -1, Focus, [3, 32, 3]], #0
+[ -1, Conv, [32, 64, 3, 2]], #1
+[ -1, BottleneckCSP, [64, 64, 1]], #2
+[ -1, Conv, [64, 128, 3, 2]], #3
+[ -1, BottleneckCSP, [128, 128, 3]], #4
+[ -1, Conv, [128, 256, 3, 2]], #5
+[ -1, BottleneckCSP, [256, 256, 3]], #6
+[ -1, Conv, [256, 512, 3, 2]], #7
+[ -1, SPP, [512, 512, [5, 9, 13]]], #8
+[ -1, BottleneckCSP, [512, 512, 1, False]], #9
+[ -1, Conv,[512, 256, 1, 1]], #10
+[ -1, Upsample, [None, 2, 'nearest']], #11
+[ [-1, 6], Concat, [1]], #12
+[ -1, BottleneckCSP, [512, 256, 1, False]], #13
+[ -1, Conv, [256, 128, 1, 1]], #14
+[ -1, Upsample, [None, 2, 'nearest']], #15
+[ [-1,4], Concat, [1]], #16
+[ -1, BottleneckCSP, [256, 128, 1, False]], #17
+[ -1, Conv, [128, 128, 3, 2]], #18
+[ [-1, 14], Concat, [1]], #19
+[ -1, BottleneckCSP, [256, 256, 1, False]], #20
+[ -1, Conv, [256, 256, 3, 2]], #21
+[ [-1, 10], Concat, [1]], #22
+[ -1, BottleneckCSP, [512, 512, 1, False]], #23
+[ [17, 20, 23], Detect, [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]], #Detect output 24
+
+[ 16, Conv, [256, 64, 3, 1]], #25
+[ -1, Upsample, [None, 2, 'nearest']], #26
+[ [-1,2], Concat, [1]], #27
+[ -1, BottleneckCSP, [128, 64, 1, False]], #28
+[ -1, Conv, [64, 32, 3, 1]], #29
+[ -1, Upsample, [None, 2, 'nearest']], #30
+[ -1, Conv, [32, 16, 3, 1]], #31
+[ -1, BottleneckCSP, [16, 8, 1, False]], #32 driving area segment neck
+
+[ 16, Conv, [256, 64, 3, 1]], #33
+[ -1, Upsample, [None, 2, 'nearest']], #34
+[ [-1,2], Concat, [1]], #35
+[ -1, BottleneckCSP, [128, 64, 1, False]], #36
+[ -1, Conv, [64, 32, 3, 1]], #37
+[ -1, Upsample, [None, 2, 'nearest']], #38
+[ -1, Conv, [32, 16, 3, 1]], #39
+[ -1, BottleneckCSP, [16, 8, 1, False]], #40 lane line segment neck
+
+[ [31,39], Concat, [1]], #41
+[ -1, Conv, [32, 8, 3, 1]], #42 Share_Block
+
+
+[ [32,42], Concat, [1]], #43
+[ -1, Upsample, [None, 2, 'nearest']], #44
+[ -1, Conv, [16, 2, 3, 1]], #45 Driving area segmentation output
+
+
+[ [40,42], Concat, [1]], #46
+[ -1, Upsample, [None, 2, 'nearest']], #47
+[ -1, Conv, [16, 2, 3, 1]] #48Lane line segmentation output
+]
+
+# The lane line and the driving area segment branches without share information with each other
+MCnet_no_share = [
+[ -1, Focus, [3, 32, 3]], #0
+[ -1, Conv, [32, 64, 3, 2]], #1
+[ -1, BottleneckCSP, [64, 64, 1]], #2
+[ -1, Conv, [64, 128, 3, 2]], #3
+[ -1, BottleneckCSP, [128, 128, 3]], #4
+[ -1, Conv, [128, 256, 3, 2]], #5
+[ -1, BottleneckCSP, [256, 256, 3]], #6
+[ -1, Conv, [256, 512, 3, 2]], #7
+[ -1, SPP, [512, 512, [5, 9, 13]]], #8
+[ -1, BottleneckCSP, [512, 512, 1, False]], #9
+[ -1, Conv,[512, 256, 1, 1]], #10
+[ -1, Upsample, [None, 2, 'nearest']], #11
+[ [-1, 6], Concat, [1]], #12
+[ -1, BottleneckCSP, [512, 256, 1, False]], #13
+[ -1, Conv, [256, 128, 1, 1]], #14
+[ -1, Upsample, [None, 2, 'nearest']], #15
+[ [-1,4], Concat, [1]], #16
+[ -1, BottleneckCSP, [256, 128, 1, False]], #17
+[ -1, Conv, [128, 128, 3, 2]], #18
+[ [-1, 14], Concat, [1]], #19
+[ -1, BottleneckCSP, [256, 256, 1, False]], #20
+[ -1, Conv, [256, 256, 3, 2]], #21
+[ [-1, 10], Concat, [1]], #22
+[ -1, BottleneckCSP, [512, 512, 1, False]], #23
+[ [17, 20, 23], Detect, [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]], #Detect output 24
+
+[ 16, Conv, [256, 64, 3, 1]], #25
+[ -1, Upsample, [None, 2, 'nearest']], #26
+[ [-1,2], Concat, [1]], #27
+[ -1, BottleneckCSP, [128, 64, 1, False]], #28
+[ -1, Conv, [64, 32, 3, 1]], #29
+[ -1, Upsample, [None, 2, 'nearest']], #30
+[ -1, Conv, [32, 16, 3, 1]], #31
+[ -1, BottleneckCSP, [16, 8, 1, False]], #32 driving area segment neck
+[ -1, Upsample, [None, 2, 'nearest']], #33
+[ -1, Conv, [8, 3, 3, 1]], #34 Driving area segmentation output
+
+[ 16, Conv, [256, 64, 3, 1]], #35
+[ -1, Upsample, [None, 2, 'nearest']], #36
+[ [-1,2], Concat, [1]], #37
+[ -1, BottleneckCSP, [128, 64, 1, False]], #38
+[ -1, Conv, [64, 32, 3, 1]], #39
+[ -1, Upsample, [None, 2, 'nearest']], #40
+[ -1, Conv, [32, 16, 3, 1]], #41
+[ -1, BottleneckCSP, [16, 8, 1, False]], #42 lane line segment neck
+[ -1, Upsample, [None, 2, 'nearest']], #43
+[ -1, Conv, [8, 2, 3, 1]] #44 Lane line segmentation output
+]
+
+MCnet_feedback = [
+[ -1, Focus, [3, 32, 3]], #0
+[ -1, Conv, [32, 64, 3, 2]], #1
+[ -1, BottleneckCSP, [64, 64, 1]], #2
+[ -1, Conv, [64, 128, 3, 2]], #3
+[ -1, BottleneckCSP, [128, 128, 3]], #4
+[ -1, Conv, [128, 256, 3, 2]], #5
+[ -1, BottleneckCSP, [256, 256, 3]], #6
+[ -1, Conv, [256, 512, 3, 2]], #7
+[ -1, SPP, [512, 512, [5, 9, 13]]], #8
+[ -1, BottleneckCSP, [512, 512, 1, False]], #9
+[ -1, Conv,[512, 256, 1, 1]], #10
+[ -1, Upsample, [None, 2, 'nearest']], #11
+[ [-1, 6], Concat, [1]], #12
+[ -1, BottleneckCSP, [512, 256, 1, False]], #13
+[ -1, Conv, [256, 128, 1, 1]], #14
+[ -1, Upsample, [None, 2, 'nearest']], #15
+[ [-1,4], Concat, [1]], #16
+[ -1, BottleneckCSP, [256, 128, 1, False]], #17
+[ -1, Conv, [128, 128, 3, 2]], #18
+[ [-1, 14], Concat, [1]], #19
+[ -1, BottleneckCSP, [256, 256, 1, False]], #20
+[ -1, Conv, [256, 256, 3, 2]], #21
+[ [-1, 10], Concat, [1]], #22
+[ -1, BottleneckCSP, [512, 512, 1, False]], #23
+[ [17, 20, 23], Detect, [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]], #Detect output 24
+
+[ 16, Conv, [256, 128, 3, 1]], #25
+[ -1, Upsample, [None, 2, 'nearest']], #26
+[ -1, BottleneckCSP, [128, 64, 1, False]], #28
+[ -1, Conv, [64, 32, 3, 1]], #29
+[ -1, Upsample, [None, 2, 'nearest']], #30
+[ -1, Conv, [32, 16, 3, 1]], #31
+[ -1, BottleneckCSP, [16, 8, 1, False]], #32 driving area segment neck
+[ -1, Upsample, [None, 2, 'nearest']], #33
+[ -1, Conv, [8, 2, 3, 1]], #34 Driving area segmentation output
+
+[ 16, Conv, [256, 128, 3, 1]], #35
+[ -1, Upsample, [None, 2, 'nearest']], #36
+[ -1, BottleneckCSP, [128, 64, 1, False]], #38
+[ -1, Conv, [64, 32, 3, 1]], #39
+[ -1, Upsample, [None, 2, 'nearest']], #40
+[ -1, Conv, [32, 16, 3, 1]], #41
+[ -1, BottleneckCSP, [16, 8, 1, False]], #42 lane line segment neck
+[ -1, Upsample, [None, 2, 'nearest']], #43
+[ -1, Conv, [8, 2, 3, 1]] #44 Lane line segmentation output
+]
+
+
+MCnet_Da_feedback1 = [
+[46, 26, 35], #Det_out_idx, Da_Segout_idx, LL_Segout_idx
+[ -1, Focus, [3, 32, 3]], #0
+[ -1, Conv, [32, 64, 3, 2]], #1
+[ -1, BottleneckCSP, [64, 64, 1]], #2
+[ -1, Conv, [64, 128, 3, 2]], #3
+[ -1, BottleneckCSP, [128, 128, 3]], #4
+[ -1, Conv, [128, 256, 3, 2]], #5
+[ -1, BottleneckCSP, [256, 256, 3]], #6
+[ -1, Conv, [256, 512, 3, 2]], #7
+[ -1, SPP, [512, 512, [5, 9, 13]]], #8
+[ -1, BottleneckCSP, [512, 512, 1, False]], #9
+[ -1, Conv,[512, 256, 1, 1]], #10
+[ -1, Upsample, [None, 2, 'nearest']], #11
+[ [-1, 6], Concat, [1]], #12
+[ -1, BottleneckCSP, [512, 256, 1, False]], #13
+[ -1, Conv, [256, 128, 1, 1]], #14
+[ -1, Upsample, [None, 2, 'nearest']], #15
+[ [-1,4], Concat, [1]], #16 backbone+fpn
+[ -1,Conv,[256,256,1,1]], #17
+
+
+[ 16, Conv, [256, 128, 3, 1]], #18
+[ -1, Upsample, [None, 2, 'nearest']], #19
+[ -1, BottleneckCSP, [128, 64, 1, False]], #20
+[ -1, Conv, [64, 32, 3, 1]], #21
+[ -1, Upsample, [None, 2, 'nearest']], #22
+[ -1, Conv, [32, 16, 3, 1]], #23
+[ -1, BottleneckCSP, [16, 8, 1, False]], #24 driving area segment neck
+[ -1, Upsample, [None, 2, 'nearest']], #25
+[ -1, Conv, [8, 2, 3, 1]], #26 Driving area segmentation output
+
+
+[ 16, Conv, [256, 128, 3, 1]], #27
+[ -1, Upsample, [None, 2, 'nearest']], #28
+[ -1, BottleneckCSP, [128, 64, 1, False]], #29
+[ -1, Conv, [64, 32, 3, 1]], #30
+[ -1, Upsample, [None, 2, 'nearest']], #31
+[ -1, Conv, [32, 16, 3, 1]], #32
+[ -1, BottleneckCSP, [16, 8, 1, False]], #33 lane line segment neck
+[ -1, Upsample, [None, 2, 'nearest']], #34
+[ -1, Conv, [8, 2, 3, 1]], #35Lane line segmentation output
+
+
+[ 23, Conv, [16, 16, 3, 2]], #36
+[ -1, Conv, [16, 32, 3, 2]], #2 times 2xdownsample 37
+
+[ [-1,17], Concat, [1]], #38
+[ -1, BottleneckCSP, [288, 128, 1, False]], #39
+[ -1, Conv, [128, 128, 3, 2]], #40
+[ [-1, 14], Concat, [1]], #41
+[ -1, BottleneckCSP, [256, 256, 1, False]], #42
+[ -1, Conv, [256, 256, 3, 2]], #43
+[ [-1, 10], Concat, [1]], #44
+[ -1, BottleneckCSP, [512, 512, 1, False]], #45
+[ [39, 42, 45], Detect, [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]] #Detect output 46
+]
+
+
+
+# The lane line and the driving area segment branches share information with each other and feedback to det_head
+MCnet_Da_feedback2 = [
+[47, 26, 35], #Det_out_idx, Da_Segout_idx, LL_Segout_idx
+[25, 28, 31, 33], #layer in Da_branch to do SAD
+[34, 37, 40, 42], #layer in LL_branch to do SAD
+[ -1, Focus, [3, 32, 3]], #0
+[ -1, Conv, [32, 64, 3, 2]], #1
+[ -1, BottleneckCSP, [64, 64, 1]], #2
+[ -1, Conv, [64, 128, 3, 2]], #3
+[ -1, BottleneckCSP, [128, 128, 3]], #4
+[ -1, Conv, [128, 256, 3, 2]], #5
+[ -1, BottleneckCSP, [256, 256, 3]], #6
+[ -1, Conv, [256, 512, 3, 2]], #7
+[ -1, SPP, [512, 512, [5, 9, 13]]], #8
+[ -1, BottleneckCSP, [512, 512, 1, False]], #9
+[ -1, Conv,[512, 256, 1, 1]], #10
+[ -1, Upsample, [None, 2, 'nearest']], #11
+[ [-1, 6], Concat, [1]], #12
+[ -1, BottleneckCSP, [512, 256, 1, False]], #13
+[ -1, Conv, [256, 128, 1, 1]], #14
+[ -1, Upsample, [None, 2, 'nearest']], #15
+[ [-1,4], Concat, [1]], #16 backbone+fpn
+[ -1,Conv,[256,256,1,1]], #17
+
+
+[ 16, Conv, [256, 128, 3, 1]], #18
+[ -1, Upsample, [None, 2, 'nearest']], #19
+[ -1, BottleneckCSP, [128, 64, 1, False]], #20
+[ -1, Conv, [64, 32, 3, 1]], #21
+[ -1, Upsample, [None, 2, 'nearest']], #22
+[ -1, Conv, [32, 16, 3, 1]], #23
+[ -1, BottleneckCSP, [16, 8, 1, False]], #24 driving area segment neck
+[ -1, Upsample, [None, 2, 'nearest']], #25
+[ -1, Conv, [8, 2, 3, 1]], #26 Driving area segmentation output
+
+
+[ 16, Conv, [256, 128, 3, 1]], #27
+[ -1, Upsample, [None, 2, 'nearest']], #28
+[ -1, BottleneckCSP, [128, 64, 1, False]], #29
+[ -1, Conv, [64, 32, 3, 1]], #30
+[ -1, Upsample, [None, 2, 'nearest']], #31
+[ -1, Conv, [32, 16, 3, 1]], #32
+[ -1, BottleneckCSP, [16, 8, 1, False]], #33 lane line segment neck
+[ -1, Upsample, [None, 2, 'nearest']], #34
+[ -1, Conv, [8, 2, 3, 1]], #35Lane line segmentation output
+
+
+[ 23, Conv, [16, 64, 3, 2]], #36
+[ -1, Conv, [64, 256, 3, 2]], #2 times 2xdownsample 37
+
+[ [-1,17], Concat, [1]], #38
+
+[-1, Conv, [512, 256, 3, 1]], #39
+[ -1, BottleneckCSP, [256, 128, 1, False]], #40
+[ -1, Conv, [128, 128, 3, 2]], #41
+[ [-1, 14], Concat, [1]], #42
+[ -1, BottleneckCSP, [256, 256, 1, False]], #43
+[ -1, Conv, [256, 256, 3, 2]], #44
+[ [-1, 10], Concat, [1]], #45
+[ -1, BottleneckCSP, [512, 512, 1, False]], #46
+[ [40, 42, 45], Detect, [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]] #Detect output 47
+]
+
+MCnet_share1 = [
+[24, 33, 45], #Det_out_idx, Da_Segout_idx, LL_Segout_idx
+[25, 28, 31, 33], #layer in Da_branch to do SAD
+[34, 37, 40, 42], #layer in LL_branch to do SAD
+[ -1, Focus, [3, 32, 3]], #0
+[ -1, Conv, [32, 64, 3, 2]], #1
+[ -1, BottleneckCSP, [64, 64, 1]], #2
+[ -1, Conv, [64, 128, 3, 2]], #3
+[ -1, BottleneckCSP, [128, 128, 3]], #4
+[ -1, Conv, [128, 256, 3, 2]], #5
+[ -1, BottleneckCSP, [256, 256, 3]], #6
+[ -1, Conv, [256, 512, 3, 2]], #7
+[ -1, SPP, [512, 512, [5, 9, 13]]], #8
+[ -1, BottleneckCSP, [512, 512, 1, False]], #9
+[ -1, Conv,[512, 256, 1, 1]], #10
+[ -1, Upsample, [None, 2, 'nearest']], #11
+[ [-1, 6], Concat, [1]], #12
+[ -1, BottleneckCSP, [512, 256, 1, False]], #13
+[ -1, Conv, [256, 128, 1, 1]], #14
+[ -1, Upsample, [None, 2, 'nearest']], #15
+[ [-1,4], Concat, [1]], #16
+[ -1, BottleneckCSP, [256, 128, 1, False]], #17
+[ -1, Conv, [128, 128, 3, 2]], #18
+[ [-1, 14], Concat, [1]], #19
+[ -1, BottleneckCSP, [256, 256, 1, False]], #20
+[ -1, Conv, [256, 256, 3, 2]], #21
+[ [-1, 10], Concat, [1]], #22
+[ -1, BottleneckCSP, [512, 512, 1, False]], #23
+[ [17, 20, 23], Detect, [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]], #Detect output 24
+
+[ 16, Conv, [256, 128, 3, 1]], #25
+[ -1, Upsample, [None, 2, 'nearest']], #26
+[ -1, BottleneckCSP, [128, 64, 1, False]], #27
+[ -1, Conv, [64, 32, 3, 1]], #28
+[ -1, Upsample, [None, 2, 'nearest']], #29
+[ -1, Conv, [32, 16, 3, 1]], #30
+
+[ -1, BottleneckCSP, [16, 8, 1, False]], #31 driving area segment neck
+[ -1, Upsample, [None, 2, 'nearest']], #32
+[ -1, Conv, [8, 2, 3, 1]], #33 Driving area segmentation output
+
+[ 16, Conv, [256, 128, 3, 1]], #34
+[ -1, Upsample, [None, 2, 'nearest']], #35
+[ -1, BottleneckCSP, [128, 64, 1, False]], #36
+[ -1, Conv, [64, 32, 3, 1]], #37
+[ -1, Upsample, [None, 2, 'nearest']], #38
+[ -1, Conv, [32, 16, 3, 1]], #39
+
+[ 30, SharpenConv, [16,16, 3, 1]], #40
+[ -1, Conv, [16, 16, 3, 1]], #41
+[ [-1, 39], Concat, [1]], #42
+[ -1, BottleneckCSP, [32, 8, 1, False]], #43 lane line segment neck
+[ -1, Upsample, [None, 2, 'nearest']], #44
+[ -1, Conv, [8, 2, 3, 1]] #45 Lane line segmentation output
+]"""
+
+
+# The lane line and the driving area segment branches without share information with each other and without link
+YOLOP = [
+[24, 33, 42], #Det_out_idx, Da_Segout_idx, LL_Segout_idx
+[ -1, Focus, [3, 32, 3]], #0
+[ -1, Conv, [32, 64, 3, 2]], #1
+[ -1, BottleneckCSP, [64, 64, 1]], #2
+[ -1, Conv, [64, 128, 3, 2]], #3
+[ -1, BottleneckCSP, [128, 128, 3]], #4
+[ -1, Conv, [128, 256, 3, 2]], #5
+[ -1, BottleneckCSP, [256, 256, 3]], #6
+[ -1, Conv, [256, 512, 3, 2]], #7
+[ -1, SPP, [512, 512, [5, 9, 13]]], #8
+[ -1, BottleneckCSP, [512, 512, 1, False]], #9
+[ -1, Conv,[512, 256, 1, 1]], #10
+[ -1, Upsample, [None, 2, 'nearest']], #11
+[ [-1, 6], Concat, [1]], #12
+[ -1, BottleneckCSP, [512, 256, 1, False]], #13
+[ -1, Conv, [256, 128, 1, 1]], #14
+[ -1, Upsample, [None, 2, 'nearest']], #15
+[ [-1,4], Concat, [1]], #16 #Encoder
+
+[ -1, BottleneckCSP, [256, 128, 1, False]], #17
+[ -1, Conv, [128, 128, 3, 2]], #18
+[ [-1, 14], Concat, [1]], #19
+[ -1, BottleneckCSP, [256, 256, 1, False]], #20
+[ -1, Conv, [256, 256, 3, 2]], #21
+[ [-1, 10], Concat, [1]], #22
+[ -1, BottleneckCSP, [512, 512, 1, False]], #23
+[ [17, 20, 23], Detect, [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]], #Detection head 24
+
+[ 16, Conv, [256, 128, 3, 1]], #25
+[ -1, Upsample, [None, 2, 'nearest']], #26
+[ -1, BottleneckCSP, [128, 64, 1, False]], #27
+[ -1, Conv, [64, 32, 3, 1]], #28
+[ -1, Upsample, [None, 2, 'nearest']], #29
+[ -1, Conv, [32, 16, 3, 1]], #30
+[ -1, BottleneckCSP, [16, 8, 1, False]], #31
+[ -1, Upsample, [None, 2, 'nearest']], #32
+[ -1, Conv, [8, 2, 3, 1]], #33 Driving area segmentation head
+
+[ 16, Conv, [256, 128, 3, 1]], #34
+[ -1, Upsample, [None, 2, 'nearest']], #35
+[ -1, BottleneckCSP, [128, 64, 1, False]], #36
+[ -1, Conv, [64, 32, 3, 1]], #37
+[ -1, Upsample, [None, 2, 'nearest']], #38
+[ -1, Conv, [32, 16, 3, 1]], #39
+[ -1, BottleneckCSP, [16, 8, 1, False]], #40
+[ -1, Upsample, [None, 2, 'nearest']], #41
+[ -1, Conv, [8, 2, 3, 1]] #42 Lane line segmentation head
+]
+
+
+class MCnet(nn.Module):
+ def __init__(self, block_cfg, **kwargs):
+ super(MCnet, self).__init__()
+ layers, save= [], []
+ self.nc = 1
+ self.detector_index = -1
+ self.det_out_idx = block_cfg[0][0]
+ self.seg_out_idx = block_cfg[0][1:]
+
+
+ # Build model
+ for i, (from_, block, args) in enumerate(block_cfg[1:]):
+ block = eval(block) if isinstance(block, str) else block # eval strings
+ if block is Detect:
+ self.detector_index = i
+ block_ = block(*args)
+ block_.index, block_.from_ = i, from_
+ layers.append(block_)
+ save.extend(x % i for x in ([from_] if isinstance(from_, int) else from_) if x != -1) # append to savelist
+ assert self.detector_index == block_cfg[0][0]
+
+ self.model, self.save = nn.Sequential(*layers), sorted(save)
+ self.names = [str(i) for i in range(self.nc)]
+
+ # set stride、anchor for detector
+ Detector = self.model[self.detector_index] # detector
+ if isinstance(Detector, Detect):
+ s = 128 # 2x min stride
+ # for x in self.forward(torch.zeros(1, 3, s, s)):
+ # print (x.shape)
+ with torch.no_grad():
+ model_out = self.forward(torch.zeros(1, 3, s, s))
+ detects, _, _= model_out
+ Detector.stride = torch.tensor([s / x.shape[-2] for x in detects]) # forward
+ # print("stride"+str(Detector.stride ))
+ Detector.anchors /= Detector.stride.view(-1, 1, 1) # Set the anchors for the corresponding scale
+ check_anchor_order(Detector)
+ self.stride = Detector.stride
+ self._initialize_biases()
+
+ initialize_weights(self)
+
+ def forward(self, x):
+ cache = []
+ out = []
+ det_out = None
+ Da_fmap = []
+ LL_fmap = []
+ for i, block in enumerate(self.model):
+ if block.from_ != -1:
+ x = cache[block.from_] if isinstance(block.from_, int) else [x if j == -1 else cache[j] for j in block.from_] #calculate concat detect
+ x = block(x)
+ if i in self.seg_out_idx: #save driving area segment result
+ m=nn.Sigmoid()
+ out.append(m(x))
+ if i == self.detector_index:
+ det_out = x
+ cache.append(x if block.index in self.save else None)
+ out.insert(0,det_out)
+ return out
+
+
+ def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
+ # https://arxiv.org/abs/1708.02002 section 3.3
+ # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
+ # m = self.model[-1] # Detect() module
+ m = self.model[self.detector_index] # Detect() module
+ for mi, s in zip(m.m, m.stride): # from
+ b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
+ b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
+ b[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls
+ mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+
+def get_net(cfg, **kwargs):
+ m_block_cfg = YOLOP
+ model = MCnet(m_block_cfg, **kwargs)
+ return model
+
+
+if __name__ == "__main__":
+ from torch.utils.tensorboard import SummaryWriter
+ model = get_net(False)
+ input_ = torch.randn((1, 3, 256, 256))
+ gt_ = torch.rand((1, 2, 256, 256))
+ metric = SegmentationMetric(2)
+ model_out,SAD_out = model(input_)
+ detects, dring_area_seg, lane_line_seg = model_out
+ Da_fmap, LL_fmap = SAD_out
+ for det in detects:
+ print(det.shape)
+ print(dring_area_seg.shape)
+ print(lane_line_seg.shape)
+
diff --git a/lib/models/__init__.py b/lib/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..7cb745fbca0956e288df6e958b481683d8424091
--- /dev/null
+++ b/lib/models/__init__.py
@@ -0,0 +1 @@
+from .YOLOP import get_net
diff --git a/lib/models/common.py b/lib/models/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..968ba69e0068bd1453f8d888f97fd9d5e1001ae5
--- /dev/null
+++ b/lib/models/common.py
@@ -0,0 +1,265 @@
+import math
+import numpy as np
+import torch
+import torch.nn as nn
+from PIL import Image, ImageDraw
+
+
+def autopad(k, p=None): # kernel, padding
+ # Pad to 'same'
+ if p is None:
+ p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
+ return p
+
+
+class DepthSeperabelConv2d(nn.Module):
+ """
+ DepthSeperable Convolution 2d with residual connection
+ """
+
+ def __init__(self, inplanes, planes, kernel_size=3, stride=1, downsample=None, act=True):
+ super(DepthSeperabelConv2d, self).__init__()
+ self.depthwise = nn.Sequential(
+ nn.Conv2d(inplanes, inplanes, kernel_size, stride=stride, groups=inplanes, padding=kernel_size//2, bias=False),
+ nn.BatchNorm2d(inplanes, momentum=BN_MOMENTUM)
+ )
+ # self.depthwise = nn.Conv2d(inplanes, inplanes, kernel_size, stride=stride, groups=inplanes, padding=1, bias=False)
+ # self.pointwise = nn.Conv2d(inplanes, planes, 1, bias=False)
+
+ self.pointwise = nn.Sequential(
+ nn.Conv2d(inplanes, planes, 1, bias=False),
+ nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
+ )
+ self.downsample = downsample
+ self.stride = stride
+ try:
+ self.act = nn.Hardswish() if act else nn.Identity()
+ except:
+ self.act = nn.Identity()
+
+ def forward(self, x):
+ #residual = x
+
+ out = self.depthwise(x)
+ out = self.act(out)
+ out = self.pointwise(out)
+
+ if self.downsample is not None:
+ residual = self.downsample(x)
+ out = self.act(out)
+
+ return out
+
+
+
+class SharpenConv(nn.Module):
+ # SharpenConv convolution
+ def __init__(self, c1, c2, k=3, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
+ super(SharpenConv, self).__init__()
+ sobel_kernel = np.array([[-1, -1, -1], [-1, 8, -1], [-1, -1, -1]], dtype='float32')
+ kenel_weight = np.vstack([sobel_kernel]*c2*c1).reshape(c2,c1,3,3)
+ self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
+ self.conv.weight.data = torch.from_numpy(kenel_weight)
+ self.conv.weight.requires_grad = False
+ self.bn = nn.BatchNorm2d(c2)
+ try:
+ self.act = nn.Hardswish() if act else nn.Identity()
+ except:
+ self.act = nn.Identity()
+
+ def forward(self, x):
+ return self.act(self.bn(self.conv(x)))
+
+ def fuseforward(self, x):
+ return self.act(self.conv(x))
+
+
+class Conv(nn.Module):
+ # Standard convolution
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
+ super(Conv, self).__init__()
+ self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
+ self.bn = nn.BatchNorm2d(c2)
+ try:
+ self.act = nn.Hardswish() if act else nn.Identity()
+ except:
+ self.act = nn.Identity()
+
+ def forward(self, x):
+ return self.act(self.bn(self.conv(x)))
+
+ def fuseforward(self, x):
+ return self.act(self.conv(x))
+
+
+class Bottleneck(nn.Module):
+ # Standard bottleneck
+ def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
+ super(Bottleneck, self).__init__()
+ c_ = int(c2 * e) # hidden channels
+ self.cv1 = Conv(c1, c_, 1, 1)
+ self.cv2 = Conv(c_, c2, 3, 1, g=g)
+ self.add = shortcut and c1 == c2
+
+ def forward(self, x):
+ return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
+
+
+class BottleneckCSP(nn.Module):
+ # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
+ super(BottleneckCSP, self).__init__()
+ c_ = int(c2 * e) # hidden channels
+ self.cv1 = Conv(c1, c_, 1, 1)
+ self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
+ self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
+ self.cv4 = Conv(2 * c_, c2, 1, 1)
+ self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
+ self.act = nn.LeakyReLU(0.1, inplace=True)
+ self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
+
+ def forward(self, x):
+ y1 = self.cv3(self.m(self.cv1(x)))
+ y2 = self.cv2(x)
+ return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
+
+
+class SPP(nn.Module):
+ # Spatial pyramid pooling layer used in YOLOv3-SPP
+ def __init__(self, c1, c2, k=(5, 9, 13)):
+ super(SPP, self).__init__()
+ c_ = c1 // 2 # hidden channels
+ self.cv1 = Conv(c1, c_, 1, 1)
+ self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
+ self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
+
+ def forward(self, x):
+ x = self.cv1(x)
+ return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
+
+
+class Focus(nn.Module):
+ # Focus wh information into c-space
+ # slice concat conv
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
+ super(Focus, self).__init__()
+ self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
+
+ def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
+ return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
+
+
+class Concat(nn.Module):
+ # Concatenate a list of tensors along dimension
+ def __init__(self, dimension=1):
+ super(Concat, self).__init__()
+ self.d = dimension
+
+ def forward(self, x):
+ """ print("***********************")
+ for f in x:
+ print(f.shape) """
+ return torch.cat(x, self.d)
+
+
+class Detect(nn.Module):
+ stride = None # strides computed during build
+
+ def __init__(self, nc=13, anchors=(), ch=()): # detection layer
+ super(Detect, self).__init__()
+ self.nc = nc # number of classes
+ self.no = nc + 5 # number of outputs per anchor 85
+ self.nl = len(anchors) # number of detection layers 3
+ self.na = len(anchors[0]) // 2 # number of anchors 3
+ self.grid = [torch.zeros(1)] * self.nl # init grid
+ a = torch.tensor(anchors).float().view(self.nl, -1, 2)
+ self.register_buffer('anchors', a) # shape(nl,na,2)
+ self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2)
+ self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
+
+ def forward(self, x):
+ z = [] # inference output
+ for i in range(self.nl):
+ x[i] = self.m[i](x[i]) # conv
+ # print(str(i)+str(x[i].shape))
+ bs, _, ny, nx = x[i].shape # x(bs,255,w,w) to x(bs,3,w,w,85)
+ x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
+ # print(str(i)+str(x[i].shape))
+
+ if not self.training: # inference
+ if self.grid[i].shape[2:4] != x[i].shape[2:4]:
+ self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
+ y = x[i].sigmoid()
+ #print("**")
+ #print(y.shape) #[1, 3, w, h, 85]
+ #print(self.grid[i].shape) #[1, 3, w, h, 2]
+ y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy
+ y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
+ """print("**")
+ print(y.shape) #[1, 3, w, h, 85]
+ print(y.view(bs, -1, self.no).shape) #[1, 3*w*h, 85]"""
+ z.append(y.view(bs, -1, self.no))
+ return x if self.training else (torch.cat(z, 1), x)
+
+ @staticmethod
+ def _make_grid(nx=20, ny=20):
+
+ yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
+ return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
+
+
+"""class Detections:
+ # detections class for YOLOv5 inference results
+ def __init__(self, imgs, pred, names=None):
+ super(Detections, self).__init__()
+ d = pred[0].device # device
+ gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs] # normalizations
+ self.imgs = imgs # list of images as numpy arrays
+ self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
+ self.names = names # class names
+ self.xyxy = pred # xyxy pixels
+ self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
+ self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
+ self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
+ self.n = len(self.pred)
+
+ def display(self, pprint=False, show=False, save=False):
+ colors = color_list()
+ for i, (img, pred) in enumerate(zip(self.imgs, self.pred)):
+ str = f'Image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '
+ if pred is not None:
+ for c in pred[:, -1].unique():
+ n = (pred[:, -1] == c).sum() # detections per class
+ str += f'{n} {self.names[int(c)]}s, ' # add to string
+ if show or save:
+ img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img # from np
+ for *box, conf, cls in pred: # xyxy, confidence, class
+ # str += '%s %.2f, ' % (names[int(cls)], conf) # label
+ ImageDraw.Draw(img).rectangle(box, width=4, outline=colors[int(cls) % 10]) # plot
+ if save:
+ f = f'results{i}.jpg'
+ str += f"saved to '{f}'"
+ img.save(f) # save
+ if show:
+ img.show(f'Image {i}') # show
+ if pprint:
+ print(str)
+
+ def print(self):
+ self.display(pprint=True) # print results
+
+ def show(self):
+ self.display(show=True) # show results
+
+ def save(self):
+ self.display(save=True) # save results
+
+ def __len__(self):
+ return self.n
+
+ def tolist(self):
+ # return a list of Detections objects, i.e. 'for result in results.tolist():'
+ x = [Detections([self.imgs[i]], [self.pred[i]], self.names) for i in range(self.n)]
+ for d in x:
+ for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
+ setattr(d, k, getattr(d, k)[0]) # pop out of list"""
\ No newline at end of file
diff --git a/lib/models/light.py b/lib/models/light.py
new file mode 100644
index 0000000000000000000000000000000000000000..13a96ad894e7d840b9375d41d6ba37e194617225
--- /dev/null
+++ b/lib/models/light.py
@@ -0,0 +1,496 @@
+import torch
+from torch import tensor
+import torch.nn as nn
+import sys,os
+import math
+import sys
+sys.path.append(os.getcwd())
+from lib.utils import initialize_weights
+# from lib.models.common2 import DepthSeperabelConv2d as Conv
+# from lib.models.common2 import SPP, Bottleneck, BottleneckCSP, Focus, Concat, Detect
+from lib.models.common import Conv, SPP, Bottleneck, BottleneckCSP, Focus, Concat, Detect
+from torch.nn import Upsample
+from lib.utils import check_anchor_order
+from lib.core.evaluate import SegmentationMetric
+from lib.utils.utils import time_synchronized
+
+CSPDarknet_s = [
+[ -1, Focus, [3, 32, 3]],
+[ -1, Conv, [32, 64, 3, 2]],
+[ -1, BottleneckCSP, [64, 64, 1]],
+[ -1, Conv, [64, 128, 3, 2]],
+[ -1, BottleneckCSP, [128, 128, 3]],
+[ -1, Conv, [128, 256, 3, 2]],
+[ -1, BottleneckCSP, [256, 256, 3]],
+[ -1, Conv, [256, 512, 3, 2]],
+[ -1, SPP, [512, 512, [5, 9, 13]]],
+[ -1, BottleneckCSP, [512, 512, 1, False]]
+]
+
+# MCnet = [
+# [ -1, Focus, [3, 32, 3]],
+# [ -1, Conv, [32, 64, 3, 2]],
+# [ -1, BottleneckCSP, [64, 64, 1]],
+# [ -1, Conv, [64, 128, 3, 2]],
+# [ -1, BottleneckCSP, [128, 128, 3]],
+# [ -1, Conv, [128, 256, 3, 2]],
+# [ -1, BottleneckCSP, [256, 256, 3]],
+# [ -1, Conv, [256, 512, 3, 2]],
+# [ -1, SPP, [512, 512, [5, 9, 13]]],
+# [ -1, BottleneckCSP, [512, 512, 1, False]],
+# [ -1, Conv,[512, 256, 1, 1]],
+# [ -1, Upsample, [None, 2, 'nearest']],
+# [ [-1, 6], Concat, [1]],
+# [ -1, BottleneckCSP, [512, 256, 1, False]],
+# [ -1, Conv, [256, 128, 1, 1]],
+# [ -1, Upsample, [None, 2, 'nearest']],
+# [ [-1,4], Concat, [1]],
+# [ -1, BottleneckCSP, [256, 128, 1, False]],
+# [ -1, Conv, [128, 128, 3, 2]],
+# [ [-1, 14], Concat, [1]],
+# [ -1, BottleneckCSP, [256, 256, 1, False]],
+# [ -1, Conv, [256, 256, 3, 2]],
+# [ [-1, 10], Concat, [1]],
+# [ -1, BottleneckCSP, [512, 512, 1, False]],
+# [ [17, 20, 23], Detect, [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]],
+# [ 17, Conv, [128, 64, 3, 1]],
+# [ -1, Upsample, [None, 2, 'nearest']],
+# [ [-1,2], Concat, [1]],
+# [ -1, BottleneckCSP, [128, 64, 1, False]],
+# [ -1, Conv, [64, 32, 3, 1]],
+# [ -1, Upsample, [None, 2, 'nearest']],
+# [ -1, Conv, [32, 16, 3, 1]],
+# [ -1, BottleneckCSP, [16, 8, 1, False]],
+# [ -1, Upsample, [None, 2, 'nearest']],
+# [ -1, Conv, [8, 2, 3, 1]] #segmentation output
+# ]
+
+MCnet_SPP = [
+[ -1, Focus, [3, 32, 3]],
+[ -1, Conv, [32, 64, 3, 2]],
+[ -1, BottleneckCSP, [64, 64, 1]],
+[ -1, Conv, [64, 128, 3, 2]],
+[ -1, BottleneckCSP, [128, 128, 3]],
+[ -1, Conv, [128, 256, 3, 2]],
+[ -1, BottleneckCSP, [256, 256, 3]],
+[ -1, Conv, [256, 512, 3, 2]],
+[ -1, SPP, [512, 512, [5, 9, 13]]],
+[ -1, BottleneckCSP, [512, 512, 1, False]],
+[ -1, Conv,[512, 256, 1, 1]],
+[ -1, Upsample, [None, 2, 'nearest']],
+[ [-1, 6], Concat, [1]],
+[ -1, BottleneckCSP, [512, 256, 1, False]],
+[ -1, Conv, [256, 128, 1, 1]],
+[ -1, Upsample, [None, 2, 'nearest']],
+[ [-1,4], Concat, [1]],
+[ -1, BottleneckCSP, [256, 128, 1, False]],
+[ -1, Conv, [128, 128, 3, 2]],
+[ [-1, 14], Concat, [1]],
+[ -1, BottleneckCSP, [256, 256, 1, False]],
+[ -1, Conv, [256, 256, 3, 2]],
+[ [-1, 10], Concat, [1]],
+[ -1, BottleneckCSP, [512, 512, 1, False]],
+# [ [17, 20, 23], Detect, [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]],
+[ [17, 20, 23], Detect, [13, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]],
+[ 17, Conv, [128, 64, 3, 1]],
+[ -1, Upsample, [None, 2, 'nearest']],
+[ [-1,2], Concat, [1]],
+[ -1, BottleneckCSP, [128, 64, 1, False]],
+[ -1, Conv, [64, 32, 3, 1]],
+[ -1, Upsample, [None, 2, 'nearest']],
+[ -1, Conv, [32, 16, 3, 1]],
+[ -1, BottleneckCSP, [16, 8, 1, False]],
+[ -1, Upsample, [None, 2, 'nearest']],
+[ -1, SPP, [8, 2, [5, 9, 13]]] #segmentation output
+]
+# [2,6,3,9,5,13], [7,19,11,26,17,39], [28,64,44,103,61,183]
+MCnet_fast = [
+[ -1, Focus, [3, 32, 3]],#0
+[ -1, Conv, [32, 64, 3, 2]],#1
+[ -1, BottleneckCSP, [64, 128, 1, True, True]],#2
+[ -1, BottleneckCSP, [128, 256, 1, True, True]],#4
+[ -1, BottleneckCSP, [256, 512, 1, True, True]],#6
+[ -1, SPP, [512, 512, [5, 9, 13]]],#8
+[ -1, BottleneckCSP, [512, 512, 1, False]],#9
+[ -1, Conv,[512, 256, 1, 1]],#10
+[ -1, Upsample, [None, 2, 'nearest']],#11
+[ [-1, 6], Concat, [1]],#12
+[ -1, BottleneckCSP, [512, 256, 1, False]],#13
+[ -1, Conv, [256, 128, 1, 1]],#14
+[ -1, Upsample, [None, 2, 'nearest']],#15
+[ [-1,4], Concat, [1]],#16
+[ -1, BottleneckCSP, [256, 128, 1, False, True]],#17
+[ [-1, 14], Concat, [1]],#19
+[ -1, BottleneckCSP, [256, 256, 1, False, True]],#20
+[ [-1, 10], Concat, [1]],#22
+[ -1, BottleneckCSP, [512, 512, 1, False]],#23
+[ [17, 20, 23], Detect, [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]], #Detect output 24
+
+[ 16, Conv, [256, 64, 3, 1]],#25
+[ -1, Upsample, [None, 2, 'nearest']],#26
+[ [-1,2], Concat, [1]],#27
+[ -1, BottleneckCSP, [128, 32, 1, False]],#28
+# [ -1, Conv, [64, 32, 1, 1]],#29
+[ -1, Upsample, [None, 2, 'nearest']],#30
+# [ -1, Conv, [32, 16, 1, 1]],#31
+[ -1, BottleneckCSP, [32, 8, 1, False]],#32
+[ -1, Upsample, [None, 2, 'nearest']],#33
+[ -1, Conv, [8, 2, 1, 1]], #Driving area segmentation output#34
+
+[ 16, Conv, [256, 64, 3, 1]],
+[ -1, Upsample, [None, 2, 'nearest']],
+[ [-1,2], Concat, [1]],
+[ -1, BottleneckCSP, [128, 32, 1, False]],
+# [ -1, Conv, [64, 32, 1, 1]],
+[ -1, Upsample, [None, 2, 'nearest']],
+# [ -1, Conv, [32, 16, 1, 1]],
+[ 31, BottleneckCSP, [32, 8, 1, False]],#35
+[ -1, Upsample, [None, 2, 'nearest']],#36
+[ -1, Conv, [8, 2, 1, 1]], #Lane line segmentation output #37
+]
+
+MCnet_light = [
+[ -1, Focus, [3, 32, 3]],#0
+[ -1, Conv, [32, 64, 3, 2]],#1
+[ -1, BottleneckCSP, [64, 64, 1]],#2
+[ -1, Conv, [64, 128, 3, 2]],#3
+[ -1, BottleneckCSP, [128, 128, 3]],#4
+[ -1, Conv, [128, 256, 3, 2]],#5
+[ -1, BottleneckCSP, [256, 256, 3]],#6
+[ -1, Conv, [256, 512, 3, 2]],#7
+[ -1, SPP, [512, 512, [5, 9, 13]]],#8
+[ -1, BottleneckCSP, [512, 512, 1, False]],#9
+[ -1, Conv,[512, 256, 1, 1]],#10
+[ -1, Upsample, [None, 2, 'nearest']],#11
+[ [-1, 6], Concat, [1]],#12
+[ -1, BottleneckCSP, [512, 256, 1, False]],#13
+[ -1, Conv, [256, 128, 1, 1]],#14
+[ -1, Upsample, [None, 2, 'nearest']],#15
+[ [-1,4], Concat, [1]],#16
+[ -1, BottleneckCSP, [256, 128, 1, False]],#17
+[ -1, Conv, [128, 128, 3, 2]],#18
+[ [-1, 14], Concat, [1]],#19
+[ -1, BottleneckCSP, [256, 256, 1, False]],#20
+[ -1, Conv, [256, 256, 3, 2]],#21
+[ [-1, 10], Concat, [1]],#22
+[ -1, BottleneckCSP, [512, 512, 1, False]],#23
+[ [17, 20, 23], Detect, [1, [[4,12,6,18,10,27], [15,38,24,59,39,78], [51,125,73,168,97,292]], [128, 256, 512]]], #Detect output 24
+
+[ 16, Conv, [256, 128, 3, 1]],#25
+[ -1, Upsample, [None, 2, 'nearest']],#26
+# [ [-1,2], Concat, [1]],#27
+[ -1, BottleneckCSP, [128, 64, 1, False]],#27
+[ -1, Conv, [64, 32, 3, 1]],#28
+[ -1, Upsample, [None, 2, 'nearest']],#29
+[ -1, Conv, [32, 16, 3, 1]],#30
+[ -1, BottleneckCSP, [16, 8, 1, False]],#31
+[ -1, Upsample, [None, 2, 'nearest']],#32
+[ -1, Conv, [8, 3, 3, 1]], #Driving area segmentation output#33
+
+# [ 16, Conv, [128, 64, 3, 1]],
+# [ -1, Upsample, [None, 2, 'nearest']],
+# [ [-1,2], Concat, [1]],
+# [ -1, BottleneckCSP, [128, 64, 1, False]],
+# [ -1, Conv, [64, 32, 3, 1]],
+# [ -1, Upsample, [None, 2, 'nearest']],
+# [ -1, Conv, [32, 16, 3, 1]],
+[ 30, BottleneckCSP, [16, 8, 1, False]],#34
+[ -1, Upsample, [None, 2, 'nearest']],#35
+[ -1, Conv, [8, 2, 3, 1]], #Lane line segmentation output #36
+]
+
+
+# The lane line and the driving area segment branches share information with each other
+MCnet_share = [
+[ -1, Focus, [3, 32, 3]], #0
+[ -1, Conv, [32, 64, 3, 2]], #1
+[ -1, BottleneckCSP, [64, 64, 1]], #2
+[ -1, Conv, [64, 128, 3, 2]], #3
+[ -1, BottleneckCSP, [128, 128, 3]], #4
+[ -1, Conv, [128, 256, 3, 2]], #5
+[ -1, BottleneckCSP, [256, 256, 3]], #6
+[ -1, Conv, [256, 512, 3, 2]], #7
+[ -1, SPP, [512, 512, [5, 9, 13]]], #8
+[ -1, BottleneckCSP, [512, 512, 1, False]], #9
+[ -1, Conv,[512, 256, 1, 1]], #10
+[ -1, Upsample, [None, 2, 'nearest']], #11
+[ [-1, 6], Concat, [1]], #12
+[ -1, BottleneckCSP, [512, 256, 1, False]], #13
+[ -1, Conv, [256, 128, 1, 1]], #14
+[ -1, Upsample, [None, 2, 'nearest']], #15
+[ [-1,4], Concat, [1]], #16
+[ -1, BottleneckCSP, [256, 128, 1, False]], #17
+[ -1, Conv, [128, 128, 3, 2]], #18
+[ [-1, 14], Concat, [1]], #19
+[ -1, BottleneckCSP, [256, 256, 1, False]], #20
+[ -1, Conv, [256, 256, 3, 2]], #21
+[ [-1, 10], Concat, [1]], #22
+[ -1, BottleneckCSP, [512, 512, 1, False]], #23
+[ [17, 20, 23], Detect, [1, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]], #Detect output 24
+
+[ 16, Conv, [256, 64, 3, 1]], #25
+[ -1, Upsample, [None, 2, 'nearest']], #26
+[ [-1,2], Concat, [1]], #27
+[ -1, BottleneckCSP, [128, 64, 1, False]], #28
+[ -1, Conv, [64, 32, 3, 1]], #29
+[ -1, Upsample, [None, 2, 'nearest']], #30
+[ -1, Conv, [32, 16, 3, 1]], #31
+[ -1, BottleneckCSP, [16, 8, 1, False]], #32 driving area segment neck
+
+[ 16, Conv, [256, 64, 3, 1]], #33
+[ -1, Upsample, [None, 2, 'nearest']], #34
+[ [-1,2], Concat, [1]], #35
+[ -1, BottleneckCSP, [128, 64, 1, False]], #36
+[ -1, Conv, [64, 32, 3, 1]], #37
+[ -1, Upsample, [None, 2, 'nearest']], #38
+[ -1, Conv, [32, 16, 3, 1]], #39
+[ -1, BottleneckCSP, [16, 8, 1, False]], #40 lane line segment neck
+
+[ [31,39], Concat, [1]], #41
+[ -1, Conv, [32, 8, 3, 1]], #42 Share_Block
+
+
+[ [32,42], Concat, [1]], #43
+[ -1, Upsample, [None, 2, 'nearest']], #44
+[ -1, Conv, [16, 2, 3, 1]], #45 Driving area segmentation output
+
+
+[ [40,42], Concat, [1]], #46
+[ -1, Upsample, [None, 2, 'nearest']], #47
+[ -1, Conv, [16, 2, 3, 1]] #48Lane line segmentation output
+]
+
+# The lane line and the driving area segment branches without share information with each other
+MCnet_no_share = [
+[ -1, Focus, [3, 32, 3]], #0
+[ -1, Conv, [32, 64, 3, 2]], #1
+[ -1, BottleneckCSP, [64, 64, 1]], #2
+[ -1, Conv, [64, 128, 3, 2]], #3
+[ -1, BottleneckCSP, [128, 128, 3]], #4
+[ -1, Conv, [128, 256, 3, 2]], #5
+[ -1, BottleneckCSP, [256, 256, 3]], #6
+[ -1, Conv, [256, 512, 3, 2]], #7
+[ -1, SPP, [512, 512, [5, 9, 13]]], #8
+[ -1, BottleneckCSP, [512, 512, 1, False]], #9
+[ -1, Conv,[512, 256, 1, 1]], #10
+[ -1, Upsample, [None, 2, 'nearest']], #11
+[ [-1, 6], Concat, [1]], #12
+[ -1, BottleneckCSP, [512, 256, 1, False]], #13
+[ -1, Conv, [256, 128, 1, 1]], #14
+[ -1, Upsample, [None, 2, 'nearest']], #15
+[ [-1,4], Concat, [1]], #16
+[ -1, BottleneckCSP, [256, 128, 1, False]], #17
+[ -1, Conv, [128, 128, 3, 2]], #18
+[ [-1, 14], Concat, [1]], #19
+[ -1, BottleneckCSP, [256, 256, 1, False]], #20
+[ -1, Conv, [256, 256, 3, 2]], #21
+[ [-1, 10], Concat, [1]], #22
+[ -1, BottleneckCSP, [512, 512, 1, False]], #23
+[ [17, 20, 23], Detect, [13, [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]], [128, 256, 512]]], #Detect output 24
+
+[ 16, Conv, [256, 64, 3, 1]], #25
+[ -1, Upsample, [None, 2, 'nearest']], #26
+[ [-1,2], Concat, [1]], #27
+[ -1, BottleneckCSP, [128, 64, 1, False]], #28
+[ -1, Conv, [64, 32, 3, 1]], #29
+[ -1, Upsample, [None, 2, 'nearest']], #30
+[ -1, Conv, [32, 16, 3, 1]], #31
+[ -1, BottleneckCSP, [16, 8, 1, False]], #32 driving area segment neck
+[ -1, Upsample, [None, 2, 'nearest']], #33
+[ -1, Conv, [8, 3, 3, 1]], #34 Driving area segmentation output
+
+[ 16, Conv, [256, 64, 3, 1]], #35
+[ -1, Upsample, [None, 2, 'nearest']], #36
+[ [-1,2], Concat, [1]], #37
+[ -1, BottleneckCSP, [128, 64, 1, False]], #38
+[ -1, Conv, [64, 32, 3, 1]], #39
+[ -1, Upsample, [None, 2, 'nearest']], #40
+[ -1, Conv, [32, 16, 3, 1]], #41
+[ -1, BottleneckCSP, [16, 8, 1, False]], #42 lane line segment neck
+[ -1, Upsample, [None, 2, 'nearest']], #43
+[ -1, Conv, [8, 2, 3, 1]] #44 Lane line segmentation output
+]
+
+
+
+class MCnet(nn.Module):
+ def __init__(self, block_cfg, **kwargs):
+ super(MCnet, self).__init__()
+ layers, save= [], []
+ self.nc = 13
+ self.detector_index = -1
+ self.Da_out_idx = 45 if len(block_cfg)==49 else 34
+ # self.Da_out_idx = 37
+
+ # Build model
+ # print(block_cfg)
+ for i, (from_, block, args) in enumerate(block_cfg):
+ block = eval(block) if isinstance(block, str) else block # eval strings
+ if block is Detect:
+ self.detector_index = i
+ block_ = block(*args)
+ block_.index, block_.from_ = i, from_
+ layers.append(block_)
+ save.extend(x % i for x in ([from_] if isinstance(from_, int) else from_) if x != -1) # append to savelist
+ self.model, self.save = nn.Sequential(*layers), sorted(save)
+ self.names = [str(i) for i in range(self.nc)]
+
+ # set stride、anchor for detector
+ Detector = self.model[self.detector_index] # detector
+ if isinstance(Detector, Detect):
+ s = 128 # 2x min stride
+ # for x in self.forward(torch.zeros(1, 3, s, s)):
+ # print (x.shape)
+ with torch.no_grad():
+ detects, _, _= self.forward(torch.zeros(1, 3, s, s))
+ Detector.stride = torch.tensor([s / x.shape[-2] for x in detects]) # forward
+ # print("stride"+str(Detector.stride ))
+ Detector.anchors /= Detector.stride.view(-1, 1, 1) # Set the anchors for the corresponding scale
+ check_anchor_order(Detector)
+ self.stride = Detector.stride
+ self._initialize_biases()
+
+ initialize_weights(self)
+
+ def forward(self, x):
+ cache = []
+ out = []
+ #times = []
+ for i, block in enumerate(self.model):
+ #t0 = time_synchronized()
+ if block.from_ != -1:
+ x = cache[block.from_] if isinstance(block.from_, int) else [x if j == -1 else cache[j] for j in block.from_] #calculate concat detect
+ x = block(x)
+ if isinstance(block, Detect): # save detector result
+ out.append(x)
+ if i == self.Da_out_idx: #save driving area segment result
+ m=nn.Sigmoid()
+ out.append(m(x))
+ cache.append(x if block.index in self.save else None)
+ """t1 = time_synchronized()
+ print(str(i) + " : " + str(t1-t0))
+ times.append(t1-t0)
+ print(sum(times[:25]))
+ print(sum(times[25:33]))
+ print(sum(times[33:41]))
+ print(sum(times[41:43]))
+ print(sum(times[43:46]))
+ print(sum(times[46:]))"""
+ m=nn.Sigmoid()
+ out.append(m(x))
+ return out
+
+ def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
+ # https://arxiv.org/abs/1708.02002 section 3.3
+ # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
+ # m = self.model[-1] # Detect() module
+ m = self.model[self.detector_index] # Detect() module
+ for mi, s in zip(m.m, m.stride): # from
+ b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
+ b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
+ b[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls
+ mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+
+class CSPDarknet(nn.Module):
+ def __init__(self, block_cfg, **kwargs):
+ super(CSPDarknet, self).__init__()
+ layers, save= [], []
+ # self.nc = 13 #output category num
+ self.nc = 1
+ self.detector_index = -1
+
+ # Build model
+ for i, (from_, block, args) in enumerate(block_cfg):
+ block = eval(block) if isinstance(block, str) else block # eval strings
+ if block is Detect:
+ self.detector_index = i
+ block_ = block(*args)
+ block_.index, block_.from_ = i, from_
+ layers.append(block_)
+ save.extend(x % i for x in ([from_] if isinstance(from_, int) else from_) if x != -1) # append to savelist
+ self.model, self.save = nn.Sequential(*layers), sorted(save)
+ self.names = [str(i) for i in range(self.nc)]
+
+ # set stride、anchor for detector
+ Detector = self.model[self.detector_index] # detector
+ if isinstance(Detector, Detect):
+ s = 128 # 2x min stride
+ # for x in self.forward(torch.zeros(1, 3, s, s)):
+ # print (x.shape)
+ with torch.no_grad():
+ detects, _ = self.forward(torch.zeros(1, 3, s, s))
+ Detector.stride = torch.tensor([s / x.shape[-2] for x in detects]) # forward
+ # print("stride"+str(Detector.stride ))
+ Detector.anchors /= Detector.stride.view(-1, 1, 1) # Set the anchors for the corresponding scale
+ check_anchor_order(Detector)
+ self.stride = Detector.stride
+ self._initialize_biases()
+
+ initialize_weights(self)
+
+ def forward(self, x):
+ cache = []
+ out = []
+ for i, block in enumerate(self.model):
+ if block.from_ != -1:
+ x = cache[block.from_] if isinstance(block.from_, int) else [x if j == -1 else cache[j] for j in block.from_] #calculate concat detect
+ start = time.time()
+ x = block(x)
+ end = time.time()
+ print(start-end)
+ """y = None if isinstance(x, list) else x.shape"""
+ if isinstance(block, Detect): # save detector result
+ out.append(x)
+ cache.append(x if block.index in self.save else None)
+ m=nn.Sigmoid()
+ out.append(m(x))
+ # out.append(x)
+ # print(out[0][0].shape, out[0][1].shape, out[0][2].shape)
+ return out
+
+ def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
+ # https://arxiv.org/abs/1708.02002 section 3.3
+ # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
+ # m = self.model[-1] # Detect() module
+ m = self.model[self.detector_index] # Detect() module
+ for mi, s in zip(m.m, m.stride): # from
+ b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
+ b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
+ b[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls
+ mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+
+
+def get_net(cfg, **kwargs):
+ # m_block_cfg = MCnet_share if cfg.MODEL.STRU_WITHSHARE else MCnet_no_share
+ m_block_cfg = MCnet_no_share
+ model = MCnet(m_block_cfg, **kwargs)
+ return model
+
+
+if __name__ == "__main__":
+ from torch.utils.tensorboard import SummaryWriter
+ model = get_net(False)
+ input_ = torch.randn((1, 3, 256, 256))
+ gt_ = torch.rand((1, 2, 256, 256))
+ metric = SegmentationMetric(2)
+
+ detects, dring_area_seg, lane_line_seg = model(input_)
+ for det in detects:
+ print(det.shape)
+ print(dring_area_seg.shape)
+ print(dring_area_seg.view(-1).shape)
+ _,predict=torch.max(dring_area_seg, 1)
+ print(predict.shape)
+ print(lane_line_seg.shape)
+
+ _,lane_line_pred=torch.max(lane_line_seg, 1)
+ _,lane_line_gt=torch.max(gt_, 1)
+ metric.reset()
+ metric.addBatch(lane_line_pred.cpu(), lane_line_gt.cpu())
+ acc = metric.pixelAccuracy()
+ meanAcc = metric.meanPixelAccuracy()
+ mIoU = metric.meanIntersectionOverUnion()
+ FWIoU = metric.Frequency_Weighted_Intersection_over_Union()
+ IoU = metric.IntersectionOverUnion()
+ print(IoU)
+ print(mIoU)
\ No newline at end of file
diff --git a/lib/utils/__init__.py b/lib/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd59e9977011221ab0466eb7ca75afe9634f4ec8
--- /dev/null
+++ b/lib/utils/__init__.py
@@ -0,0 +1,4 @@
+from .utils import initialize_weights, xyxy2xywh, is_parallel, DataLoaderX, torch_distributed_zero_first, clean_str
+from .autoanchor import check_anchor_order, run_anchor, kmean_anchors
+from .augmentations import augment_hsv, random_perspective, cutout, letterbox,letterbox_for_img
+from .plot import plot_img_and_mask,plot_one_box,show_seg_result
diff --git a/lib/utils/augmentations.py b/lib/utils/augmentations.py
new file mode 100644
index 0000000000000000000000000000000000000000..707ac2b2bcc589046fab3be0547661d8925e6b59
--- /dev/null
+++ b/lib/utils/augmentations.py
@@ -0,0 +1,253 @@
+# -*- coding: utf-8 -*-
+
+import numpy as np
+import cv2
+import random
+import math
+
+
+def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
+ """change color hue, saturation, value"""
+ r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
+ hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
+ dtype = img.dtype # uint8
+
+ x = np.arange(0, 256, dtype=np.int16)
+ lut_hue = ((x * r[0]) % 180).astype(dtype)
+ lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
+ lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
+
+ img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
+ cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed
+
+ # Histogram equalization
+ # if random.random() < 0.2:
+ # for i in range(3):
+ # img[:, :, i] = cv2.equalizeHist(img[:, :, i])
+
+
+def random_perspective(combination, targets=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, border=(0, 0)):
+ """combination of img transform"""
+ # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
+ # targets = [cls, xyxy]
+ img, gray, line = combination
+ height = img.shape[0] + border[0] * 2 # shape(h,w,c)
+ width = img.shape[1] + border[1] * 2
+
+ # Center
+ C = np.eye(3)
+ C[0, 2] = -img.shape[1] / 2 # x translation (pixels)
+ C[1, 2] = -img.shape[0] / 2 # y translation (pixels)
+
+ # Perspective
+ P = np.eye(3)
+ P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y)
+ P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x)
+
+ # Rotation and Scale
+ R = np.eye(3)
+ a = random.uniform(-degrees, degrees)
+ # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
+ s = random.uniform(1 - scale, 1 + scale)
+ # s = 2 ** random.uniform(-scale, scale)
+ R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
+
+ # Shear
+ S = np.eye(3)
+ S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
+ S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
+
+ # Translation
+ T = np.eye(3)
+ T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels)
+ T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels)
+
+ # Combined rotation matrix
+ M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT
+ if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
+ if perspective:
+ img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))
+ gray = cv2.warpPerspective(gray, M, dsize=(width, height), borderValue=0)
+ line = cv2.warpPerspective(line, M, dsize=(width, height), borderValue=0)
+ else: # affine
+ img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
+ gray = cv2.warpAffine(gray, M[:2], dsize=(width, height), borderValue=0)
+ line = cv2.warpAffine(line, M[:2], dsize=(width, height), borderValue=0)
+
+ # Visualize
+ # import matplotlib.pyplot as plt
+ # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
+ # ax[0].imshow(img[:, :, ::-1]) # base
+ # ax[1].imshow(img2[:, :, ::-1]) # warped
+
+ # Transform label coordinates
+ n = len(targets)
+ if n:
+ # warp points
+ xy = np.ones((n * 4, 3))
+ xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
+ xy = xy @ M.T # transform
+ if perspective:
+ xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale
+ else: # affine
+ xy = xy[:, :2].reshape(n, 8)
+
+ # create new boxes
+ x = xy[:, [0, 2, 4, 6]]
+ y = xy[:, [1, 3, 5, 7]]
+ xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
+
+ # # apply angle-based reduction of bounding boxes
+ # radians = a * math.pi / 180
+ # reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
+ # x = (xy[:, 2] + xy[:, 0]) / 2
+ # y = (xy[:, 3] + xy[:, 1]) / 2
+ # w = (xy[:, 2] - xy[:, 0]) * reduction
+ # h = (xy[:, 3] - xy[:, 1]) * reduction
+ # xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
+
+ # clip boxes
+ xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
+ xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
+
+ # filter candidates
+ i = _box_candidates(box1=targets[:, 1:5].T * s, box2=xy.T)
+ targets = targets[i]
+ targets[:, 1:5] = xy[i]
+
+ combination = (img, gray, line)
+ return combination, targets
+
+
+def cutout(combination, labels):
+ # Applies image cutout augmentation https://arxiv.org/abs/1708.04552
+ image, gray = combination
+ h, w = image.shape[:2]
+
+ def bbox_ioa(box1, box2):
+ # Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. boxes are x1y1x2y2
+ box2 = box2.transpose()
+
+ # Get the coordinates of bounding boxes
+ b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
+ b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
+
+ # Intersection area
+ inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
+ (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)
+
+ # box2 area
+ box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16
+
+ # Intersection over box2 area
+ return inter_area / box2_area
+
+ # create random masks
+ scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction
+ for s in scales:
+ mask_h = random.randint(1, int(h * s))
+ mask_w = random.randint(1, int(w * s))
+
+ # box
+ xmin = max(0, random.randint(0, w) - mask_w // 2)
+ ymin = max(0, random.randint(0, h) - mask_h // 2)
+ xmax = min(w, xmin + mask_w)
+ ymax = min(h, ymin + mask_h)
+ # print('xmin:{},ymin:{},xmax:{},ymax:{}'.format(xmin,ymin,xmax,ymax))
+
+ # apply random color mask
+ image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
+ gray[ymin:ymax, xmin:xmax] = -1
+
+ # return unobscured labels
+ if len(labels) and s > 0.03:
+ box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
+ ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area
+ labels = labels[ioa < 0.60] # remove >60% obscured labels
+
+ return image, gray, labels
+
+
+def letterbox(combination, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
+ """Resize the input image and automatically padding to suitable shape :https://zhuanlan.zhihu.com/p/172121380"""
+ # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
+ img, gray, line = combination
+ shape = img.shape[:2] # current shape [height, width]
+ if isinstance(new_shape, int):
+ new_shape = (new_shape, new_shape)
+
+ # Scale ratio (new / old)
+ r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+ if not scaleup: # only scale down, do not scale up (for better test mAP)
+ r = min(r, 1.0)
+
+ # Compute padding
+ ratio = r, r # width, height ratios
+ new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+ dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
+ if auto: # minimum rectangle
+ dw, dh = np.mod(dw, 32), np.mod(dh, 32) # wh padding
+ elif scaleFill: # stretch
+ dw, dh = 0.0, 0.0
+ new_unpad = (new_shape[1], new_shape[0])
+ ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
+
+ dw /= 2 # divide padding into 2 sides
+ dh /= 2
+
+ if shape[::-1] != new_unpad: # resize
+ img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
+ gray = cv2.resize(gray, new_unpad, interpolation=cv2.INTER_LINEAR)
+ line = cv2.resize(line, new_unpad, interpolation=cv2.INTER_LINEAR)
+
+ top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+ left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+
+ img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
+ gray = cv2.copyMakeBorder(gray, top, bottom, left, right, cv2.BORDER_CONSTANT, value=0) # add border
+ line = cv2.copyMakeBorder(line, top, bottom, left, right, cv2.BORDER_CONSTANT, value=0) # add border
+ # print(img.shape)
+
+ combination = (img, gray, line)
+ return combination, ratio, (dw, dh)
+
+def letterbox_for_img(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
+ # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
+ shape = img.shape[:2] # current shape [height, width]
+ if isinstance(new_shape, int):
+ new_shape = (new_shape, new_shape)
+
+ # Scale ratio (new / old)
+
+ r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+ if not scaleup: # only scale down, do not scale up (for better test mAP)
+ r = min(r, 1.0)
+
+ # Compute padding
+ ratio = r, r # width, height ratios
+ new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+ dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
+ if auto: # minimum rectangle
+ dw, dh = np.mod(dw, 32), np.mod(dh, 32) # wh padding
+ elif scaleFill: # stretch
+ dw, dh = 0.0, 0.0
+ new_unpad = (new_shape[1], new_shape[0])
+ ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
+
+ dw /= 2 # divide padding into 2 sides
+ dh /= 2
+ if shape[::-1] != new_unpad: # resize
+ img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_AREA)
+
+ top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+ left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+ img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
+ return img, ratio, (dw, dh)
+
+
+def _box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1): # box1(4,n), box2(4,n)
+ # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
+ w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
+ w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
+ ar = np.maximum(w2 / (h2 + 1e-16), h2 / (w2 + 1e-16)) # aspect ratio
+ return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + 1e-16) > area_thr) & (ar < ar_thr) # candidates
diff --git a/lib/utils/autoanchor.py b/lib/utils/autoanchor.py
new file mode 100644
index 0000000000000000000000000000000000000000..7820153eacb1ca1cc86f881ab1cddd1e601f0856
--- /dev/null
+++ b/lib/utils/autoanchor.py
@@ -0,0 +1,134 @@
+# Auto-anchor utils
+
+import numpy as np
+import torch
+import yaml
+from scipy.cluster.vq import kmeans
+from tqdm import tqdm
+from lib.utils import is_parallel
+
+
+def check_anchor_order(m):
+ # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
+ a = m.anchor_grid.prod(-1).view(-1) # anchor area
+ da = a[-1] - a[0] # delta a
+ ds = m.stride[-1] - m.stride[0] # delta s
+ if da.sign() != ds.sign(): # same order
+ print('Reversing anchor order')
+ m.anchors[:] = m.anchors.flip(0)
+ m.anchor_grid[:] = m.anchor_grid.flip(0)
+
+
+def run_anchor(logger,dataset, model, thr=4.0, imgsz=640):
+ det = model.module.model[model.module.detector_index] if is_parallel(model) \
+ else model.model[model.detector_index]
+ anchor_num = det.na * det.nl
+ new_anchors = kmean_anchors(dataset, n=anchor_num, img_size=imgsz, thr=thr, gen=1000, verbose=False)
+ new_anchors = torch.tensor(new_anchors, device=det.anchors.device).type_as(det.anchors)
+ det.anchor_grid[:] = new_anchors.clone().view_as(det.anchor_grid) # for inference
+ det.anchors[:] = new_anchors.clone().view_as(det.anchors) / det.stride.to(det.anchors.device).view(-1, 1, 1) # loss
+ check_anchor_order(det)
+ logger.info(str(det.anchors))
+ print('New anchors saved to model. Update model config to use these anchors in the future.')
+
+
+def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
+ """ Creates kmeans-evolved anchors from training dataset
+
+ Arguments:
+ path: path to dataset *.yaml, or a loaded dataset
+ n: number of anchors
+ img_size: image size used for training
+ thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
+ gen: generations to evolve anchors using genetic algorithm
+ verbose: print all results
+
+ Return:
+ k: kmeans evolved anchors
+
+ Usage:
+ from utils.autoanchor import *; _ = kmean_anchors()
+ """
+ thr = 1. / thr
+
+ def metric(k, wh): # compute metrics
+ r = wh[:, None] / k[None]
+ x = torch.min(r, 1. / r).min(2)[0] # ratio metric
+ # x = wh_iou(wh, torch.tensor(k)) # iou metric
+ return x, x.max(1)[0] # x, best_x
+
+ def anchor_fitness(k): # mutation fitness
+ _, best = metric(torch.tensor(k, dtype=torch.float32), wh)
+ return (best * (best > thr).float()).mean() # fitness
+
+ def print_results(k):
+ k = k[np.argsort(k.prod(1))] # sort small to large
+ x, best = metric(k, wh0)
+ bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr
+ print('thr=%.2f: %.4f best possible recall, %.2f anchors past thr' % (thr, bpr, aat))
+ print('n=%g, img_size=%s, metric_all=%.3f/%.3f-mean/best, past_thr=%.3f-mean: ' %
+ (n, img_size, x.mean(), best.mean(), x[x > thr].mean()), end='')
+ for i, x in enumerate(k):
+ print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg
+ return k
+
+ if isinstance(path, str): # not class
+ raise TypeError('Dataset must be class, but found str')
+ else:
+ dataset = path # dataset
+
+ labels = [db['label'] for db in dataset.db]
+ labels = np.vstack(labels)
+ if not (labels[:, 1:] <= 1).all():
+ # normalize label
+ labels[:, [2, 4]] /= dataset.shapes[0]
+ labels[:, [1, 3]] /= dataset.shapes[1]
+ # Get label wh
+ shapes = img_size * dataset.shapes / dataset.shapes.max()
+ # wh0 = np.concatenate([l[:, 3:5] * shapes for l in labels]) # wh
+ wh0 = labels[:, 3:5] * shapes
+ # Filter
+ i = (wh0 < 3.0).any(1).sum()
+ if i:
+ print('WARNING: Extremely small objects found. '
+ '%g of %g labels are < 3 pixels in width or height.' % (i, len(wh0)))
+ wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels
+
+ # Kmeans calculation
+ print('Running kmeans for %g anchors on %g points...' % (n, len(wh)))
+ s = wh.std(0) # sigmas for whitening
+ k, dist = kmeans(wh / s, n, iter=30) # points, mean distance
+ k *= s
+ wh = torch.tensor(wh, dtype=torch.float32) # filtered
+ wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered
+ k = print_results(k)
+
+ # Plot
+ # k, d = [None] * 20, [None] * 20
+ # for i in tqdm(range(1, 21)):
+ # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance
+ # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True)
+ # ax = ax.ravel()
+ # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.')
+ # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh
+ # ax[0].hist(wh[wh[:, 0]<100, 0],400)
+ # ax[1].hist(wh[wh[:, 1]<100, 1],400)
+ # fig.savefig('wh.png', dpi=200)
+
+ # Evolve
+ npr = np.random
+ f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma
+ pbar = tqdm(range(gen), desc='Evolving anchors with Genetic Algorithm') # progress bar
+ for _ in pbar:
+ v = np.ones(sh)
+ while (v == 1).all(): # mutate until a change occurs (prevent duplicates)
+ v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
+ kg = (k.copy() * v).clip(min=2.0)
+ fg = anchor_fitness(kg)
+ if fg > f:
+ f, k = fg, kg.copy()
+ pbar.desc = 'Evolving anchors with Genetic Algorithm: fitness = %.4f' % f
+ if verbose:
+ print_results(k)
+
+ return print_results(k)
diff --git a/lib/utils/plot.py b/lib/utils/plot.py
new file mode 100644
index 0000000000000000000000000000000000000000..b759ef4f731bd8fee1aa8894dab31508adafd34e
--- /dev/null
+++ b/lib/utils/plot.py
@@ -0,0 +1,113 @@
+## 处理pred结果的.json文件,画图
+import matplotlib.pyplot as plt
+import cv2
+import numpy as np
+import random
+
+
+def plot_img_and_mask(img, mask, index,epoch,save_dir):
+ classes = mask.shape[2] if len(mask.shape) > 2 else 1
+ fig, ax = plt.subplots(1, classes + 1)
+ ax[0].set_title('Input image')
+ ax[0].imshow(img)
+ if classes > 1:
+ for i in range(classes):
+ ax[i+1].set_title(f'Output mask (class {i+1})')
+ ax[i+1].imshow(mask[:, :, i])
+ else:
+ ax[1].set_title(f'Output mask')
+ ax[1].imshow(mask)
+ plt.xticks([]), plt.yticks([])
+ # plt.show()
+ plt.savefig(save_dir+"/batch_{}_{}_seg.png".format(epoch,index))
+
+def show_seg_result(img, result, index, epoch, save_dir=None, is_ll=False,palette=None,is_demo=False,is_gt=False):
+ # img = mmcv.imread(img)
+ # img = img.copy()
+ # seg = result[0]
+ if palette is None:
+ palette = np.random.randint(
+ 0, 255, size=(3, 3))
+ palette[0] = [0, 0, 0]
+ palette[1] = [0, 255, 0]
+ palette[2] = [255, 0, 0]
+ palette = np.array(palette)
+ assert palette.shape[0] == 3 # len(classes)
+ assert palette.shape[1] == 3
+ assert len(palette.shape) == 2
+
+ if not is_demo:
+ color_seg = np.zeros((result.shape[0], result.shape[1], 3), dtype=np.uint8)
+ for label, color in enumerate(palette):
+ color_seg[result == label, :] = color
+ else:
+ color_area = np.zeros((result[0].shape[0], result[0].shape[1], 3), dtype=np.uint8)
+
+ # for label, color in enumerate(palette):
+ # color_area[result[0] == label, :] = color
+
+ color_area[result[0] == 1] = [0, 255, 0]
+ color_area[result[1] ==1] = [255, 0, 0]
+ color_seg = color_area
+
+ # convert to BGR
+ color_seg = color_seg[..., ::-1]
+ # print(color_seg.shape)
+ color_mask = np.mean(color_seg, 2)
+ img[color_mask != 0] = img[color_mask != 0] * 0.5 + color_seg[color_mask != 0] * 0.5
+ # img = img * 0.5 + color_seg * 0.5
+ img = img.astype(np.uint8)
+ img = cv2.resize(img, (1280,720), interpolation=cv2.INTER_LINEAR)
+
+ if not is_demo:
+ if not is_gt:
+ if not is_ll:
+ cv2.imwrite(save_dir+"/batch_{}_{}_da_segresult.png".format(epoch,index), img)
+ else:
+ cv2.imwrite(save_dir+"/batch_{}_{}_ll_segresult.png".format(epoch,index), img)
+ else:
+ if not is_ll:
+ cv2.imwrite(save_dir+"/batch_{}_{}_da_seg_gt.png".format(epoch,index), img)
+ else:
+ cv2.imwrite(save_dir+"/batch_{}_{}_ll_seg_gt.png".format(epoch,index), img)
+ return img
+
+def plot_one_box(x, img, color=None, label=None, line_thickness=None):
+ # Plots one bounding box on image img
+ tl = line_thickness or round(0.0001 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness
+ color = color or [random.randint(0, 255) for _ in range(3)]
+ c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
+ cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
+ # if label:
+ # tf = max(tl - 1, 1) # font thickness
+ # t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
+ # c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
+ # cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled
+ # print(label)
+ # cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
+
+
+if __name__ == "__main__":
+ pass
+# def plot():
+# cudnn.benchmark = cfg.CUDNN.BENCHMARK
+# torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
+# torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED
+
+# device = select_device(logger, batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU) if not cfg.DEBUG \
+# else select_device(logger, 'cpu')
+
+# if args.local_rank != -1:
+# assert torch.cuda.device_count() > args.local_rank
+# torch.cuda.set_device(args.local_rank)
+# device = torch.device('cuda', args.local_rank)
+# dist.init_process_group(backend='nccl', init_method='env://') # distributed backend
+
+# model = get_net(cfg).to(device)
+# model_file = '/home/zwt/DaChuang/weights/epoch--2.pth'
+# checkpoint = torch.load(model_file)
+# model.load_state_dict(checkpoint['state_dict'])
+# if rank == -1 and torch.cuda.device_count() > 1:
+# model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
+# if rank != -1:
+# model = DDP(model, device_ids=[args.local_rank], output_device=args.local_rank)
\ No newline at end of file
diff --git a/lib/utils/split_dataset.py b/lib/utils/split_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..aeb7432916542c78fbad632f30c285e3bd03a1d0
--- /dev/null
+++ b/lib/utils/split_dataset.py
@@ -0,0 +1,30 @@
+import random
+import shutil
+import os
+
+def split(path, mask_path, lane_path):
+ os.mkdir(path + 'train')
+ os.mkdir(path + 'val')
+ os.mkdir(mask_path + 'train')
+ os.mkdir(mask_path + 'val')
+ os.mkdir(lane_path + 'train')
+ os.mkdir(lane_path + 'val')
+ val_index = random.sample(range(660), 200)
+ for i in range(660):
+ if i in val_index:
+ shutil.move(path+'{}.png'.format(i), path + 'val')
+ shutil.move(mask_path+'{}.png'.format(i), mask_path + 'val')
+ shutil.move(lane_path+'{}.png'.format(i), lane_path + 'val')
+ else:
+ shutil.move(path+'{}.png'.format(i), path + 'train')
+ shutil.move(mask_path+'{}.png'.format(i), mask_path + 'train')
+ shutil.move(lane_path+'{}.png'.format(i), lane_path + 'train')
+
+
+if __name__ == '__main__':
+ path = "/home/wqm/bdd/data_hust/"
+ mask_path = "/home/wqm/bdd/hust_area/"
+ lane_path = "/home/wqm/bdd/hust_lane/"
+ split(path, mask_path, lane_path)
+
+
diff --git a/lib/utils/utils.py b/lib/utils/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca095ce96fb3cdd72cda7ba78caaffeb277d288c
--- /dev/null
+++ b/lib/utils/utils.py
@@ -0,0 +1,163 @@
+import os
+import logging
+import time
+from collections import namedtuple
+from pathlib import Path
+
+import torch
+import torch.optim as optim
+import torch.nn as nn
+import numpy as np
+from torch.utils.data import DataLoader
+from prefetch_generator import BackgroundGenerator
+from contextlib import contextmanager
+import re
+
+def clean_str(s):
+ # Cleans a string by replacing special characters with underscore _
+ return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)
+
+def create_logger(cfg, cfg_path, phase='train', rank=-1):
+ # set up logger dir
+ dataset = cfg.DATASET.DATASET
+ dataset = dataset.replace(':', '_')
+ model = cfg.MODEL.NAME
+ cfg_path = os.path.basename(cfg_path).split('.')[0]
+
+ if rank in [-1, 0]:
+ time_str = time.strftime('%Y-%m-%d-%H-%M')
+ log_file = '{}_{}_{}.log'.format(cfg_path, time_str, phase)
+ # set up tensorboard_log_dir
+ tensorboard_log_dir = Path(cfg.LOG_DIR) / dataset / model / \
+ (cfg_path + '_' + time_str)
+ final_output_dir = tensorboard_log_dir
+ if not tensorboard_log_dir.exists():
+ print('=> creating {}'.format(tensorboard_log_dir))
+ tensorboard_log_dir.mkdir(parents=True)
+
+ final_log_file = tensorboard_log_dir / log_file
+ head = '%(asctime)-15s %(message)s'
+ logging.basicConfig(filename=str(final_log_file),
+ format=head)
+ logger = logging.getLogger()
+ logger.setLevel(logging.INFO)
+ console = logging.StreamHandler()
+ logging.getLogger('').addHandler(console)
+
+ return logger, str(final_output_dir), str(tensorboard_log_dir)
+ else:
+ return None, None, None
+
+
+def select_device(logger, device='', batch_size=None):
+ # device = 'cpu' or '0' or '0,1,2,3'
+ cpu_request = device.lower() == 'cpu'
+ if device and not cpu_request: # if device requested other than 'cpu'
+ os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable
+ assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availablity
+
+ cuda = False if cpu_request else torch.cuda.is_available()
+ if cuda:
+ c = 1024 ** 2 # bytes to MB
+ ng = torch.cuda.device_count()
+ if ng > 1 and batch_size: # check that batch_size is compatible with device_count
+ assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng)
+ x = [torch.cuda.get_device_properties(i) for i in range(ng)]
+ s = f'Using torch {torch.__version__} '
+ for i in range(0, ng):
+ if i == 1:
+ s = ' ' * len(s)
+ if logger:
+ logger.info("%sCUDA:%g (%s, %dMB)" % (s, i, x[i].name, x[i].total_memory / c))
+ else:
+ logger.info(f'Using torch {torch.__version__} CPU')
+
+ if logger:
+ logger.info('') # skip a line
+ return torch.device('cuda:0' if cuda else 'cpu')
+
+
+def get_optimizer(cfg, model):
+ optimizer = None
+ if cfg.TRAIN.OPTIMIZER == 'sgd':
+ optimizer = optim.SGD(
+ filter(lambda p: p.requires_grad, model.parameters()),
+ lr=cfg.TRAIN.LR0,
+ momentum=cfg.TRAIN.MOMENTUM,
+ weight_decay=cfg.TRAIN.WD,
+ nesterov=cfg.TRAIN.NESTEROV
+ )
+ elif cfg.TRAIN.OPTIMIZER == 'adam':
+ optimizer = optim.Adam(
+ filter(lambda p: p.requires_grad, model.parameters()),
+ #model.parameters(),
+ lr=cfg.TRAIN.LR0,
+ betas=(cfg.TRAIN.MOMENTUM, 0.999)
+ )
+
+ return optimizer
+
+
+def save_checkpoint(epoch, name, model, optimizer, output_dir, filename, is_best=False):
+ model_state = model.module.state_dict() if is_parallel(model) else model.state_dict()
+ checkpoint = {
+ 'epoch': epoch,
+ 'model': name,
+ 'state_dict': model_state,
+ # 'best_state_dict': model.module.state_dict(),
+ # 'perf': perf_indicator,
+ 'optimizer': optimizer.state_dict(),
+ }
+ torch.save(checkpoint, os.path.join(output_dir, filename))
+ if is_best and 'state_dict' in checkpoint:
+ torch.save(checkpoint['best_state_dict'],
+ os.path.join(output_dir, 'model_best.pth'))
+
+
+def initialize_weights(model):
+ for m in model.modules():
+ t = type(m)
+ if t is nn.Conv2d:
+ pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+ elif t is nn.BatchNorm2d:
+ m.eps = 1e-3
+ m.momentum = 0.03
+ elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
+ # elif t in [nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
+ m.inplace = True
+
+
+def xyxy2xywh(x):
+ # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+ y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
+ y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
+ y[:, 2] = x[:, 2] - x[:, 0] # width
+ y[:, 3] = x[:, 3] - x[:, 1] # height
+ return y
+
+
+def is_parallel(model):
+ return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
+
+
+def time_synchronized():
+ torch.cuda.synchronize() if torch.cuda.is_available() else None
+ return time.time()
+
+
+class DataLoaderX(DataLoader):
+ """prefetch dataloader"""
+ def __iter__(self):
+ return BackgroundGenerator(super().__iter__())
+
+@contextmanager
+def torch_distributed_zero_first(local_rank: int):
+ """
+ Decorator to make all processes in distributed training wait for each local_master to do something.
+ """
+ if local_rank not in [-1, 0]:
+ torch.distributed.barrier()
+ yield
+ if local_rank == 0:
+ torch.distributed.barrier()
diff --git a/pictures/da.png b/pictures/da.png
new file mode 100644
index 0000000000000000000000000000000000000000..37e72f1721f77d4d873efabf3f82280987a1f7b1
Binary files /dev/null and b/pictures/da.png differ
diff --git a/pictures/detect.png b/pictures/detect.png
new file mode 100644
index 0000000000000000000000000000000000000000..152106e2d849bf87ad15d1f8d2b9669f01af026d
Binary files /dev/null and b/pictures/detect.png differ
diff --git a/pictures/input1.gif b/pictures/input1.gif
new file mode 100644
index 0000000000000000000000000000000000000000..b5d6e3e0cb470b61dae012da68cc0ed40310b95f
Binary files /dev/null and b/pictures/input1.gif differ
diff --git a/pictures/input2.gif b/pictures/input2.gif
new file mode 100644
index 0000000000000000000000000000000000000000..b2dc7f96565f4e82c2501878740a72da0bd9b72d
Binary files /dev/null and b/pictures/input2.gif differ
diff --git a/pictures/ll.png b/pictures/ll.png
new file mode 100644
index 0000000000000000000000000000000000000000..be55e8d25d23b3e9cbb73ededb9222a481a4f89e
Binary files /dev/null and b/pictures/ll.png differ
diff --git a/pictures/output1.gif b/pictures/output1.gif
new file mode 100644
index 0000000000000000000000000000000000000000..1a611694c3acb1de5e9c5eaaf7a083f60389ee45
Binary files /dev/null and b/pictures/output1.gif differ
diff --git a/pictures/output2.gif b/pictures/output2.gif
new file mode 100644
index 0000000000000000000000000000000000000000..d322f1db51b5f03fdce8a2948cf1a04681b9f41f
Binary files /dev/null and b/pictures/output2.gif differ
diff --git a/pictures/yolop.png b/pictures/yolop.png
new file mode 100644
index 0000000000000000000000000000000000000000..1a6088452dc74d1681dcca73dfa39a31d881bf71
Binary files /dev/null and b/pictures/yolop.png differ
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..256d56b893d89472b9dcbda8490b1d8fae9ab443
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,15 @@
+scipy
+tqdm
+yacs
+Cython
+matplotlib>=3.2.2
+numpy>=1.18.5
+opencv-python>=4.1.2
+Pillow
+PyYAML>=5.3
+scipy>=1.4.1
+tensorboardX
+seaborn
+prefetch_generator
+imageio
+scikit-learn
\ No newline at end of file
diff --git a/toolkits/deploy/CMakeLists.txt b/toolkits/deploy/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4cb892bd503315560475749cca2a8192b3d7ac13
--- /dev/null
+++ b/toolkits/deploy/CMakeLists.txt
@@ -0,0 +1,45 @@
+cmake_minimum_required(VERSION 2.6)
+
+project(mcnet)
+
+add_definitions(-std=c++11)
+
+option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_BUILD_TYPE Release)
+
+
+find_package(ZED 3 REQUIRED)
+find_package(CUDA ${ZED_CUDA_VERSION} EXACT REQUIRED)
+
+include_directories(${PROJECT_SOURCE_DIR}/include)
+
+# cuda
+include_directories(/usr/local/cuda-10.2/include)
+link_directories(/usr/local/cuda-10.2/lib64)
+# tensorrt
+include_directories(/usr/include/aarch64-linux-gnu/)
+link_directories(/usr/lib/aarch64-linux-gnu/)
+# zed
+include_directories(/usr/local/zed/include)
+link_directories(/usr/local/zed/lib)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")
+
+set(ZED_LIBS ${ZED_LIBRARIES} ${CUDA_CUDA_LIBRARY} ${CUDA_CUDART_LIBRARY})
+
+coda_add_library(myplugins SHARED ${PROJECT_SOURCE_DIR}/yololayer.cu)
+target_link_libraries(myplugins nvinfer cudart)
+
+find_package(OpenCV REQUIRED)
+include_directories(${OpenCV_INCLUDE_DIRS})
+
+add_executable(mcnet ${PROJECT_SOURCE_DIR}/main.cpp)
+target_link_libraries(mcnet nvinfer)
+target_link_libraries(mcnet ${ZED_LIBS})
+target_link_libraries(mcnet cudart)
+target_link_libraries(mcnet myplugins)
+target_link_libraries(mcnet ${OpenCV_LIBS})
+
+add_definitions(-O3 -pthread)
+
diff --git a/toolkits/deploy/common.hpp b/toolkits/deploy/common.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..78b75e36aeb67aa1685d7336e4f226a8a9ae4314
--- /dev/null
+++ b/toolkits/deploy/common.hpp
@@ -0,0 +1,359 @@
+#ifndef YOLOV5_COMMON_H_
+#define YOLOV5_COMMON_H_
+
+#include
+#include