oliverlibaw and princeml committed
Commit 7fafb91 · 0 parents

Duplicate from princeml/object_detection_using_yolov8

Co-authored-by: Prince Kumar <[email protected]>

Files changed (10)
  1. xml_to_txt.py +42 -0
  2. .gitattributes +34 -0
  3. README.md +13 -0
  4. app.py +43 -0
  5. best.pt +3 -0
  6. coco_classes.txt +80 -0
  7. config.py +17 -0
  8. models.py +530 -0
  9. requirements.txt +97 -0
  10. utils.py +471 -0
xml_to_txt.py ADDED
@@ -0,0 +1,42 @@
+ import xml.etree.ElementTree as ET
+ import os
+ from glob import glob
+
+ XML_PATH = './dataset/xml'
+ CLASSES_PATH = './class_names/classes.txt'
+ TXT_PATH = './dataset/txt/anno.txt'
+
+
+ def get_classes(classes_path):
+     '''Load the class names, one per line.'''
+     with open(classes_path) as f:
+         class_names = f.readlines()
+     class_names = [c.strip() for c in class_names]
+     return class_names
+
+
+ classes = get_classes(CLASSES_PATH)
+ assert len(classes) > 0, 'no class names detected!'
+ print(f'num classes: {len(classes)}')
+
+ # output file
+ list_file = open(TXT_PATH, 'w')
+
+ for path in glob(os.path.join(XML_PATH, '*.xml')):
+     # Parse the .xml file
+     tree = ET.parse(path)
+     root = tree.getroot()
+     # Write object information to the .txt file
+     file_name = root.find('filename').text
+     print(file_name)
+     list_file.write(file_name)
+     for obj in root.iter('object'):
+         cls = obj.find('name').text
+         cls_id = classes.index(cls)
+         xmlbox = obj.find('bndbox')
+         b = (int(xmlbox.find('xmin').text), int(xmlbox.find('ymin').text),
+              int(xmlbox.find('xmax').text), int(xmlbox.find('ymax').text))
+         list_file.write(' ' + ','.join(str(a) for a in b) + ',' + str(cls_id))
+     list_file.write('\n')
+ list_file.close()
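Each image yields one line in anno.txt: the filename followed by one space-separated xmin,ymin,xmax,ymax,class_id group per object — the same format that read_annotation_lines/get_data in utils.py parse back in. A sample line (the values are made up for illustration):

    img_001.jpg 48,240,195,371,0 8,12,352,498,1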
.gitattributes ADDED
@@ -0,0 +1,34 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: 📷 Webcam Object Recognition Yolo Coco 🔍 Live Gradio
+ emoji: 📷Live
+ colorFrom: purple
+ colorTo: blue
+ sdk: gradio
+ sdk_version: 3.16.2
+ app_file: app.py
+ pinned: false
+ duplicated_from: princeml/object_detection_using_yolov8
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,43 @@
+ import cv2
+ import gradio as gr
+
+ # Legacy TF/Keras backend kept in this repo:
+ # from models import Yolov4
+ # model = Yolov4(weight_path="best.pt", class_name_path='coco_classes.txt')
+
+ from ultralytics import YOLO
+
+ # Load a custom-trained model
+ model = YOLO("best.pt")
+
+ # Predict with the model
+ # results = model("image.jpg", save=True)  # predict on an image
+
+
+ def gradio_wrapper(img):
+     results = model.predict(img)  # predict on a single webcam frame
+     # The custom model has two classes: 0 -> Man, 1 -> Woman
+     text = ""
+     cls_ids = results[0].boxes.cls
+     if len(cls_ids) > 0:
+         if max(cls_ids) == 0:
+             text = "Man"
+         elif max(cls_ids) == 1:
+             text = "Woman"
+
+     return cv2.putText(img, text, (0, 185), cv2.FONT_HERSHEY_SIMPLEX, 1,
+                        (0, 0, 255), 2, cv2.LINE_AA, False)
+
+
+ demo = gr.Interface(
+     gradio_wrapper,
+     # gr.Image(source="webcam", streaming=True, flip=True),
+     gr.Image(source="webcam", streaming=True),
+     "image",
+     live=True,
+ )
+
+ demo.launch()
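One compatibility note: gr.Image(source="webcam", streaming=True) is the Gradio 3.x signature, matching the sdk_version: 3.16.2 pinned in README.md. Gradio 4 replaced the source argument with a sources list, so a duplicated Space that bumps the SDK would need roughly the following (an untested sketch, not part of this repo):

    gr.Image(sources=["webcam"], streaming=True)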
best.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:17ffa274de93f7a9dd047ad3c723346ff4f16e21e260cab47d7141367ca259b9
+ size 6211256
coco_classes.txt ADDED
@@ -0,0 +1,80 @@
+ person
+ bicycle
+ car
+ motorbike
+ aeroplane
+ bus
+ train
+ truck
+ boat
+ traffic light
+ fire hydrant
+ stop sign
+ parking meter
+ bench
+ bird
+ cat
+ dog
+ horse
+ sheep
+ cow
+ elephant
+ bear
+ zebra
+ giraffe
+ backpack
+ umbrella
+ handbag
+ tie
+ suitcase
+ frisbee
+ skis
+ snowboard
+ sports ball
+ kite
+ baseball bat
+ baseball glove
+ skateboard
+ surfboard
+ tennis racket
+ bottle
+ wine glass
+ cup
+ fork
+ knife
+ spoon
+ bowl
+ banana
+ apple
+ sandwich
+ orange
+ broccoli
+ carrot
+ hot dog
+ pizza
+ donut
+ cake
+ chair
+ sofa
+ pottedplant
+ bed
+ diningtable
+ toilet
+ tvmonitor
+ laptop
+ mouse
+ remote
+ keyboard
+ cell phone
+ microwave
+ oven
+ toaster
+ sink
+ refrigerator
+ book
+ clock
+ vase
+ scissors
+ teddy bear
+ hair drier
+ toothbrush
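Note that this 80-entry list (with the VOC-style spellings aeroplane, sofa, pottedplant, diningtable, tvmonitor) is only consumed by the legacy Yolov4 path in models.py; the YOLOv8 best.pt that app.py loads appears to be a custom two-class (Man/Woman) model, so these names do not apply to the live demo.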
config.py ADDED
@@ -0,0 +1,17 @@
+ yolo_config = {
+     # Basic
+     'img_size': (416, 416, 3),
+     'anchors': [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
+     'strides': [8, 16, 32],
+     'xyscale': [1.2, 1.1, 1.05],
+
+     # Training
+     'iou_loss_thresh': 0.5,
+     'batch_size': 8,
+     'num_gpu': 1,  # 2,
+
+     # Inference
+     'max_boxes': 100,
+     'iou_threshold': 0.413,
+     'score_threshold': 0.3,
+ }
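The 18 anchor values are (width, height) pairs: models.py reshapes them into 3 anchors per detection scale, and each stride maps the 416-px input onto one output grid. A minimal sketch of that bookkeeping, mirroring the expressions in models.py:

    import numpy as np
    from config import yolo_config

    anchors = np.array(yolo_config['anchors']).reshape((3, 3, 2))  # (scale, anchor, wh)
    grids = [yolo_config['img_size'][0] // s for s in yolo_config['strides']]
    print(grids)  # [52, 26, 13] — matches the y_true input shapes built in models.py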
models.py ADDED
@@ -0,0 +1,530 @@
+ import numpy as np
+ import cv2
+ import os
+ import json
+ from tqdm import tqdm
+ from glob import glob
+ import matplotlib.pyplot as plt
+ import tensorflow as tf
+ from tensorflow.keras import layers, models, optimizers
+
+ from custom_layers import yolov4_neck, yolov4_head, nms
+ from utils import load_weights, get_detection_data, draw_bbox, voc_ap, draw_plot_func, read_txt_to_list
+ from config import yolo_config
+ from loss import yolo_loss
+
+
+ class Yolov4(object):
+     def __init__(self,
+                  weight_path=None,
+                  class_name_path='coco_classes.txt',
+                  config=yolo_config,
+                  ):
+         assert config['img_size'][0] == config['img_size'][1], 'non-square img_size not supported yet'
+         assert config['img_size'][0] % config['strides'][-1] == 0, 'img_size must be a multiple of the last stride'
+         self.class_names = [line.strip() for line in open(class_name_path).readlines()]
+         self.img_size = config['img_size']
+         self.num_classes = len(self.class_names)
+         self.weight_path = weight_path
+         self.anchors = np.array(config['anchors']).reshape((3, 3, 2))
+         self.xyscale = config['xyscale']
+         self.strides = config['strides']
+         self.output_sizes = [self.img_size[0] // s for s in self.strides]
+         self.class_color = {name: list(np.random.random(size=3) * 255) for name in self.class_names}
+         # Training
+         self.max_boxes = config['max_boxes']
+         self.iou_loss_thresh = config['iou_loss_thresh']
+         self.config = config
+         assert self.num_classes > 0, 'no classes detected!'
+
+         tf.keras.backend.clear_session()
+         if config['num_gpu'] > 1:
+             mirrored_strategy = tf.distribute.MirroredStrategy()
+             with mirrored_strategy.scope():
+                 self.build_model(load_pretrained=True if self.weight_path else False)
+         else:
+             self.build_model(load_pretrained=True if self.weight_path else False)
+
+     def build_model(self, load_pretrained=True):
+         # core yolo model
+         input_layer = layers.Input(self.img_size)
+         yolov4_output = yolov4_neck(input_layer, self.num_classes)
+         self.yolo_model = models.Model(input_layer, yolov4_output)
+
+         # Build training model
+         y_true = [
+             layers.Input(name='input_2', shape=(52, 52, 3, (self.num_classes + 5))),  # label small boxes
+             layers.Input(name='input_3', shape=(26, 26, 3, (self.num_classes + 5))),  # label medium boxes
+             layers.Input(name='input_4', shape=(13, 13, 3, (self.num_classes + 5))),  # label large boxes
+             layers.Input(name='input_5', shape=(self.max_boxes, 4)),  # true bboxes
+         ]
+         loss_list = tf.keras.layers.Lambda(yolo_loss, name='yolo_loss',
+                                            arguments={'num_classes': self.num_classes,
+                                                       'iou_loss_thresh': self.iou_loss_thresh,
+                                                       'anchors': self.anchors})([*self.yolo_model.output, *y_true])
+         self.training_model = models.Model([self.yolo_model.input, *y_true], loss_list)
+
+         # Build inference model
+         yolov4_output = yolov4_head(yolov4_output, self.num_classes, self.anchors, self.xyscale)
+         # output: [boxes, scores, classes, valid_detections]
+         self.inference_model = models.Model(input_layer,
+                                             nms(yolov4_output, self.img_size, self.num_classes,
+                                                 iou_threshold=self.config['iou_threshold'],
+                                                 score_threshold=self.config['score_threshold']))
+
+         if load_pretrained and self.weight_path:
+             if self.weight_path.endswith('.weights'):
+                 load_weights(self.yolo_model, self.weight_path)
+                 print(f'load from {self.weight_path}')
+             elif self.weight_path.endswith('.h5'):
+                 self.training_model.load_weights(self.weight_path)
+                 print(f'load from {self.weight_path}')
+
+         self.training_model.compile(optimizer=optimizers.Adam(learning_rate=1e-3),
+                                     loss={'yolo_loss': lambda y_true, y_pred: y_pred})
+
+     def load_model(self, path):
+         self.yolo_model = models.load_model(path, compile=False)
+         yolov4_output = yolov4_head(self.yolo_model.output, self.num_classes, self.anchors, self.xyscale)
+         self.inference_model = models.Model(self.yolo_model.input,
+                                             nms(yolov4_output, self.img_size, self.num_classes))  # [boxes, scores, classes, valid_detections]
+
+     def save_model(self, path):
+         self.yolo_model.save(path)
+
+     def preprocess_img(self, img):
+         img = cv2.resize(img, self.img_size[:2])
+         img = img / 255.
+         return img
+
+     def fit(self, train_data_gen, epochs, val_data_gen=None, initial_epoch=0, callbacks=None):
+         self.training_model.fit(train_data_gen,
+                                 steps_per_epoch=len(train_data_gen),
+                                 validation_data=val_data_gen,
+                                 validation_steps=len(val_data_gen) if val_data_gen else None,
+                                 epochs=epochs,
+                                 callbacks=callbacks,
+                                 initial_epoch=initial_epoch)
+
+     # raw_img: RGB
+     def predict_img(self, raw_img, random_color=True, plot_img=True, figsize=(10, 10), show_text=True, return_output=True):
+         print('img shape: ', raw_img.shape)
+         img = self.preprocess_img(raw_img)
+         imgs = np.expand_dims(img, axis=0)
+         pred_output = self.inference_model.predict(imgs)
+         detections = get_detection_data(img=raw_img,
+                                         model_outputs=pred_output,
+                                         class_names=self.class_names)
+
+         output_img = draw_bbox(raw_img, detections, cmap=self.class_color, random_color=random_color, figsize=figsize,
+                                show_text=show_text, show_img=False)
+         if return_output:
+             return output_img, detections
+         else:
+             return detections
+
+     def predict(self, img_path, random_color=True, plot_img=True, figsize=(10, 10), show_text=True):
+         raw_img = img_path
+         return self.predict_img(raw_img, random_color, plot_img, figsize, show_text)
+
+     def export_gt(self, annotation_path, gt_folder_path):
+         with open(annotation_path) as file:
+             for line in file:
+                 line = line.split(' ')
+                 filename = line[0].split(os.sep)[-1].split('.')[0]
+                 objs = line[1:]
+                 # export txt file
+                 with open(os.path.join(gt_folder_path, filename + '.txt'), 'w') as output_file:
+                     for obj in objs:
+                         x_min, y_min, x_max, y_max, class_id = [float(o) for o in obj.strip().split(',')]
+                         output_file.write(f'{self.class_names[int(class_id)]} {x_min} {y_min} {x_max} {y_max}\n')
+
+     def export_prediction(self, annotation_path, pred_folder_path, img_folder_path, bs=2):
+         with open(annotation_path) as file:
+             img_paths = [os.path.join(img_folder_path, line.split(' ')[0].split(os.sep)[-1]) for line in file]
+             for batch_idx in tqdm(range(0, len(img_paths), bs)):
+                 paths = img_paths[batch_idx:batch_idx + bs]
+                 # read and preprocess the imgs
+                 imgs = np.zeros((len(paths), *self.img_size))
+                 raw_img_shapes = []
+                 for j, path in enumerate(paths):
+                     img = cv2.imread(path)
+                     raw_img_shapes.append(img.shape)
+                     img = self.preprocess_img(img)
+                     imgs[j] = img
+
+                 # process batch output
+                 b_boxes, b_scores, b_classes, b_valid_detections = self.inference_model.predict(imgs)
+                 for k in range(len(paths)):
+                     num_boxes = b_valid_detections[k]
+                     raw_img_shape = raw_img_shapes[k]
+                     boxes = b_boxes[k, :num_boxes]
+                     classes = b_classes[k, :num_boxes]
+                     scores = b_scores[k, :num_boxes]
+                     boxes[:, [0, 2]] = (boxes[:, [0, 2]] * raw_img_shape[1])  # w
+                     boxes[:, [1, 3]] = (boxes[:, [1, 3]] * raw_img_shape[0])  # h
+                     cls_names = [self.class_names[int(c)] for c in classes]
+
+                     img_path = paths[k]
+                     filename = img_path.split(os.sep)[-1].split('.')[0]
+                     output_path = os.path.join(pred_folder_path, filename + '.txt')
+                     with open(output_path, 'w') as pred_file:
+                         for box_idx in range(num_boxes):
+                             b = boxes[box_idx]
+                             pred_file.write(f'{cls_names[box_idx]} {scores[box_idx]} {b[0]} {b[1]} {b[2]} {b[3]}\n')
+
+     def eval_map(self, gt_folder_path, pred_folder_path, temp_json_folder_path, output_files_path):
+         """Process ground truth"""
+         ground_truth_files_list = glob(gt_folder_path + '/*.txt')
+         assert len(ground_truth_files_list) > 0, 'no ground truth file'
+         ground_truth_files_list.sort()
+         # dictionary with counter per class
+         gt_counter_per_class = {}
+         counter_images_per_class = {}
+
+         gt_files = []
+         for txt_file in ground_truth_files_list:
+             file_id = txt_file.split(".txt", 1)[0]
+             file_id = os.path.basename(os.path.normpath(file_id))
+             # check if there is a corresponding detection-results file
+             temp_path = os.path.join(pred_folder_path, (file_id + ".txt"))
+             assert os.path.exists(temp_path), "Error. File not found: {}\n".format(temp_path)
+             lines_list = read_txt_to_list(txt_file)
+             # create ground-truth dictionary
+             bounding_boxes = []
+             is_difficult = False
+             already_seen_classes = []
+             for line in lines_list:
+                 class_name, left, top, right, bottom = line.split()
+                 bbox = left + " " + top + " " + right + " " + bottom
+                 bounding_boxes.append({"class_name": class_name, "bbox": bbox, "used": False})
+                 # count that object
+                 if class_name in gt_counter_per_class:
+                     gt_counter_per_class[class_name] += 1
+                 else:
+                     # if the class didn't exist yet
+                     gt_counter_per_class[class_name] = 1
+
+                 if class_name not in already_seen_classes:
+                     if class_name in counter_images_per_class:
+                         counter_images_per_class[class_name] += 1
+                     else:
+                         # if the class didn't exist yet
+                         counter_images_per_class[class_name] = 1
+                     already_seen_classes.append(class_name)
+
+             # dump bounding_boxes into a ".json" file
+             new_temp_file = os.path.join(temp_json_folder_path, file_id + "_ground_truth.json")
+             gt_files.append(new_temp_file)
+             with open(new_temp_file, 'w') as outfile:
+                 json.dump(bounding_boxes, outfile)
+
+         gt_classes = list(gt_counter_per_class.keys())
+         # sort the classes alphabetically
+         gt_classes = sorted(gt_classes)
+         n_classes = len(gt_classes)
+         print(gt_classes, gt_counter_per_class)
+
+         """Process predictions"""
+         dr_files_list = sorted(glob(os.path.join(pred_folder_path, '*.txt')))
+
+         for class_index, class_name in enumerate(gt_classes):
+             bounding_boxes = []
+             for txt_file in dr_files_list:
+                 # the first time through, check that all corresponding ground-truth files exist
+                 file_id = txt_file.split(".txt", 1)[0]
+                 file_id = os.path.basename(os.path.normpath(file_id))
+                 temp_path = os.path.join(gt_folder_path, (file_id + ".txt"))
+                 if class_index == 0:
+                     if not os.path.exists(temp_path):
+                         error_msg = f"Error. File not found: {temp_path}\n"
+                         print(error_msg)
+                 lines = read_txt_to_list(txt_file)
+                 for line in lines:
+                     try:
+                         tmp_class_name, confidence, left, top, right, bottom = line.split()
+                     except ValueError:
+                         error_msg = f"Error: File {txt_file} is in the wrong format.\n" \
+                                     f"Expected: <class_name> <confidence> <left> <top> <right> <bottom>\n" \
+                                     f"Received: {line}\n"
+                         print(error_msg)
+                         continue  # skip malformed lines rather than reusing stale fields
+                     if tmp_class_name == class_name:
+                         bbox = left + " " + top + " " + right + " " + bottom
+                         bounding_boxes.append({"confidence": confidence, "file_id": file_id, "bbox": bbox})
+             # sort detection-results by decreasing confidence
+             bounding_boxes.sort(key=lambda x: float(x['confidence']), reverse=True)
+             with open(temp_json_folder_path + "/" + class_name + "_dr.json", 'w') as outfile:
+                 json.dump(bounding_boxes, outfile)
+
+         """Calculate the AP for each class"""
+         sum_AP = 0.0
+         ap_dictionary = {}
+         # open the file to store the output
+         with open(output_files_path + "/output.txt", 'w') as output_file:
+             output_file.write("# AP and precision/recall per class\n")
+             count_true_positives = {}
+             for class_index, class_name in enumerate(gt_classes):
+                 count_true_positives[class_name] = 0
+                 """Load detection-results of that class"""
+                 dr_file = temp_json_folder_path + "/" + class_name + "_dr.json"
+                 dr_data = json.load(open(dr_file))
+
+                 """Assign detection-results to ground-truth objects"""
+                 nd = len(dr_data)
+                 tp = [0] * nd  # an array of zeros of size nd
+                 fp = [0] * nd
+                 for idx, detection in enumerate(dr_data):
+                     file_id = detection["file_id"]
+                     gt_file = temp_json_folder_path + "/" + file_id + "_ground_truth.json"
+                     ground_truth_data = json.load(open(gt_file))
+                     ovmax = -1
+                     gt_match = -1
+                     # load detected object bounding-box
+                     bb = [float(x) for x in detection["bbox"].split()]
+                     for obj in ground_truth_data:
+                         # look for a class_name match
+                         if obj["class_name"] == class_name:
+                             bbgt = [float(x) for x in obj["bbox"].split()]
+                             bi = [max(bb[0], bbgt[0]), max(bb[1], bbgt[1]), min(bb[2], bbgt[2]), min(bb[3], bbgt[3])]
+                             iw = bi[2] - bi[0] + 1
+                             ih = bi[3] - bi[1] + 1
+                             if iw > 0 and ih > 0:
+                                 # compute overlap (IoU) = area of intersection / area of union
+                                 ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + \
+                                      (bbgt[2] - bbgt[0] + 1) * (bbgt[3] - bbgt[1] + 1) - iw * ih
+                                 ov = iw * ih / ua
+                                 if ov > ovmax:
+                                     ovmax = ov
+                                     gt_match = obj
+
+                     min_overlap = 0.5
+                     if ovmax >= min_overlap:
+                         if not bool(gt_match["used"]):
+                             # true positive
+                             tp[idx] = 1
+                             gt_match["used"] = True
+                             count_true_positives[class_name] += 1
+                             # update the ".json" file
+                             with open(gt_file, 'w') as f:
+                                 f.write(json.dumps(ground_truth_data))
+                         else:
+                             # false positive (multiple detection)
+                             fp[idx] = 1
+                     else:
+                         fp[idx] = 1
+
+                 # compute precision/recall via in-place cumulative sums
+                 cumsum = 0
+                 for idx, val in enumerate(fp):
+                     fp[idx] += cumsum
+                     cumsum += val
+                 print('fp ', cumsum)
+                 cumsum = 0
+                 for idx, val in enumerate(tp):
+                     tp[idx] += cumsum
+                     cumsum += val
+                 print('tp ', cumsum)
+                 rec = tp[:]
+                 for idx, val in enumerate(tp):
+                     rec[idx] = float(tp[idx]) / gt_counter_per_class[class_name]
+                 prec = tp[:]
+                 for idx, val in enumerate(tp):
+                     prec[idx] = float(tp[idx]) / (fp[idx] + tp[idx])
+
+                 ap, mrec, mprec = voc_ap(rec[:], prec[:])
+                 sum_AP += ap
+                 text = "{0:.2f}%".format(ap * 100) + " = " + class_name + " AP "
+
+                 print(text)
+                 ap_dictionary[class_name] = ap
+
+                 n_images = counter_images_per_class[class_name]
+                 # lamr, mr, fppi = log_average_miss_rate(np.array(prec), np.array(rec), n_images)
+                 # lamr_dictionary[class_name] = lamr
+
+                 """Draw precision/recall plot"""
+                 if True:
+                     plt.plot(rec, prec, '-o')
+                     # add a new penultimate point to the list (mrec[-2], 0.0)
+                     # since the last line segment (and respective area) do not affect the AP value
+                     area_under_curve_x = mrec[:-1] + [mrec[-2]] + [mrec[-1]]
+                     area_under_curve_y = mprec[:-1] + [0.0] + [mprec[-1]]
+                     plt.fill_between(area_under_curve_x, 0, area_under_curve_y, alpha=0.2, edgecolor='r')
+                     # set window title
+                     fig = plt.gcf()  # gcf - get current figure
+                     fig.canvas.manager.set_window_title('AP ' + class_name)
+                     # set plot title
+                     plt.title('class: ' + text)
+                     # set axis titles
+                     plt.xlabel('Recall')
+                     plt.ylabel('Precision')
+                     # optional - set axes
+                     axes = plt.gca()  # gca - get current axes
+                     axes.set_xlim([0.0, 1.0])
+                     axes.set_ylim([0.0, 1.05])  # .05 to give some extra space
+                     plt.show()
+                     # save the plot
+                     # fig.savefig(output_files_path + "/classes/" + class_name + ".png")
+                     # plt.cla()  # clear axes for next plot
+
+             output_file.write("\n# mAP of all classes\n")
+             mAP = sum_AP / n_classes
+             text = "mAP = {0:.2f}%".format(mAP * 100)
+             output_file.write(text + "\n")
+             print(text)
+
+         """Count total of detection-results"""
+         # iterate through all the files
+         det_counter_per_class = {}
+         for txt_file in dr_files_list:
+             # get lines to list
+             lines_list = read_txt_to_list(txt_file)
+             for line in lines_list:
+                 class_name = line.split()[0]
+                 # count that object
+                 if class_name in det_counter_per_class:
+                     det_counter_per_class[class_name] += 1
+                 else:
+                     # if the class didn't exist yet
+                     det_counter_per_class[class_name] = 1
+         dr_classes = list(det_counter_per_class.keys())
+
+         """Plot the total number of occurrences of each class in the ground-truth"""
+         if True:
+             window_title = "ground-truth-info"
+             plot_title = "ground-truth\n"
+             plot_title += "(" + str(len(ground_truth_files_list)) + " files and " + str(n_classes) + " classes)"
+             x_label = "Number of objects per class"
+             output_path = output_files_path + "/ground-truth-info.png"
+             to_show = False
+             plot_color = 'forestgreen'
+             draw_plot_func(
+                 gt_counter_per_class,
+                 n_classes,
+                 window_title,
+                 plot_title,
+                 x_label,
+                 output_path,
+                 to_show,
+                 plot_color,
+                 '',
+             )
+
+         """Finish counting true positives"""
+         for class_name in dr_classes:
+             # if a class exists in detection-results but not in ground-truth, it has no true positives
+             if class_name not in gt_classes:
+                 count_true_positives[class_name] = 0
+
+         """Plot the total number of occurrences of each class in the "detection-results" folder"""
+         if True:
+             window_title = "detection-results-info"
+             # Plot title
+             plot_title = "detection-results\n"
+             plot_title += "(" + str(len(dr_files_list)) + " files and "
+             count_non_zero_values_in_dictionary = sum(int(x) > 0 for x in list(det_counter_per_class.values()))
+             plot_title += str(count_non_zero_values_in_dictionary) + " detected classes)"
+             x_label = "Number of objects per class"
+             output_path = output_files_path + "/detection-results-info.png"
+             to_show = False
+             plot_color = 'forestgreen'
+             true_p_bar = count_true_positives
+             draw_plot_func(
+                 det_counter_per_class,
+                 len(det_counter_per_class),
+                 window_title,
+                 plot_title,
+                 x_label,
+                 output_path,
+                 to_show,
+                 plot_color,
+                 true_p_bar,
+             )
+
+         """Draw mAP plot (show the AP of every class in decreasing order)"""
+         if True:
+             window_title = "mAP"
+             plot_title = "mAP = {0:.2f}%".format(mAP * 100)
+             x_label = "Average Precision"
+             output_path = output_files_path + "/mAP.png"
+             to_show = True
+             plot_color = 'royalblue'
+             draw_plot_func(
+                 ap_dictionary,
+                 n_classes,
+                 window_title,
+                 plot_title,
+                 x_label,
+                 output_path,
+                 to_show,
+                 plot_color,
+                 "",
+             )
+
+     def predict_raw(self, img_path):
+         raw_img = cv2.imread(img_path)
+         print('img shape: ', raw_img.shape)
+         img = self.preprocess_img(raw_img)
+         imgs = np.expand_dims(img, axis=0)
+         return self.yolo_model.predict(imgs)
+
+     def predict_nonms(self, img_path, iou_threshold=0.413, score_threshold=0.1):
+         raw_img = cv2.imread(img_path)
+         print('img shape: ', raw_img.shape)
+         img = self.preprocess_img(raw_img)
+         imgs = np.expand_dims(img, axis=0)
+         yolov4_output = self.yolo_model.predict(imgs)
+         output = yolov4_head(yolov4_output, self.num_classes, self.anchors, self.xyscale)
+         pred_output = nms(output, self.img_size, self.num_classes, iou_threshold, score_threshold)
+         pred_output = [p.numpy() for p in pred_output]
+         detections = get_detection_data(img=raw_img,
+                                         model_outputs=pred_output,
+                                         class_names=self.class_names)
+         draw_bbox(raw_img, detections, cmap=self.class_color, random_color=True)
+         return detections
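eval_map turns the per-detection TP/FP flags into running precision/recall (the two in-place cumulative-sum loops above) before handing them to voc_ap. A toy trace of that step, with hypothetical flags for three detections sorted by confidence and two ground-truth boxes:

    tp = [1, 0, 1]; fp = [0, 1, 0]
    # after the cumulative sums: tp = [1, 1, 2], fp = [0, 1, 1]
    # rec  = [1/2, 1/2, 2/2] = [0.5, 0.5, 1.0]
    # prec = [1/1, 1/2, 2/3]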
requirements.txt ADDED
@@ -0,0 +1,97 @@
+ absl-py==1.3.0
+ aiohttp==3.8.3
+ aiosignal==1.3.1
+ anyio==3.6.2
+ astunparse==1.6.3
+ async-timeout==4.0.2
+ attrs==22.1.0
+ bcrypt==4.0.1
+ cachetools==5.2.0
+ certifi==2022.9.24
+ cffi==1.15.1
+ charset-normalizer==2.1.1
+ click==8.1.3
+ colorama==0.4.6
+ contourpy==1.0.6
+ cryptography==38.0.3
+ cycler==0.11.0
+ fastapi==0.87.0
+ ffmpy==0.3.0
+ flatbuffers==22.10.26
+ fonttools==4.38.0
+ frozenlist==1.3.3
+ fsspec==2022.11.0
+ gast==0.4.0
+ google-auth==2.14.1
+ google-auth-oauthlib==0.4.6
+ google-pasta==0.2.0
+ gradio==3.10.0
+ grpcio==1.50.0
+ h11==0.12.0
+ h5py==3.7.0
+ httpcore==0.15.0
+ httpx==0.23.1
+ idna==3.4
+ importlib-metadata==5.0.0
+ Jinja2==3.1.2
+ joblib==1.2.0
+ keras==2.11.0
+ kiwisolver==1.4.4
+ libclang==14.0.6
+ linkify-it-py==1.0.3
+ Markdown==3.4.1
+ markdown-it-py==2.1.0
+ MarkupSafe==2.1.1
+ matplotlib==3.6.2
+ mdit-py-plugins==0.3.1
+ mdurl==0.1.2
+ multidict==6.0.2
+ numpy==1.23.4
+ oauthlib==3.2.2
+ opencv-python==4.6.0.66
+ opt-einsum==3.3.0
+ orjson==3.8.1
+ packaging==21.3
+ pandas==1.5.1
+ paramiko==2.12.0
+ Pillow==9.3.0
+ protobuf==3.19.6
+ pyasn1==0.4.8
+ pyasn1-modules==0.2.8
+ pycparser==2.21
+ pycryptodome==3.15.0
+ pydantic==1.10.2
+ pydub==0.25.1
+ PyNaCl==1.5.0
+ pyparsing==3.0.9
+ python-dateutil==2.8.2
+ python-multipart==0.0.5
+ pytz==2022.6
+ PyYAML==6.0
+ requests==2.28.1
+ requests-oauthlib==1.3.1
+ rfc3986==1.5.0
+ rsa==4.9
+ scikit-learn==1.1.3
+ scipy==1.9.3
+ six==1.16.0
+ sniffio==1.3.0
+ starlette==0.21.0
+ tensorboard==2.11.0
+ tensorboard-data-server==0.6.1
+ tensorboard-plugin-wit==1.8.1
+ tensorflow==2.11.0
+ tensorflow-estimator==2.11.0
+ termcolor==2.1.0
+ threadpoolctl==3.1.0
+ tqdm==4.64.1
+ typing_extensions==4.4.0
+ uc-micro-py==1.0.1
+ urllib3==1.26.12
+ uvicorn==0.19.0
+ websockets==10.4
+ Werkzeug==2.2.2
+ wrapt==1.14.1
+ yarl==1.8.1
+ zipp==3.10.0
+ ultralytics
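Every dependency here is pinned except ultralytics, so a later ultralytics release could change the YOLO API that app.py depends on; when reproducibility matters it is worth pinning that one too, e.g. (the version below is only an illustrative choice, not taken from this repo):

    ultralytics==8.0.20

Also note gradio is pinned to 3.10.0 here while README.md declares sdk_version: 3.16.2; on Spaces the README's sdk_version is what determines the installed Gradio.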
utils.py ADDED
@@ -0,0 +1,471 @@
+ import numpy as np
+ import cv2
+ import pandas as pd
+ import operator
+ import matplotlib.pyplot as plt
+ import os
+ from sklearn.model_selection import train_test_split
+ from tensorflow.keras.utils import Sequence
+ from config import yolo_config
+
+
+ def load_weights(model, weights_file_path):
+     conv_layer_size = 110
+     conv_output_idxs = [93, 101, 109]
+     with open(weights_file_path, 'rb') as file:
+         major, minor, revision, seen, _ = np.fromfile(file, dtype=np.int32, count=5)
+
+         bn_idx = 0
+         for conv_idx in range(conv_layer_size):
+             conv_layer_name = f'conv2d_{conv_idx}' if conv_idx > 0 else 'conv2d'
+             bn_layer_name = f'batch_normalization_{bn_idx}' if bn_idx > 0 else 'batch_normalization'
+
+             conv_layer = model.get_layer(conv_layer_name)
+             filters = conv_layer.filters
+             kernel_size = conv_layer.kernel_size[0]
+             input_dims = conv_layer.input_shape[-1]
+
+             if conv_idx not in conv_output_idxs:
+                 # darknet bn layer weights: [beta, gamma, mean, variance]
+                 bn_weights = np.fromfile(file, dtype=np.float32, count=4 * filters)
+                 # tf bn layer weights: [gamma, beta, mean, variance]
+                 bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]
+                 bn_layer = model.get_layer(bn_layer_name)
+                 bn_idx += 1
+             else:
+                 conv_bias = np.fromfile(file, dtype=np.float32, count=filters)
+
+             # darknet shape: (out_dim, input_dims, height, width)
+             # tf shape: (height, width, input_dims, out_dim)
+             conv_shape = (filters, input_dims, kernel_size, kernel_size)
+             conv_weights = np.fromfile(file, dtype=np.float32, count=np.prod(conv_shape))
+             conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0])
+
+             if conv_idx not in conv_output_idxs:
+                 conv_layer.set_weights([conv_weights])
+                 bn_layer.set_weights(bn_weights)
+             else:
+                 conv_layer.set_weights([conv_weights, conv_bias])
+
+         remaining = len(file.read())
+         if remaining == 0:
+             print('all weights read')
+         else:
+             print(f'failed to read all weights, # of unread bytes: {remaining}')
+
+
+ def get_detection_data(img, model_outputs, class_names):
+     """
+     :param img: target raw image
+     :param model_outputs: outputs from inference_model
+     :param class_names: list of object class names
+     :return: a DataFrame with one row per detected box
+     """
+     num_bboxes = model_outputs[-1][0]
+     boxes, scores, classes = [output[0][:num_bboxes] for output in model_outputs[:-1]]
+
+     h, w = img.shape[:2]
+     df = pd.DataFrame(boxes, columns=['x1', 'y1', 'x2', 'y2'])
+     df[['x1', 'x2']] = (df[['x1', 'x2']] * w).astype('int64')
+     df[['y1', 'y2']] = (df[['y1', 'y2']] * h).astype('int64')
+     df['class_name'] = np.array(class_names)[classes.astype('int64')]
+     df['score'] = scores
+     df['w'] = df['x2'] - df['x1']
+     df['h'] = df['y2'] - df['y1']
+
+     print(f'# of bboxes: {num_bboxes}')
+     return df
+
+
+ def read_annotation_lines(annotation_path, test_size=None, random_seed=5566):
+     with open(annotation_path) as f:
+         lines = f.readlines()
+     if test_size:
+         return train_test_split(lines, test_size=test_size, random_state=random_seed)
+     else:
+         return lines
+
+
+ def draw_bbox(img, detections, cmap, random_color=True, figsize=(10, 10), show_img=True, show_text=True):
+     """
+     Draw bounding boxes on the img.
+     :param img: BGR img.
+     :param detections: pandas DataFrame containing detections
+     :param cmap: object colormap
+     :param random_color: assign a random color to each object
+     :param show_img: whether to plot the img with bboxes
+     :return: img with bboxes drawn
+     """
+     img = np.array(img)
+     scale = max(img.shape[0:2]) / 416
+     line_width = int(2 * scale)
+
+     for _, row in detections.iterrows():
+         x1, y1, x2, y2, cls, score, w, h = row.values
+         color = list(np.random.random(size=3) * 255) if random_color else cmap[cls]
+         cv2.rectangle(img, (x1, y1), (x2, y2), color, line_width)
+         if show_text:
+             text = f'{cls} {score:.2f}'
+             font = cv2.FONT_HERSHEY_DUPLEX
+             font_scale = max(0.3 * scale, 0.3)
+             thickness = max(int(1 * scale), 1)
+             (text_width, text_height) = cv2.getTextSize(text, font, fontScale=font_scale, thickness=thickness)[0]
+             cv2.rectangle(img, (x1 - line_width // 2, y1 - text_height), (x1 + text_width, y1), color, cv2.FILLED)
+             cv2.putText(img, text, (x1, y1), font, font_scale, (255, 255, 255), thickness, cv2.LINE_AA)
+     if show_img:
+         plt.figure(figsize=figsize)
+         plt.imshow(img)
+         plt.show()
+     return img
+
+
+ class DataGenerator(Sequence):
+     """
+     Generates data for Keras.
+     ref: https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly
+     """
+     def __init__(self,
+                  annotation_lines,
+                  class_name_path,
+                  folder_path,
+                  max_boxes=100,
+                  shuffle=True):
+         self.annotation_lines = annotation_lines
+         self.class_name_path = class_name_path
+         self.num_classes = len([line.strip() for line in open(class_name_path).readlines()])
+         self.num_gpu = yolo_config['num_gpu']
+         self.batch_size = yolo_config['batch_size'] * self.num_gpu
+         self.target_img_size = yolo_config['img_size']
+         self.anchors = np.array(yolo_config['anchors']).reshape((9, 2))
+         self.shuffle = shuffle
+         self.indexes = np.arange(len(self.annotation_lines))
+         self.folder_path = folder_path
+         self.max_boxes = max_boxes
+         self.on_epoch_end()
+
+     def __len__(self):
+         'Number of batches per epoch'
+         return int(np.ceil(len(self.annotation_lines) / self.batch_size))
+
+     def __getitem__(self, index):
+         'Generate one batch of data'
+         # Generate indexes of the batch
+         idxs = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
+
+         # Find list of IDs
+         lines = [self.annotation_lines[i] for i in idxs]
+
+         # Generate data
+         X, y_tensor, y_bbox = self.__data_generation(lines)
+
+         return [X, *y_tensor, y_bbox], np.zeros(len(lines))
+
+     def on_epoch_end(self):
+         'Updates indexes after each epoch'
+         if self.shuffle:
+             np.random.shuffle(self.indexes)
+
+     def __data_generation(self, annotation_lines):
+         """
+         Generates data containing batch_size samples.
+         :param annotation_lines: annotation lines for this batch
+         :return: images, y_true tensors, and true boxes in xywh
+         """
+         X = np.empty((len(annotation_lines), *self.target_img_size), dtype=np.float32)
+         y_bbox = np.empty((len(annotation_lines), self.max_boxes, 5), dtype=np.float32)  # x1y1x2y2
+
+         for i, line in enumerate(annotation_lines):
+             img_data, box_data = self.get_data(line)
+             X[i] = img_data
+             y_bbox[i] = box_data
+
+         y_tensor, y_true_boxes_xywh = preprocess_true_boxes(y_bbox, self.target_img_size[:2], self.anchors, self.num_classes)
+
+         return X, y_tensor, y_true_boxes_xywh
+
+     def get_data(self, annotation_line):
+         line = annotation_line.split()
+         img_path = line[0]
+         img = cv2.imread(os.path.join(self.folder_path, img_path))[:, :, ::-1]  # BGR -> RGB
+         ih, iw = img.shape[:2]
+         h, w, c = self.target_img_size
+         boxes = np.array([np.array(list(map(float, box.split(',')))) for box in line[1:]], dtype=np.float32)  # x1y1x2y2
+         scale_w, scale_h = w / iw, h / ih
+         img = cv2.resize(img, (w, h))
+         image_data = np.array(img) / 255.
+
+         # correct box coordinates for the resized image
+         box_data = np.zeros((self.max_boxes, 5))
+         if len(boxes) > 0:
+             np.random.shuffle(boxes)
+             boxes = boxes[:self.max_boxes]
+             boxes[:, [0, 2]] = boxes[:, [0, 2]] * scale_w  # + dx
+             boxes[:, [1, 3]] = boxes[:, [1, 3]] * scale_h  # + dy
+             box_data[:len(boxes)] = boxes
+
+         return image_data, box_data
+
+
+ def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
+     '''Preprocess true boxes to training input format.
+     Parameters
+     ----------
+     true_boxes: array, shape=(bs, max boxes per img, 5)
+         Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape.
+     input_shape: array-like, hw, multiples of 32
+     anchors: array, shape=(N, 2), (9, wh)
+     num_classes: int
+     Returns
+     -------
+     y_true: list of arrays, shaped like yolo_outputs; xywh are relative values
+     '''
+     num_stages = 3  # default setting for yolo; tiny yolo would be 2
+     anchor_mask = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
+     bbox_per_grid = 3
+     true_boxes = np.array(true_boxes, dtype='float32')
+     true_boxes_abs = np.array(true_boxes, dtype='float32')
+     input_shape = np.array(input_shape, dtype='int32')
+     true_boxes_xy = (true_boxes_abs[..., 0:2] + true_boxes_abs[..., 2:4]) // 2  # (100, 2)
+     true_boxes_wh = true_boxes_abs[..., 2:4] - true_boxes_abs[..., 0:2]  # (100, 2)
+
+     # Normalize x, y, w, h relative to img size -> (0~1)
+     true_boxes[..., 0:2] = true_boxes_xy / input_shape[::-1]  # xy
+     true_boxes[..., 2:4] = true_boxes_wh / input_shape[::-1]  # wh
+
+     bs = true_boxes.shape[0]
+     grid_sizes = [input_shape // {0: 8, 1: 16, 2: 32}[stage] for stage in range(num_stages)]
+     y_true = [np.zeros((bs,
+                         grid_sizes[s][0],
+                         grid_sizes[s][1],
+                         bbox_per_grid,
+                         5 + num_classes), dtype='float32')
+               for s in range(num_stages)]
+     # [(?, 52, 52, 3, 5+num_classes), (?, 26, 26, 3, 5+num_classes), (?, 13, 13, 3, 5+num_classes)]
+     y_true_boxes_xywh = np.concatenate((true_boxes_xy, true_boxes_wh), axis=-1)
+     # Expand dims to apply broadcasting.
+     anchors = np.expand_dims(anchors, 0)  # (1, 9, 2)
+     anchor_maxes = anchors / 2.  # (1, 9, 2)
+     anchor_mins = -anchor_maxes  # (1, 9, 2)
+     valid_mask = true_boxes_wh[..., 0] > 0  # (1, 100)
+
+     for batch_idx in range(bs):
+         # Discard zero rows.
+         wh = true_boxes_wh[batch_idx, valid_mask[batch_idx]]  # (# of bbox, 2)
+         num_boxes = len(wh)
+         if num_boxes == 0:
+             continue
+         wh = np.expand_dims(wh, -2)  # (# of bbox, 1, 2)
+         box_maxes = wh / 2.  # (# of bbox, 1, 2)
+         box_mins = -box_maxes  # (# of bbox, 1, 2)
+
+         # Compute IoU between each anchor and the true boxes for responsibility assignment
+         intersect_mins = np.maximum(box_mins, anchor_mins)  # (# of bbox, 9, 2)
+         intersect_maxes = np.minimum(box_maxes, anchor_maxes)
+         intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
+         intersect_area = np.prod(intersect_wh, axis=-1)  # (9,)
+         box_area = wh[..., 0] * wh[..., 1]  # (# of bbox, 1)
+         anchor_area = anchors[..., 0] * anchors[..., 1]  # (1, 9)
+         iou = intersect_area / (box_area + anchor_area - intersect_area)  # (# of bbox, 9)
+
+         # Find the best anchor for each true box
+         best_anchors = np.argmax(iou, axis=-1)  # (# of bbox,)
+         for box_idx in range(num_boxes):
+             best_anchor = best_anchors[box_idx]
+             for stage in range(num_stages):
+                 if best_anchor in anchor_mask[stage]:
+                     x_offset = true_boxes[batch_idx, box_idx, 0] * grid_sizes[stage][1]
+                     y_offset = true_boxes[batch_idx, box_idx, 1] * grid_sizes[stage][0]
+                     # Grid index
+                     grid_col = np.floor(x_offset).astype('int32')
+                     grid_row = np.floor(y_offset).astype('int32')
+                     anchor_idx = anchor_mask[stage].index(best_anchor)
+                     class_idx = true_boxes[batch_idx, box_idx, 4].astype('int32')
+                     y_true[stage][batch_idx, grid_row, grid_col, anchor_idx, :2] = true_boxes_xy[batch_idx, box_idx, :]  # abs xy
+                     y_true[stage][batch_idx, grid_row, grid_col, anchor_idx, 2:4] = true_boxes_wh[batch_idx, box_idx, :]  # abs wh
+                     y_true[stage][batch_idx, grid_row, grid_col, anchor_idx, 4] = 1  # confidence
+                     y_true[stage][batch_idx, grid_row, grid_col, anchor_idx, 5 + class_idx] = 1  # one-hot encoding
+                     # label smoothing (disabled):
+                     # onehot = np.zeros(num_classes)
+                     # onehot[class_idx] = 1.0
+                     # uniform_distribution = np.full(num_classes, 1.0 / num_classes)
+                     # delta = 0.01
+                     # smooth_onehot = onehot * (1 - delta) + delta * uniform_distribution
+                     # y_true[stage][batch_idx, grid_row, grid_col, anchor_idx, 5:] = smooth_onehot
+
+     return y_true, y_true_boxes_xywh
+
+
+ """
+ Calculate the AP given the recall and precision arrays.
+ 1st) We compute a version of the measured precision/recall curve with
+      precision monotonically decreasing.
+ 2nd) We compute the AP as the area under this curve by numerical integration.
+ """
+ def voc_ap(rec, prec):
+     """
+     --- Official matlab code VOC2012 ---
+     mrec=[0 ; rec ; 1];
+     mpre=[0 ; prec ; 0];
+     for i=numel(mpre)-1:-1:1
+         mpre(i)=max(mpre(i),mpre(i+1));
+     end
+     i=find(mrec(2:end)~=mrec(1:end-1))+1;
+     ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
+     """
+     rec.insert(0, 0.0)  # insert 0.0 at beginning of list
+     rec.append(1.0)  # insert 1.0 at end of list
+     mrec = rec[:]
+     prec.insert(0, 0.0)  # insert 0.0 at beginning of list
+     prec.append(0.0)  # insert 0.0 at end of list
+     mpre = prec[:]
+     """
+     This part makes the precision monotonically decreasing
+     (goes from the end to the beginning)
+     matlab: for i=numel(mpre)-1:-1:1
+                 mpre(i)=max(mpre(i),mpre(i+1));
+     """
+     # matlab indexes start at 1 but python at 0, and the python range
+     # excludes the end value, so the equivalent loop is:
+     #     range(start=(len(mpre) - 2), end=-1, step=-1)
+     for i in range(len(mpre) - 2, -1, -1):
+         mpre[i] = max(mpre[i], mpre[i + 1])
+     """
+     This part creates a list of indexes where the recall changes
+     matlab: i=find(mrec(2:end)~=mrec(1:end-1))+1;
+     """
+     i_list = []
+     for i in range(1, len(mrec)):
+         if mrec[i] != mrec[i - 1]:
+             i_list.append(i)  # in matlab this would be i + 1
+     """
+     The Average Precision (AP) is the area under the curve
+     (numerical integration)
+     matlab: ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
+     """
+     ap = 0.0
+     for i in i_list:
+         ap += ((mrec[i] - mrec[i - 1]) * mpre[i])
+     return ap, mrec, mpre
+
+
+ """
+ Draw plot using Matplotlib
+ """
+ def draw_plot_func(dictionary, n_classes, window_title, plot_title, x_label, output_path, to_show, plot_color, true_p_bar):
+     # sort the dictionary by increasing value, into a list of tuples
+     sorted_dic_by_value = sorted(dictionary.items(), key=operator.itemgetter(1))
+     print(sorted_dic_by_value)
+     # unpack the list of tuples into two lists
+     sorted_keys, sorted_values = zip(*sorted_dic_by_value)
+
+     if true_p_bar != "":
+         """
+         Special case to draw in:
+         - green -> TP: True Positives (object detected and matches ground-truth)
+         - red -> FP: False Positives (object detected but does not match ground-truth)
+         - pink -> FN: False Negatives (object not detected but present in the ground-truth)
+         """
+         fp_sorted = []
+         tp_sorted = []
+         for key in sorted_keys:
+             fp_sorted.append(dictionary[key] - true_p_bar[key])
+             tp_sorted.append(true_p_bar[key])
+         plt.barh(range(n_classes), fp_sorted, align='center', color='crimson', label='False Positive')
+         plt.barh(range(n_classes), tp_sorted, align='center', color='forestgreen', label='True Positive', left=fp_sorted)
+         # add legend
+         plt.legend(loc='lower right')
+         # write the number on the side of each bar
+         fig = plt.gcf()  # gcf - get current figure
+         axes = plt.gca()
+         r = fig.canvas.get_renderer()
+         for i, val in enumerate(sorted_values):
+             fp_val = fp_sorted[i]
+             tp_val = tp_sorted[i]
+             fp_str_val = " " + str(fp_val)
+             tp_str_val = fp_str_val + " " + str(tp_val)
+             # trick to paint multicolor with offset:
+             # first paint everything and then repaint the first number
+             t = plt.text(val, i, tp_str_val, color='forestgreen', va='center', fontweight='bold')
+             plt.text(val, i, fp_str_val, color='crimson', va='center', fontweight='bold')
+             if i == (len(sorted_values) - 1):  # largest bar
+                 adjust_axes(r, t, fig, axes)
+     else:
+         plt.barh(range(n_classes), sorted_values, color=plot_color)
+         # write the number on the side of each bar
+         fig = plt.gcf()  # gcf - get current figure
+         axes = plt.gca()
+         r = fig.canvas.get_renderer()
+         for i, val in enumerate(sorted_values):
+             str_val = " " + str(val)  # add a space before
+             if val < 1.0:
+                 str_val = " {0:.2f}".format(val)
+             t = plt.text(val, i, str_val, color=plot_color, va='center', fontweight='bold')
+             # re-set axes to show the number inside the figure
+             if i == (len(sorted_values) - 1):  # largest bar
+                 adjust_axes(r, t, fig, axes)
+     # set window title
+     fig.canvas.manager.set_window_title(window_title)
+     # write classes on the y axis
+     tick_font_size = 12
+     plt.yticks(range(n_classes), sorted_keys, fontsize=tick_font_size)
+     """
+     Re-scale the height accordingly
+     """
+     init_height = fig.get_figheight()
+     # compute the matrix height in points and inches
+     dpi = fig.dpi
+     height_pt = n_classes * (tick_font_size * 1.4)  # 1.4 (some spacing)
+     height_in = height_pt / dpi
+     # compute the required figure height
+     top_margin = 0.15  # in percentage of the figure height
+     bottom_margin = 0.05  # in percentage of the figure height
+     figure_height = height_in / (1 - top_margin - bottom_margin)
+     # set new height
+     if figure_height > init_height:
+         fig.set_figheight(figure_height)
+
+     # set plot title
+     plt.title(plot_title, fontsize=14)
+     # set axis titles
+     plt.xlabel(x_label, fontsize='large')
+     # adjust size of window
+     fig.tight_layout()
+     # save the plot
+     fig.savefig(output_path)
+     # show the image only when requested
+     if to_show:
+         plt.show()
+     # close the plot so figures don't accumulate across calls
+     plt.close()
+
+
+ """
+ Plot - adjust axes
+ """
+ def adjust_axes(r, t, fig, axes):
+     # get text width for re-scaling
+     bb = t.get_window_extent(renderer=r)
+     text_width_inches = bb.width / fig.dpi
+     # get axis width in inches
+     current_fig_width = fig.get_figwidth()
+     new_fig_width = current_fig_width + text_width_inches
+     proportion = new_fig_width / current_fig_width
+     # get axis limit
+     x_lim = axes.get_xlim()
+     axes.set_xlim([x_lim[0], x_lim[1] * proportion])
+
+
+ def read_txt_to_list(path):
+     # read the lines of a txt file into a list
+     with open(path) as f:
+         content = f.readlines()
+     # remove whitespace characters like `\n` at the end of each line
+     content = [x.strip() for x in content]
+     return content
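As a quick sanity check of voc_ap, feeding it the toy recall/precision lists from the note after models.py reproduces the expected interpolated area (an illustrative snippet, not part of the repo):

    from utils import voc_ap

    ap, mrec, mpre = voc_ap([0.5, 0.5, 1.0], [1.0, 0.5, 2.0 / 3.0])
    print(ap)  # 0.5 * 1.0 + 0.5 * (2/3) ≈ 0.8333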