Spaces: faceplugin (status: Runtime error)
Commit 5143658 • v1.00 • committed by faceplugin
Parent(s): 772b8a5
This view is limited to 50 files because it contains too many changes. See raw diff.

- README.md +1 -1
- app.py +37 -1
- face_recognition/app.py +0 -94
- face_recognition/extract.py +0 -88
- face_recognition/face_detect/check_gt_box.py +0 -59
- face_recognition/face_detect/create_fd_result.py +0 -99
- face_recognition/face_detect/detect_imgs.py +0 -65
- face_recognition/face_detect/models/pretrained/version-RFB-320.pth +0 -3
- face_recognition/face_detect/models/pretrained/version-RFB-640.pth +0 -3
- face_recognition/face_detect/models/pretrained/version-slim-320.pth +0 -3
- face_recognition/face_detect/models/pretrained/version-slim-640.pth +0 -3
- face_recognition/face_detect/models/voc-model-labels.txt +0 -2
- face_recognition/face_detect/requirements.txt +0 -11
- face_recognition/face_detect/vision/__init__.py +0 -0
- face_recognition/face_detect/vision/datasets/__init__.py +0 -0
- face_recognition/face_detect/vision/datasets/caffe_pb2.py +0 -0
- face_recognition/face_detect/vision/datasets/voc_dataset.py +0 -146
- face_recognition/face_detect/vision/nn/__init__.py +0 -0
- face_recognition/face_detect/vision/nn/mb_tiny.py +0 -51
- face_recognition/face_detect/vision/nn/mb_tiny_RFB.py +0 -118
- face_recognition/face_detect/vision/nn/multibox_loss.py +0 -46
- face_recognition/face_detect/vision/ssd/__init__.py +0 -0
- face_recognition/face_detect/vision/ssd/config/__init__.py +0 -0
- face_recognition/face_detect/vision/ssd/config/fd_config.py +0 -41
- face_recognition/face_detect/vision/ssd/data_preprocessing.py +0 -61
- face_recognition/face_detect/vision/ssd/mb_tiny_RFB_fd.py +0 -64
- face_recognition/face_detect/vision/ssd/mb_tiny_fd.py +0 -64
- face_recognition/face_detect/vision/ssd/predictor.py +0 -70
- face_recognition/face_detect/vision/ssd/ssd.py +0 -166
- face_recognition/face_detect/vision/transforms/__init__.py +0 -0
- face_recognition/face_detect/vision/transforms/transforms.py +0 -541
- face_recognition/face_detect/vision/utils/__init__.py +0 -1
- face_recognition/face_detect/vision/utils/box_utils.py +0 -241
- face_recognition/face_detect/vision/utils/box_utils_numpy.py +0 -119
- face_recognition/face_detect/vision/utils/misc.py +0 -46
- face_recognition/face_detect/widerface_evaluate/box_overlaps.pyx +0 -55
- face_recognition/face_detect/widerface_evaluate/evaluation.py +0 -302
- face_recognition/face_detect/widerface_evaluate/evaluation_on_widerface.py +0 -73
- face_recognition/face_detect/widerface_evaluate/ground_truth/wider_easy_val.mat +0 -0
- face_recognition/face_detect/widerface_evaluate/ground_truth/wider_face_val.mat +0 -0
- face_recognition/face_detect/widerface_evaluate/ground_truth/wider_hard_val.mat +0 -0
- face_recognition/face_detect/widerface_evaluate/ground_truth/wider_medium_val.mat +0 -0
- face_recognition/face_detect/widerface_evaluate/setup.py +0 -13
- face_recognition/face_feature/GetFeature.py +0 -24
- face_recognition/face_feature/irn50_pytorch.npy +0 -3
- face_recognition/face_feature/irn50_pytorch.py +0 -288
- face_recognition/face_landmark/GetLandmark.py +0 -62
- face_recognition/face_landmark/MobileFaceNet.py +0 -123
- face_recognition/face_landmark/vfl_1.02_578_6.734591484069824.pth.tar +0 -3
- face_recognition/face_manage/manage.py +0 -161
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 📈
 colorFrom: purple
 colorTo: pink
 sdk: gradio
-sdk_version: 4.
+sdk_version: 4.43.0
 app_file: app.py
 pinned: false
 license: mit

app.py CHANGED
@@ -14,7 +14,7 @@ import configparser
 import numpy as np
 from PIL import Image
 
-from face_recognition.match import match_1_1
+# from face_recognition.match import match_1_1
 # from face_recognition1.run import match_image
 
 
@@ -26,6 +26,21 @@ def face_recognition_on_file(file1, file2):
 
     return response
 
+def liveness_detection_on_file(file1, file2):
+    img1 = cv2.imread(file1)
+    img2 = cv2.imread(file2)
+
+    response = match_1_1(img1, img2)
+
+    return response
+
+def mrz_recognition_on_file(file1, file2):
+    img1 = cv2.imread(file1)
+    img2 = cv2.imread(file2)
+
+    response = match_1_1(img1, img2)
+
+    return response
 
 with gr.Blocks() as demo:
     gr.Markdown(
@@ -51,5 +66,26 @@ with gr.Blocks() as demo:
                     app_output = [gr.JSON()]
 
             start_button.click(face_recognition_on_file, inputs=[first_input, second_input], outputs=app_output)
+        with gr.TabItem("Face Liveness Detection"):
+            with gr.Row():
+                with gr.Column():
+                    app_input = gr.Image(type='filepath')
+                    gr.Examples(['images/4.jpg', 'images/1.png', 'images/2.png', 'images/3.png'],
+                                inputs=app_input)
+                    start_button = gr.Button("Run")
+                with gr.Column():
+                    app_output = [gr.JSON()]
+
+            start_button.click(liveness_detection_on_file, inputs=app_input, outputs=app_output)
+        with gr.TabItem("ID Document Recognition"):
+            with gr.Row():
+                with gr.Column():
+                    app_input = gr.Image(type='pil')
+                    gr.Examples(['images/mrz_1.jpg', 'images/mrz_2.png', 'images/mrz_3.jpeg', 'images/mrz_4.jpg'],
+                                inputs=app_input)
+                    start_button = gr.Button("Run")
+                with gr.Column():
+                    app_output = [gr.JSON()]
 
+            start_button.click(mrz_recognition_on_file, inputs=app_input, outputs=app_output)
 demo.queue().launch(share=True)

face_recognition/app.py DELETED
@@ -1,94 +0,0 @@
import os
import cv2
import numpy as np
import base64
import face_manage.manage as db_manage
from flask import Flask, render_template, request, jsonify
from extract import GetImageInfo

app = Flask(__name__)

UPLOAD_FOLDER = os.path.basename('uploads')
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER


@app.route("/")
def start_page():
    print("Start")
    response = jsonify({"status": "Start"})
    response.status_code = 200
    response.headers["Content-Type"] = "application/json; charset=utf-8"
    return response


@app.route("/enroll")
def enroll():
    file = request.files['image']
    image = cv2.imdecode(np.fromstring(file.read(), np.uint8), cv2.IMREAD_UNCHANGED)

    db_manage.open_database(0)
    count, boxes, scores, landmarks, alignimgs, features = GetImageInfo(image, 5)

    for idx in range(0, count):
        db_manage.register_face('sample name', idx, boxes[idx], landmarks[idx], alignimgs[idx], features[idx])

    # db_manage.clear_database()

    response = jsonify({"status": "True"})
    response.status_code = 200
    response.headers["Content-Type"] = "application/json; charset=utf-8"
    return response


@app.route("/delete/all")
def delete_all():
    db_manage.open_database(0)
    db_manage.clear_database()

    response = jsonify({"status": "True"})
    response.status_code = 200
    response.headers["Content-Type"] = "application/json; charset=utf-8"
    return response


@app.route("/match11")
def match_1_1():
    file1 = request.files['image1']
    file2 = request.files['image2']

    image1 = cv2.imdecode(np.fromstring(file1.read(), np.uint8), cv2.IMREAD_UNCHANGED)
    image2 = cv2.imdecode(np.fromstring(file2.read(), np.uint8), cv2.IMREAD_UNCHANGED)

    count1, boxes1, scores1, landmarks1, alignimgs1, features1 = GetImageInfo(image1, 1)
    count2, boxes2, scores2, landmarks2, alignimgs2, features2 = GetImageInfo(image2, 1)

    if count1 != 0 and count2 != 0:
        sim = db_manage.get_similarity(features1[0], features2[0])
        if sim > db_manage.threshold:
            result = True
        else:
            result = False

    response = jsonify({"status": result})
    response.status_code = 200
    response.headers["Content-Type"] = "application/json; charset=utf-8"
    return response


@app.route("/match1n")
def match_1_n():
    file = request.files['image']
    image = cv2.imdecode(np.fromstring(file.read(), np.uint8), cv2.IMREAD_UNCHANGED)

    result, filename, sub_index = False, None, -1
    count, boxes, scores, landmarks, alignimgs, features = GetImageInfo(image, 1)

    for idx in range(count):
        id, fn, sub_id = db_manage.verify_face(features[idx])
        if id != -1:
            result, filename, sub_index = True, fn, id

    response = jsonify({"status": result, "filename": filename, "subIndex": sub_index})
    response.status_code = 200
    response.headers["Content-Type"] = "application/json; charset=utf-8"
    return response

face_recognition/extract.py DELETED
@@ -1,88 +0,0 @@
import argparse
import cv2
import torch
import numpy as np
import ctypes
import os.path
import time

from face_detect.detect_imgs import get_face_boundingbox
from face_landmark.GetLandmark import get_face_landmark
from face_feature.GetFeature import get_face_feature
from face_pose.GetPose import get_face_pose
import face_manage.manage as db_manage

def GetImageInfo(image, faceMaxCount):
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    ### Detection
    start_time = time.time() * 1000
    boxes, scores = get_face_boundingbox(image)
    boxes = boxes[:faceMaxCount]
    scores = scores[:faceMaxCount]
    count = len(boxes)
    bboxes = []
    bscores = []
    for idx in range(count):
        bboxes.append(boxes[idx].data.numpy())
        bscores.append(scores[idx].data.numpy())
    # print("Detection time = %s ms" % (time.time() * 1000 - start_time))

    ### Landmark
    start_time = time.time() * 1000
    landmarks = []  ### np.zeros((count, 136), dtype=np.float32)
    for idx in range(count):
        landmarks.append(get_face_landmark(gray_image, boxes[idx]).data.numpy())
    # print("Landmark time = %s ms" % (time.time() * 1000 - start_time))

    ### Pose
    poses = []
    for idx in range(count):
        poses.append(get_face_pose(boxes[idx], landmarks[idx]))

    ### Feature
    start_time = time.time() * 1000
    features = []
    alignimgs = []
    for idx in range(count):
        alignimg, feature = get_face_feature(image, landmarks[idx])
        features.append(feature)
        alignimgs.append(alignimg)
    print("Feature extraction time = %s ms" % (time.time() * 1000 - start_time))

    ####
    if 0:
        for idx in range(count):
            print_image = image.copy()
            box = boxes[idx].numpy()
            print(">>>>>>>>: ", box)
            landmark = landmarks[idx]
            cv2.rectangle(print_image, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 0, 255), 2)
            for p in range(68):
                cv2.circle(print_image, (int(landmark[p * 2]), int(landmark[p * 2 + 1])), 1, (255, 255, 255))
            cv2.imshow("face recognition", print_image)
            cv2.waitKey()

    return count, bboxes, bscores, landmarks, alignimgs, features

def get_similarity(feat1, feat2):
    return (np.sum(feat1 * feat2) + 1) * 50

if __name__ == '__main__':
    threshold = 75
    test_directory = 'test'

    efn = os.getcwd() + "/test/1.png"
    img = cv2.imread(efn, cv2.IMREAD_COLOR)
    count, boxes, scores, landmarks, alignimgs, features1 = GetImageInfo(img, 5)

    vfn = os.getcwd() + "/test/3.png"
    img = cv2.imread(vfn, cv2.IMREAD_COLOR)
    count, boxes, scores, landmarks, alignimgs, features2 = GetImageInfo(img, 5)

    score = get_similarity(features1[0], features2[0])
    print('score = ', score)
    if score > threshold:
        print('same person')
    else:
        print('different person')

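Note on the scoring rule in the deleted extract.py above: get_similarity maps the inner product of two embeddings onto a 0-100 scale, so the threshold of 75 used in its __main__ block corresponds to a cosine similarity of 0.5 if the feature vectors are L2-normalized. A minimal standalone sketch of that mapping (illustrative only, not part of this commit; the name similarity_score is hypothetical):

import numpy as np

def similarity_score(feat1: np.ndarray, feat2: np.ndarray) -> float:
    # Same rule as the deleted get_similarity(): shift the dot product from [-1, 1]
    # to [0, 2], then scale to [0, 100]. Assumes both embeddings are L2-normalized.
    return (float(np.dot(feat1, feat2)) + 1.0) * 50.0

# Identical unit vectors score 100; orthogonal vectors score 50.
a = np.array([1.0, 0.0])
b = np.array([0.0, 1.0])
print(similarity_score(a, a), similarity_score(a, b))  # 100.0 50.0
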
face_recognition/face_detect/check_gt_box.py DELETED
@@ -1,59 +0,0 @@
"""
This code is used to check the data size distribution in the dataset.
"""
import xml.etree.ElementTree as ET
from math import sqrt as sqrt

import cv2
import matplotlib.pyplot as plt

# sets = [("./data/wider_face_add_lm_10_10", "trainval")]
sets = [("./data/wider_face_add_lm_10_10", "test")]

classes = ['face']

if __name__ == '__main__':
    width = []
    height = []

    for image_set, set in sets:
        image_ids = open('{}/ImageSets/Main/{}.txt'.format(image_set, set)).read().strip().split()
        for image_id in image_ids:
            img_path = '{}/JPEGImages/{}.jpg'.format(image_set, image_id)
            label_file = open('{}/Annotations/{}.xml'.format(image_set, image_id))
            tree = ET.parse(label_file)
            root = tree.getroot()
            size = root.find('size')
            img_w = int(size.find('width').text)
            img_h = int(size.find('height').text)
            img = cv2.imread(img_path)
            for obj in root.iter('object'):
                difficult = obj.find('difficult').text
                cls = obj.find('name').text
                if cls not in classes or int(difficult) == 2:
                    continue
                cls_id = classes.index(cls)

                xmlbox = obj.find('bndbox')
                xmin = int(xmlbox.find('xmin').text)
                ymin = int(xmlbox.find('ymin').text)
                xmax = int(xmlbox.find('xmax').text)
                ymax = int(xmlbox.find('ymax').text)
                w = xmax - xmin
                h = ymax - ymin

                # img = cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 255, 0), 8)
                w_change = (w / img_w) * 320
                h_change = (h / img_h) * 240
                s = w_change * h_change
                if w_change / h_change > 6:
                    print("{}/{}/{}/{}".format(xmin, xmax, ymin, ymax))
                width.append(sqrt(s))
                height.append(w_change / h_change)
            print(img_path)
            # img = cv2.resize(img, (608, 608))
            # cv2.imwrite('{}_{}'.format(image_set.split('/')[-1], set), img)
            # cv2.waitKey()

    plt.plot(width, height, 'ro')
    plt.show()

face_recognition/face_detect/create_fd_result.py DELETED
@@ -1,99 +0,0 @@
"""
This code is used to batch detect images in a folder.
"""
import argparse
import os
import sys

import cv2

from vision.ssd.config.fd_config import define_img_size

parser = argparse.ArgumentParser(description='detect_imgs')

parser.add_argument('--net_type', default="RFB", type=str,
                    help='The network architecture ,optional: RFB (higher precision) or slim (faster)')
parser.add_argument('--input_size', default=320, type=int,
                    help='define network input size,default optional value 128/160/320/480/640/1280')
parser.add_argument('--threshold', default=0.65, type=float,
                    help='score threshold')
parser.add_argument('--candidate_size', default=1500, type=int,
                    help='nms candidate size')
parser.add_argument('--path', default="D:/Database/face_detect/test/originalPics", type=str,
                    help='imgs dir')
parser.add_argument('--test_device', default="cpu", type=str,
                    help='cuda:0 or cpu')
args = parser.parse_args()
define_img_size(args.input_size)  # must put define_img_size() before 'import create_mb_tiny_fd, create_mb_tiny_fd_predictor'

from vision.ssd.mb_tiny_fd import create_mb_tiny_fd, create_mb_tiny_fd_predictor
from vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd, create_Mb_Tiny_RFB_fd_predictor

result_path = "./detect_imgs_results"
label_path = "./models/voc-model-labels.txt"
fd_result_path = 'D:/Database/face_detect/test/rfb_fd_result.txt'
fddb_txt_path = 'D:/Database/face_detect/test/FDDB-folds/FDDB-fold-01-10_2845.txt'

test_device = args.test_device

class_names = [name.strip() for name in open(label_path).readlines()]
if args.net_type == 'slim':
    model_path = "models/pretrained/version-slim-320.pth"
    net = create_mb_tiny_fd(len(class_names), is_test=True, device=test_device)
    predictor = create_mb_tiny_fd_predictor(net, candidate_size=args.candidate_size, device=test_device)
elif args.net_type == 'RFB':
    model_path = "models/pretrained/version-RFB-320.pth"
    net = create_Mb_Tiny_RFB_fd(len(class_names), is_test=True, device=test_device)
    predictor = create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=args.candidate_size, device=test_device)
else:
    print("The net type is wrong!")
    sys.exit(1)
net.load(model_path)

def get_file_names(dir_path):
    file_list = os.listdir(dir_path)
    total_file_list = list()

    for entry in file_list:
        full_path = os.path.join(dir_path, entry)
        if (os.path.isdir(full_path)):
            total_file_list = total_file_list + get_file_names(full_path)
        else:
            total_file_list.append(full_path)

    return total_file_list

def get_file_paths(txt_path):
    path_list = list()
    with open(txt_path, "r") as txt_file:
        for line in txt_file:
            path_list.append(line.strip())

    return path_list

if __name__ == '__main__':
    if not os.path.exists(result_path):
        os.makedirs(result_path)
    listdir = get_file_paths(fddb_txt_path)

    total_count = 0
    correct_count = 0
    for file_path in listdir:
        filename = file_path
        img_path = os.path.join(args.path, filename)
        orig_image = cv2.imread(img_path + ".jpg")
        if orig_image is None:
            continue

        print("filename: ", filename)
        image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)
        boxes, labels, probs = predictor.predict(image, args.candidate_size / 2, args.threshold)

        with open(fd_result_path, "a") as fd_result_file:
            print(filename, file=fd_result_file)
            print(boxes.size(0), file=fd_result_file)
            for i in range(boxes.size(0)):
                box = boxes[i, :]
                score = f"{probs[i]:.3f}"
                print(f"{box[0]:.3f}", f"{box[1]:.3f}", f"{box[2] - box[0]:.3f}", f"{box[3] - box[1]:.3f}", score, file=fd_result_file)

face_recognition/face_detect/detect_imgs.py DELETED
@@ -1,65 +0,0 @@
"""
This code is used to batch detect images in a folder.
"""

import os
import sys
import cv2
import numpy as np
import torch

from face_detect.vision.ssd.config.fd_config import define_img_size

input_size = 320
test_device = 'cpu'
net_type = 'slim'
threshold = 0.6
candidate_size = 1500

define_img_size(input_size)  # must put define_img_size() before 'import create_mb_tiny_fd, create_mb_tiny_fd_predictor'

from face_detect.vision.ssd.mb_tiny_fd import create_mb_tiny_fd, create_mb_tiny_fd_predictor
from face_detect.vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd, create_Mb_Tiny_RFB_fd_predictor

label_path = "./face_recognition/face_detect/models/voc-model-labels.txt"
test_device = test_device

class_names = [name.strip() for name in open(label_path).readlines()]
if net_type == 'slim':
    model_path = "./face_recognition/face_detect/models/pretrained/version-slim-320.pth"
    # model_path = "./face_detect/models/pretrained/version-slim-640.pth"
    net = create_mb_tiny_fd(len(class_names), is_test=True, device=test_device)
    predictor = create_mb_tiny_fd_predictor(net, candidate_size=candidate_size, device=test_device)
elif net_type == 'RFB':
    model_path = "./face_recognition/face_detect/models/pretrained/version-RFB-320.pth"
    # model_path = "./face_detect/models/pretrained/version-RFB-640.pth"
    net = create_Mb_Tiny_RFB_fd(len(class_names), is_test=True, device=test_device)
    predictor = create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=candidate_size, device=test_device)
else:
    print("The net type is wrong!")
    sys.exit(1)
net.load(model_path)

def get_face_boundingbox(orig_image):
    """
    Description:
        In input image, detect face

    Args:
        orig_image: input BGR image.
    """
    boxes, labels, probs = predictor.predict(cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB), candidate_size / 2, threshold)

    if len(boxes) == 0:
        return torch.tensor([]), torch.tensor([])

    height, width, _ = orig_image.shape
    valid_face = np.logical_and(
        np.logical_and(boxes[:, 0] >= 0, boxes[:, 1] >= 0),
        np.logical_and(boxes[:, 2] < width, boxes[:, 3] < height)
    )

    boxes = boxes[valid_face]
    probs = probs[valid_face]

    return boxes, probs

face_recognition/face_detect/models/pretrained/version-RFB-320.pth DELETED (Git LFS pointer)
@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c722b4427cc71642768baef6e15c659931b56f07425e5d2b0ec033ad41b145b3
size 1168374

face_recognition/face_detect/models/pretrained/version-RFB-640.pth DELETED (Git LFS pointer)
@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bf34512b1a93dc234178e8a701ecf25c6afddf335a3226accf62982536e160b5
size 1168354

face_recognition/face_detect/models/pretrained/version-slim-320.pth DELETED (Git LFS pointer)
@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cd24abce45da5dbc7cfd8167cd3d5f955382dfc9d9ae9459f0026abd3c2e38a4
size 1091283

face_recognition/face_detect/models/pretrained/version-slim-640.pth DELETED (Git LFS pointer)
@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:02ca778098127c46d2b2680f1c398c7b993c12a424e94c34e6d608beb73481e4
size 1091287

face_recognition/face_detect/models/voc-model-labels.txt DELETED
@@ -1,2 +0,0 @@
BACKGROUND
face

face_recognition/face_detect/requirements.txt DELETED
@@ -1,11 +0,0 @@
numpy
torch
opencv_python
torchvision
typing
torchstat
torchsummary
ptflops
matplotlib
onnx
onnxruntime

face_recognition/face_detect/vision/__init__.py DELETED (file without changes)
face_recognition/face_detect/vision/datasets/__init__.py DELETED (file without changes)
face_recognition/face_detect/vision/datasets/caffe_pb2.py DELETED (diff too large to render; see raw diff)

face_recognition/face_detect/vision/datasets/voc_dataset.py DELETED
@@ -1,146 +0,0 @@
import logging
import os
import pathlib
import xml.etree.ElementTree as ET
import h5py
import cv2
import numpy as np
import lmdb
from .caffe_pb2 import *

class VOCDataset:

    def __init__(self, root, transform=None, target_transform=None, is_test=False, keep_difficult=False, label_file=None):
        """Dataset for VOC data.
        Args:
            root: the root of the VOC2007 or VOC2012 dataset, the directory contains the following sub-directories:
                Annotations, ImageSets, JPEGImages, SegmentationClass, SegmentationObject.
        """
        self.root = "D:/test"
        self.transform = transform
        self.target_transform = target_transform
        if is_test:
            image_sets_file = self.root + '/test.txt'
        else:
            image_sets_file = self.root + '/test.txt'
        self.ids = ['1.hdf5']  # VOCDataset._read_image_ids(image_sets_file)
        self.keep_difficult = keep_difficult

        # if the labels file exists, read in the class names
        label_file_name = self.root + "labels.txt"

        if os.path.isfile(label_file_name):
            class_string = ""
            with open(label_file_name, 'r') as infile:
                for line in infile:
                    class_string += line.rstrip()

            # classes should be a comma separated list

            classes = class_string.split(',')
            # prepend BACKGROUND as first class
            classes.insert(0, 'BACKGROUND')
            classes = [elem.replace(" ", "") for elem in classes]
            self.class_names = tuple(classes)
            logging.info("VOC Labels read from file: " + str(self.class_names))

        else:
            logging.info("No labels file, using default VOC classes.")
            self.class_names = ('BACKGROUND',
                                'face')

        self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)}

    # def __getitem__(self, index):
    #     image_id = self.ids[index]
    #     boxes, labels, is_difficult = self._get_annotation(image_id)
    #     if not self.keep_difficult:
    #         boxes = boxes[is_difficult == 0]
    #         labels = labels[is_difficult == 0]
    #     image = self._read_image(image_id)
    #     if self.transform:
    #         image, boxes, labels = self.transform(image, boxes, labels)
    #     if self.target_transform:
    #         boxes, labels = self.target_transform(boxes, labels)
    #     return image, boxes, labels

    def __getitem__(self, index):
        num_per_shared = 3
        file_idx = index // num_per_shared
        idx_in_file = index % num_per_shared
        hdf_path = os.path.join(self.root, self.ids[file_idx])
        with h5py.File(hdf_path, 'r') as f:
            boxes = f[str(idx_in_file) + '_boxes']
            is_difficult = f[str(idx_in_file) + '_difficult']
            image = f[str(idx_in_file) + '_image']
            labels = f[str(idx_in_file) + 'labels']

        if not self.keep_difficult:
            boxes = boxes[is_difficult == 0]
            labels = labels[is_difficult == 0]
        if self.transform:
            image, boxes, labels = self.transform(image, boxes, labels)
        if self.target_transform:
            boxes, labels = self.target_transform(boxes, labels)

        return image, boxes, labels

    def get_image(self, index):
        image_id = self.ids[index]
        image = self._read_image(image_id)
        if self.transform:
            image, _ = self.transform(image)
        return image

    def get_annotation(self, index):
        image_id = self.ids[index]
        return image_id, self._get_annotation(image_id)

    def __len__(self):
        total = 0
        # for file in self.ids:
        #     hdf_path = os.path.join(self.root, file)
        #     f = h5py.File(hdf_path, 'r')
        #     total += len(f.keys())
        return total // 4

    @staticmethod
    def _read_image_ids(image_sets_file):
        ids = []
        with open(image_sets_file) as f:
            for line in f:
                ids.append(line.rstrip())
        return ids

    def _get_annotation(self, image_id):
        annotation_file = self.root / f"Annotations/{image_id}.xml"
        objects = ET.parse(annotation_file).findall("object")
        boxes = []
        labels = []
        is_difficult = []
        for object in objects:
            class_name = object.find('name').text.lower().strip()
            # we're only concerned with clases in our list
            if class_name in self.class_dict:
                bbox = object.find('bndbox')

                # VOC dataset format follows Matlab, in which indexes start from 0
                x1 = float(bbox.find('xmin').text) - 1
                y1 = float(bbox.find('ymin').text) - 1
                x2 = float(bbox.find('xmax').text) - 1
                y2 = float(bbox.find('ymax').text) - 1
                boxes.append([x1, y1, x2, y2])

                labels.append(self.class_dict[class_name])
                is_difficult_str = object.find('difficult').text
                is_difficult.append(int(is_difficult_str) if is_difficult_str else 0)

        return (np.array(boxes, dtype=np.float32),
                np.array(labels, dtype=np.int64),
                np.array(is_difficult, dtype=np.uint8))

    def _read_image(self, image_id):
        image_file = self.root / f"JPEGImages/{image_id}.jpg"
        image = cv2.imread(str(image_file))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return image

face_recognition/face_detect/vision/nn/__init__.py DELETED (file without changes)

face_recognition/face_detect/vision/nn/mb_tiny.py DELETED
@@ -1,51 +0,0 @@
import torch.nn as nn
import torch.nn.functional as F


class Mb_Tiny(nn.Module):

    def __init__(self, num_classes=2):
        super(Mb_Tiny, self).__init__()
        self.base_channel = 8 * 2

        def conv_bn(inp, oup, stride):
            return nn.Sequential(
                nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True)
            )

        def conv_dw(inp, oup, stride):
            return nn.Sequential(
                nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
                nn.BatchNorm2d(inp),
                nn.ReLU(inplace=True),

                nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True),
            )

        self.model = nn.Sequential(
            conv_bn(3, self.base_channel, 2),  # 160*120
            conv_dw(self.base_channel, self.base_channel * 2, 1),
            conv_dw(self.base_channel * 2, self.base_channel * 2, 2),  # 80*60
            conv_dw(self.base_channel * 2, self.base_channel * 2, 1),
            conv_dw(self.base_channel * 2, self.base_channel * 4, 2),  # 40*30
            conv_dw(self.base_channel * 4, self.base_channel * 4, 1),
            conv_dw(self.base_channel * 4, self.base_channel * 4, 1),
            conv_dw(self.base_channel * 4, self.base_channel * 4, 1),
            conv_dw(self.base_channel * 4, self.base_channel * 8, 2),  # 20*15
            conv_dw(self.base_channel * 8, self.base_channel * 8, 1),
            conv_dw(self.base_channel * 8, self.base_channel * 8, 1),
            conv_dw(self.base_channel * 8, self.base_channel * 16, 2),  # 10*8
            conv_dw(self.base_channel * 16, self.base_channel * 16, 1)
        )
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.model(x)
        x = F.avg_pool2d(x, 7)
        x = x.view(-1, 1024)
        x = self.fc(x)
        return x

face_recognition/face_detect/vision/nn/mb_tiny_RFB.py DELETED
@@ -1,118 +0,0 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicConv(nn.Module):

    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True):
        super(BasicConv, self).__init__()
        self.out_channels = out_planes
        if bn:
            self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=False)
            self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True)
            self.relu = nn.ReLU(inplace=True) if relu else None
        else:
            self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=True)
            self.bn = None
            self.relu = nn.ReLU(inplace=True) if relu else None

    def forward(self, x):
        x = self.conv(x)
        if self.bn is not None:
            x = self.bn(x)
        if self.relu is not None:
            x = self.relu(x)
        return x


class BasicRFB(nn.Module):

    def __init__(self, in_planes, out_planes, stride=1, scale=0.1, map_reduce=8, vision=1, groups=1):
        super(BasicRFB, self).__init__()
        self.scale = scale
        self.out_channels = out_planes
        inter_planes = in_planes // map_reduce

        self.branch0 = nn.Sequential(
            BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False),
            BasicConv(inter_planes, 2 * inter_planes, kernel_size=(3, 3), stride=stride, padding=(1, 1), groups=groups),
            BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision + 1, dilation=vision + 1, relu=False, groups=groups)
        )
        self.branch1 = nn.Sequential(
            BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False),
            BasicConv(inter_planes, 2 * inter_planes, kernel_size=(3, 3), stride=stride, padding=(1, 1), groups=groups),
            BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision + 2, dilation=vision + 2, relu=False, groups=groups)
        )
        self.branch2 = nn.Sequential(
            BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False),
            BasicConv(inter_planes, (inter_planes // 2) * 3, kernel_size=3, stride=1, padding=1, groups=groups),
            BasicConv((inter_planes // 2) * 3, 2 * inter_planes, kernel_size=3, stride=stride, padding=1, groups=groups),
            BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision + 4, dilation=vision + 4, relu=False, groups=groups)
        )

        self.ConvLinear = BasicConv(6 * inter_planes, out_planes, kernel_size=1, stride=1, relu=False)
        self.shortcut = BasicConv(in_planes, out_planes, kernel_size=1, stride=stride, relu=False)
        self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        x0 = self.branch0(x)
        x1 = self.branch1(x)
        x2 = self.branch2(x)

        out = torch.cat((x0, x1, x2), 1)
        out = self.ConvLinear(out)
        short = self.shortcut(x)
        out = out * self.scale + short
        out = self.relu(out)

        return out


class Mb_Tiny_RFB(nn.Module):

    def __init__(self, num_classes=2):
        super(Mb_Tiny_RFB, self).__init__()
        self.base_channel = 8 * 2

        def conv_bn(inp, oup, stride):
            return nn.Sequential(
                nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True)
            )

        def conv_dw(inp, oup, stride):
            return nn.Sequential(
                nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
                nn.BatchNorm2d(inp),
                nn.ReLU(inplace=True),

                nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True),
            )

        self.model = nn.Sequential(
            conv_bn(3, self.base_channel, 2),  # 160*120
            conv_dw(self.base_channel, self.base_channel * 2, 1),
            conv_dw(self.base_channel * 2, self.base_channel * 2, 2),  # 80*60
            conv_dw(self.base_channel * 2, self.base_channel * 2, 1),
            conv_dw(self.base_channel * 2, self.base_channel * 4, 2),  # 40*30
            conv_dw(self.base_channel * 4, self.base_channel * 4, 1),
            conv_dw(self.base_channel * 4, self.base_channel * 4, 1),
            BasicRFB(self.base_channel * 4, self.base_channel * 4, stride=1, scale=1.0),
            conv_dw(self.base_channel * 4, self.base_channel * 8, 2),  # 20*15
            conv_dw(self.base_channel * 8, self.base_channel * 8, 1),
            conv_dw(self.base_channel * 8, self.base_channel * 8, 1),
            conv_dw(self.base_channel * 8, self.base_channel * 16, 2),  # 10*8
            conv_dw(self.base_channel * 16, self.base_channel * 16, 1)
        )
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.model(x)
        x = F.avg_pool2d(x, 7)
        x = x.view(-1, 1024)
        x = self.fc(x)
        return x

face_recognition/face_detect/vision/nn/multibox_loss.py DELETED
@@ -1,46 +0,0 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

from ..utils import box_utils


class MultiboxLoss(nn.Module):
    def __init__(self, priors, neg_pos_ratio,
                 center_variance, size_variance, device):
        """Implement SSD Multibox Loss.

        Basically, Multibox loss combines classification loss
        and Smooth L1 regression loss.
        """
        super(MultiboxLoss, self).__init__()
        self.neg_pos_ratio = neg_pos_ratio
        self.center_variance = center_variance
        self.size_variance = size_variance
        self.priors = priors
        self.priors.to(device)

    def forward(self, confidence, predicted_locations, labels, gt_locations):
        """Compute classification loss and smooth l1 loss.

        Args:
            confidence (batch_size, num_priors, num_classes): class predictions.
            locations (batch_size, num_priors, 4): predicted locations.
            labels (batch_size, num_priors): real labels of all the priors.
            boxes (batch_size, num_priors, 4): real boxes corresponding all the priors.
        """
        num_classes = confidence.size(2)
        with torch.no_grad():
            # derived from cross_entropy=sum(log(p))
            loss = -F.log_softmax(confidence, dim=2)[:, :, 0]
            mask = box_utils.hard_negative_mining(loss, labels, self.neg_pos_ratio)

        confidence = confidence[mask, :]
        classification_loss = F.cross_entropy(confidence.reshape(-1, num_classes), labels[mask], reduction='sum')
        pos_mask = labels > 0
        predicted_locations = predicted_locations[pos_mask, :].reshape(-1, 4)
        gt_locations = gt_locations[pos_mask, :].reshape(-1, 4)
        smooth_l1_loss = F.smooth_l1_loss(predicted_locations, gt_locations, reduction='sum')  # smooth_l1_loss
        # smooth_l1_loss = F.mse_loss(predicted_locations, gt_locations, reduction='sum')  # l2 loss
        num_pos = gt_locations.size(0)
        return smooth_l1_loss / num_pos, classification_loss / num_pos

face_recognition/face_detect/vision/ssd/__init__.py DELETED (file without changes)
face_recognition/face_detect/vision/ssd/config/__init__.py DELETED (file without changes)

face_recognition/face_detect/vision/ssd/config/fd_config.py DELETED
@@ -1,41 +0,0 @@
import numpy as np

from face_detect.vision.utils.box_utils import generate_priors

image_mean_test = image_mean = np.array([127, 127, 127])
image_std = 128.0
iou_threshold = 0.3
center_variance = 0.1
size_variance = 0.2

min_boxes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]]
shrinkage_list = []
image_size = [320, 240]  # default input size 320*240
feature_map_w_h_list = [[40, 20, 10, 5], [30, 15, 8, 4]]  # default feature map size
priors = []


def define_img_size(size):
    global image_size, feature_map_w_h_list, priors
    img_size_dict = {128: [128, 96],
                     160: [160, 120],
                     320: [320, 240],
                     480: [480, 360],
                     640: [640, 480],
                     1280: [1280, 960]}
    image_size = img_size_dict[size]

    feature_map_w_h_list_dict = {128: [[16, 8, 4, 2], [12, 6, 3, 2]],
                                 160: [[20, 10, 5, 3], [15, 8, 4, 2]],
                                 320: [[40, 20, 10, 5], [30, 15, 8, 4]],
                                 480: [[60, 30, 15, 8], [45, 23, 12, 6]],
                                 640: [[80, 40, 20, 10], [60, 30, 15, 8]],
                                 1280: [[160, 80, 40, 20], [120, 60, 30, 15]]}
    feature_map_w_h_list = feature_map_w_h_list_dict[size]

    for i in range(0, len(image_size)):
        item_list = []
        for k in range(0, len(feature_map_w_h_list[i])):
            item_list.append(image_size[i] / feature_map_w_h_list[i][k])
        shrinkage_list.append(item_list)
    priors = generate_priors(feature_map_w_h_list, shrinkage_list, image_size, min_boxes)

face_recognition/face_detect/vision/ssd/data_preprocessing.py DELETED
@@ -1,61 +0,0 @@
from ..transforms.transforms import *


class TrainAugmentation:
    def __init__(self, size, mean=0, std=1.0):
        """
        Args:
            size: the size the of final image.
            mean: mean pixel value per channel.
        """
        self.mean = mean
        self.size = size
        self.augment = Compose([
            ConvertFromInts(),
            PhotometricDistort(),
            RandomSampleCrop_v2(),
            RandomMirror(),
            ToPercentCoords(),
            Resize(self.size),
            SubtractMeans(self.mean),
            lambda img, boxes=None, labels=None: (img / std, boxes, labels),
            ToTensor(),
        ])

    def __call__(self, img, boxes, labels):
        """

        Args:
            img: the output of cv.imread in RGB layout.
            boxes: boundding boxes in the form of (x1, y1, x2, y2).
            labels: labels of boxes.
        """
        return self.augment(img, boxes, labels)


class TestTransform:
    def __init__(self, size, mean=0.0, std=1.0):
        self.transform = Compose([
            ToPercentCoords(),
            Resize(size),
            SubtractMeans(mean),
            lambda img, boxes=None, labels=None: (img / std, boxes, labels),
            ToTensor(),
        ])

    def __call__(self, image, boxes, labels):
        return self.transform(image, boxes, labels)


class PredictionTransform:
    def __init__(self, size, mean=0.0, std=1.0):
        self.transform = Compose([
            Resize(size),
            SubtractMeans(mean),
            lambda img, boxes=None, labels=None: (img / std, boxes, labels),
            ToTensor()
        ])

    def __call__(self, image):
        image, _, _ = self.transform(image)
        return image

face_recognition/face_detect/vision/ssd/mb_tiny_RFB_fd.py DELETED
@@ -1,64 +0,0 @@
from torch.nn import Conv2d, Sequential, ModuleList, ReLU

from face_detect.vision.nn.mb_tiny_RFB import Mb_Tiny_RFB
from face_detect.vision.ssd.config import fd_config as config
from face_detect.vision.ssd.predictor import Predictor
from face_detect.vision.ssd.ssd import SSD


def SeperableConv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0):
    """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d.
    """
    return Sequential(
        Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size,
               groups=in_channels, stride=stride, padding=padding),
        ReLU(),
        Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1),
    )


def create_Mb_Tiny_RFB_fd(num_classes, is_test=False, device="cuda"):
    base_net = Mb_Tiny_RFB(2)
    base_net_model = base_net.model  # disable dropout layer

    source_layer_indexes = [
        8,
        11,
        13
    ]
    extras = ModuleList([
        Sequential(
            Conv2d(in_channels=base_net.base_channel * 16, out_channels=base_net.base_channel * 4, kernel_size=1),
            ReLU(),
            SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=base_net.base_channel * 16, kernel_size=3, stride=2, padding=1),
            ReLU()
        )
    ])

    regression_headers = ModuleList([
        SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * 4, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * 4, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * 4, kernel_size=3, padding=1),
        Conv2d(in_channels=base_net.base_channel * 16, out_channels=3 * 4, kernel_size=3, padding=1)
    ])

    classification_headers = ModuleList([
        SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * num_classes, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * num_classes, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * num_classes, kernel_size=3, padding=1),
        Conv2d(in_channels=base_net.base_channel * 16, out_channels=3 * num_classes, kernel_size=3, padding=1)
    ])

    return SSD(num_classes, base_net_model, source_layer_indexes,
               extras, classification_headers, regression_headers, is_test=is_test, config=config, device=device)


def create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=200, nms_method=None, sigma=0.5, device=None):
    predictor = Predictor(net, config.image_size, config.image_mean_test,
                          config.image_std,
                          nms_method=nms_method,
                          iou_threshold=config.iou_threshold,
                          candidate_size=candidate_size,
                          sigma=sigma,
                          device=device)
    return predictor

face_recognition/face_detect/vision/ssd/mb_tiny_fd.py DELETED
@@ -1,64 +0,0 @@
from torch.nn import Conv2d, Sequential, ModuleList, ReLU

from face_detect.vision.nn.mb_tiny import Mb_Tiny
from face_detect.vision.ssd.config import fd_config as config
from face_detect.vision.ssd.predictor import Predictor
from face_detect.vision.ssd.ssd import SSD


def SeperableConv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0):
    """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d.
    """
    return Sequential(
        Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size,
               groups=in_channels, stride=stride, padding=padding),
        ReLU(),
        Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1),
    )


def create_mb_tiny_fd(num_classes, is_test=False, device="cuda"):
    base_net = Mb_Tiny(2)
    base_net_model = base_net.model  # disable dropout layer

    source_layer_indexes = [
        8,
        11,
        13
    ]
    extras = ModuleList([
        Sequential(
            Conv2d(in_channels=base_net.base_channel * 16, out_channels=base_net.base_channel * 4, kernel_size=1),
            ReLU(),
            SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=base_net.base_channel * 16, kernel_size=3, stride=2, padding=1),
            ReLU()
        )
    ])

    regression_headers = ModuleList([
        SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * 4, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * 4, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * 4, kernel_size=3, padding=1),
        Conv2d(in_channels=base_net.base_channel * 16, out_channels=3 * 4, kernel_size=3, padding=1)
    ])

    classification_headers = ModuleList([
        SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * num_classes, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * num_classes, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * num_classes, kernel_size=3, padding=1),
        Conv2d(in_channels=base_net.base_channel * 16, out_channels=3 * num_classes, kernel_size=3, padding=1)
    ])

    return SSD(num_classes, base_net_model, source_layer_indexes,
               extras, classification_headers, regression_headers, is_test=is_test, config=config, device=device)


def create_mb_tiny_fd_predictor(net, candidate_size=200, nms_method=None, sigma=0.5, device=None):
    predictor = Predictor(net, config.image_size, config.image_mean_test,
                          config.image_std,
                          nms_method=nms_method,
                          iou_threshold=config.iou_threshold,
                          candidate_size=candidate_size,
                          sigma=sigma,
                          device=device)
    return predictor

face_recognition/face_detect/vision/ssd/predictor.py
DELETED
@@ -1,70 +0,0 @@
import torch

from ..utils import box_utils
from .data_preprocessing import PredictionTransform
from ..utils.misc import Timer


class Predictor:
    def __init__(self, net, size, mean=0.0, std=1.0, nms_method=None,
                 iou_threshold=0.3, filter_threshold=0.01, candidate_size=200, sigma=0.5, device=None):
        self.net = net
        self.transform = PredictionTransform(size, mean, std)
        self.iou_threshold = iou_threshold
        self.filter_threshold = filter_threshold
        self.candidate_size = candidate_size
        self.nms_method = nms_method

        self.sigma = sigma
        if device:
            self.device = device
        else:
            self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        self.net.to(self.device)
        self.net.eval()

        self.timer = Timer()

    def predict(self, image, top_k=-1, prob_threshold=None):
        cpu_device = torch.device("cpu")
        height, width, _ = image.shape
        image = self.transform(image)
        images = image.unsqueeze(0)
        images = images.to(self.device)
        with torch.no_grad():
            for i in range(1):
                scores, boxes = self.net.forward(images)
        boxes = boxes[0]
        scores = scores[0]
        if not prob_threshold:
            prob_threshold = self.filter_threshold
        # this version of nms is slower on GPU, so we move data to CPU.
        boxes = boxes.to(cpu_device)
        scores = scores.to(cpu_device)
        picked_box_probs = []
        picked_labels = []
        for class_index in range(1, scores.size(1)):
            probs = scores[:, class_index]
            mask = probs > prob_threshold
            probs = probs[mask]
            if probs.size(0) == 0:
                continue
            subset_boxes = boxes[mask, :]
            box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1)
            box_probs = box_utils.nms(box_probs, self.nms_method,
                                      score_threshold=prob_threshold,
                                      iou_threshold=self.iou_threshold,
                                      sigma=self.sigma,
                                      top_k=top_k,
                                      candidate_size=self.candidate_size)
            picked_box_probs.append(box_probs)
            picked_labels.extend([class_index] * box_probs.size(0))
        if not picked_box_probs:
            return torch.tensor([]), torch.tensor([]), torch.tensor([])
        picked_box_probs = torch.cat(picked_box_probs)
        picked_box_probs[:, 0] *= width
        picked_box_probs[:, 1] *= height
        picked_box_probs[:, 2] *= width
        picked_box_probs[:, 3] *= height
        return picked_box_probs[:, :4], torch.tensor(picked_labels), picked_box_probs[:, 4]

face_recognition/face_detect/vision/ssd/ssd.py
DELETED
@@ -1,166 +0,0 @@
from collections import namedtuple
from typing import List, Tuple

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from face_detect.vision.utils import box_utils

GraphPath = namedtuple("GraphPath", ['s0', 'name', 's1'])


class SSD(nn.Module):
    def __init__(self, num_classes: int, base_net: nn.ModuleList, source_layer_indexes: List[int],
                 extras: nn.ModuleList, classification_headers: nn.ModuleList,
                 regression_headers: nn.ModuleList, is_test=False, config=None, device=None):
        """Compose a SSD model using the given components.
        """
        super(SSD, self).__init__()

        self.num_classes = num_classes
        self.base_net = base_net
        self.source_layer_indexes = source_layer_indexes
        self.extras = extras
        self.classification_headers = classification_headers
        self.regression_headers = regression_headers
        self.is_test = is_test
        self.config = config

        # register layers in source_layer_indexes by adding them to a module list
        self.source_layer_add_ons = nn.ModuleList([t[1] for t in source_layer_indexes
                                                   if isinstance(t, tuple) and not isinstance(t, GraphPath)])
        if device:
            self.device = device
        else:
            self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        if is_test:
            self.config = config
            self.priors = config.priors.to(self.device)

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        confidences = []
        locations = []
        start_layer_index = 0
        header_index = 0
        end_layer_index = 0
        for end_layer_index in self.source_layer_indexes:
            if isinstance(end_layer_index, GraphPath):
                path = end_layer_index
                end_layer_index = end_layer_index.s0
                added_layer = None
            elif isinstance(end_layer_index, tuple):
                added_layer = end_layer_index[1]
                end_layer_index = end_layer_index[0]
                path = None
            else:
                added_layer = None
                path = None
            for layer in self.base_net[start_layer_index: end_layer_index]:
                x = layer(x)
            if added_layer:
                y = added_layer(x)
            else:
                y = x
            if path:
                sub = getattr(self.base_net[end_layer_index], path.name)
                for layer in sub[:path.s1]:
                    x = layer(x)
                y = x
                for layer in sub[path.s1:]:
                    x = layer(x)
                end_layer_index += 1
            start_layer_index = end_layer_index
            confidence, location = self.compute_header(header_index, y)
            header_index += 1
            confidences.append(confidence)
            locations.append(location)

        for layer in self.base_net[end_layer_index:]:
            x = layer(x)

        for layer in self.extras:
            x = layer(x)
            confidence, location = self.compute_header(header_index, x)
            header_index += 1
            confidences.append(confidence)
            locations.append(location)

        confidences = torch.cat(confidences, 1)
        locations = torch.cat(locations, 1)

        if self.is_test:
            confidences = F.softmax(confidences, dim=2)
            boxes = box_utils.convert_locations_to_boxes(
                locations, self.priors, self.config.center_variance, self.config.size_variance
            )
            boxes = box_utils.center_form_to_corner_form(boxes)
            return confidences, boxes
        else:
            return confidences, locations

    def compute_header(self, i, x):
        confidence = self.classification_headers[i](x)
        confidence = confidence.permute(0, 2, 3, 1).contiguous()
        confidence = confidence.view(confidence.size(0), -1, self.num_classes)

        location = self.regression_headers[i](x)
        location = location.permute(0, 2, 3, 1).contiguous()
        location = location.view(location.size(0), -1, 4)

        return confidence, location

    def init_from_base_net(self, model):
        self.base_net.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage), strict=True)
        self.source_layer_add_ons.apply(_xavier_init_)
        self.extras.apply(_xavier_init_)
        self.classification_headers.apply(_xavier_init_)
        self.regression_headers.apply(_xavier_init_)

    def init_from_pretrained_ssd(self, model):
        state_dict = torch.load(model, map_location=lambda storage, loc: storage)
        state_dict = {k: v for k, v in state_dict.items() if not (k.startswith("classification_headers") or k.startswith("regression_headers"))}
        model_dict = self.state_dict()
        model_dict.update(state_dict)
        self.load_state_dict(model_dict)
        self.classification_headers.apply(_xavier_init_)
        self.regression_headers.apply(_xavier_init_)

    def init(self):
        self.base_net.apply(_xavier_init_)
        self.source_layer_add_ons.apply(_xavier_init_)
        self.extras.apply(_xavier_init_)
        self.classification_headers.apply(_xavier_init_)
        self.regression_headers.apply(_xavier_init_)

    def load(self, model):
        self.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage))

    def save(self, model_path):
        torch.save(self.state_dict(), model_path)


class MatchPrior(object):
    def __init__(self, center_form_priors, center_variance, size_variance, iou_threshold):
        self.center_form_priors = center_form_priors
        self.corner_form_priors = box_utils.center_form_to_corner_form(center_form_priors)
        self.center_variance = center_variance
        self.size_variance = size_variance
        self.iou_threshold = iou_threshold

    def __call__(self, gt_boxes, gt_labels):
        if type(gt_boxes) is np.ndarray:
            gt_boxes = torch.from_numpy(gt_boxes)
        if type(gt_labels) is np.ndarray:
            gt_labels = torch.from_numpy(gt_labels)
        boxes, labels = box_utils.assign_priors(gt_boxes, gt_labels,
                                                self.corner_form_priors, self.iou_threshold)
        boxes = box_utils.corner_form_to_center_form(boxes)
        locations = box_utils.convert_boxes_to_locations(boxes, self.center_form_priors, self.center_variance, self.size_variance)
        return locations, labels


def _xavier_init_(m: nn.Module):
    if isinstance(m, nn.Conv2d):
        nn.init.xavier_uniform_(m.weight)

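For orientation, MatchPrior above is the training-side counterpart of the test-time decode in forward(): it turns one image's ground-truth boxes into per-prior regression targets. The sketch below shows the mechanism only; the feature-map sizes, min_boxes, variances and IoU threshold are illustrative placeholders, since the real values come from the deleted fd_config.py.

import torch

from face_detect.vision.ssd.ssd import MatchPrior
from face_detect.vision.utils import box_utils

# Hypothetical prior configuration for illustration; the deleted fd_config.py defines the real one.
priors = box_utils.generate_priors(
    feature_map_list=[[40, 20, 10, 5], [30, 15, 8, 4]],   # [widths, heights] per feature map
    shrinkage_list=[[8, 16, 32, 64], [8, 16, 32, 64]],
    image_size=[320, 240],
    min_boxes=[[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]],
)
target_transform = MatchPrior(priors, center_variance=0.1, size_variance=0.2, iou_threshold=0.35)

# One face in relative corner-form coordinates, label 1 = "face".
gt_boxes = torch.tensor([[0.30, 0.25, 0.55, 0.60]])
gt_labels = torch.tensor([1])
locations, labels = target_transform(gt_boxes, gt_labels)
print(locations.shape, int((labels > 0).sum()), "positive priors")
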
face_recognition/face_detect/vision/transforms/__init__.py
DELETED
File without changes

face_recognition/face_detect/vision/transforms/transforms.py
DELETED
@@ -1,541 +0,0 @@
# from https://github.com/amdegroot/ssd.pytorch


import types

import cv2
import numpy as np
import torch
from numpy import random
from torchvision import transforms


def intersect(box_a, box_b):
    max_xy = np.minimum(box_a[:, 2:], box_b[2:])
    min_xy = np.maximum(box_a[:, :2], box_b[:2])
    inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf)
    return inter[:, 0] * inter[:, 1]


def jaccard_numpy(box_a, box_b):
    """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
    is simply the intersection over union of two boxes.
    E.g.:
        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
    Args:
        box_a: Multiple bounding boxes, Shape: [num_boxes,4]
        box_b: Single bounding box, Shape: [4]
    Return:
        jaccard overlap: Shape: [box_a.shape[0], box_a.shape[1]]
    """
    inter = intersect(box_a, box_b)
    area_a = ((box_a[:, 2] - box_a[:, 0]) *
              (box_a[:, 3] - box_a[:, 1]))  # [A,B]
    area_b = ((box_b[2] - box_b[0]) *
              (box_b[3] - box_b[1]))  # [A,B]
    union = area_a + area_b - inter
    return inter / union  # [A,B]


def object_converage_numpy(box_a, box_b):
    """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
    is simply the intersection over union of two boxes.
    E.g.:
        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
    Args:
        box_a: Multiple bounding boxes, Shape: [num_boxes,4]
        box_b: Single bounding box, Shape: [4]
    Return:
        jaccard overlap: Shape: [box_a.shape[0], box_a.shape[1]]
    """
    inter = intersect(box_a, box_b)
    area_a = ((box_a[:, 2] - box_a[:, 0]) *
              (box_a[:, 3] - box_a[:, 1]))  # [A,B]
    area_b = ((box_b[2] - box_b[0]) *
              (box_b[3] - box_b[1]))  # [A,B]
    return inter / area_a  # [A,B]


class Compose(object):
    """Composes several augmentations together.
    Args:
        transforms (List[Transform]): list of transforms to compose.
    Example:
        >>> augmentations.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, boxes=None, labels=None):
        for t in self.transforms:
            img, boxes, labels = t(img, boxes, labels)
        return img, boxes, labels


class Lambda(object):
    """Applies a lambda as a transform."""

    def __init__(self, lambd):
        assert isinstance(lambd, types.LambdaType)
        self.lambd = lambd

    def __call__(self, img, boxes=None, labels=None):
        return self.lambd(img, boxes, labels)


class ConvertFromInts(object):
    def __call__(self, image, boxes=None, labels=None):
        return image.astype(np.float32), boxes, labels


class SubtractMeans(object):
    def __init__(self, mean):
        self.mean = np.array(mean, dtype=np.float32)

    def __call__(self, image, boxes=None, labels=None):
        image = image.astype(np.float32)
        image -= self.mean
        return image.astype(np.float32), boxes, labels


class imgprocess(object):
    def __init__(self, std):
        self.std = np.array(std, dtype=np.float32)

    def __call__(self, image, boxes=None, labels=None):
        image = image.astype(np.float32)
        image /= self.std
        return image.astype(np.float32), boxes, labels


class ToAbsoluteCoords(object):
    def __call__(self, image, boxes=None, labels=None):
        height, width, channels = image.shape
        boxes[:, 0] *= width
        boxes[:, 2] *= width
        boxes[:, 1] *= height
        boxes[:, 3] *= height

        return image, boxes, labels


class ToPercentCoords(object):
    def __call__(self, image, boxes=None, labels=None):
        height, width, channels = image.shape
        boxes[:, 0] /= width
        boxes[:, 2] /= width
        boxes[:, 1] /= height
        boxes[:, 3] /= height

        return image, boxes, labels


class Resize(object):
    def __init__(self, size=(300, 300)):
        self.size = size

    def __call__(self, image, boxes=None, labels=None):
        image = cv2.resize(image, (self.size[0],
                                   self.size[1]))
        return image, boxes, labels


class RandomSaturation(object):
    def __init__(self, lower=0.5, upper=1.5):
        self.lower = lower
        self.upper = upper
        assert self.upper >= self.lower, "contrast upper must be >= lower."
        assert self.lower >= 0, "contrast lower must be non-negative."

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            image[:, :, 1] *= random.uniform(self.lower, self.upper)

        return image, boxes, labels


class RandomHue(object):
    def __init__(self, delta=18.0):
        assert delta >= 0.0 and delta <= 360.0
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            image[:, :, 0] += random.uniform(-self.delta, self.delta)
            image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0
            image[:, :, 0][image[:, :, 0] < 0.0] += 360.0
        return image, boxes, labels


class RandomLightingNoise(object):
    def __init__(self):
        self.perms = ((0, 1, 2), (0, 2, 1),
                      (1, 0, 2), (1, 2, 0),
                      (2, 0, 1), (2, 1, 0))

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            swap = self.perms[random.randint(len(self.perms))]
            shuffle = SwapChannels(swap)  # shuffle channels
            image = shuffle(image)
        return image, boxes, labels


class ConvertColor(object):
    def __init__(self, current, transform):
        self.transform = transform
        self.current = current

    def __call__(self, image, boxes=None, labels=None):
        if self.current == 'BGR' and self.transform == 'HSV':
            image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        elif self.current == 'RGB' and self.transform == 'HSV':
            image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
        elif self.current == 'BGR' and self.transform == 'RGB':
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        elif self.current == 'HSV' and self.transform == 'BGR':
            image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
        elif self.current == 'HSV' and self.transform == "RGB":
            image = cv2.cvtColor(image, cv2.COLOR_HSV2RGB)
        else:
            raise NotImplementedError
        return image, boxes, labels


class RandomContrast(object):
    def __init__(self, lower=0.5, upper=1.5):
        self.lower = lower
        self.upper = upper
        assert self.upper >= self.lower, "contrast upper must be >= lower."
        assert self.lower >= 0, "contrast lower must be non-negative."

    # expects float image
    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            alpha = random.uniform(self.lower, self.upper)
            image *= alpha
        return image, boxes, labels


class RandomBrightness(object):
    def __init__(self, delta=32):
        assert delta >= 0.0
        assert delta <= 255.0
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            delta = random.uniform(-self.delta, self.delta)
            image += delta
        return image, boxes, labels


class ToCV2Image(object):
    def __call__(self, tensor, boxes=None, labels=None):
        return tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), boxes, labels


class ToTensor(object):
    def __call__(self, cvimage, boxes=None, labels=None):
        return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), boxes, labels


class RandomSampleCrop(object):
    """Crop
    Arguments:
        img (Image): the image being input during training
        boxes (Tensor): the original bounding boxes in pt form
        labels (Tensor): the class labels for each bbox
        mode (float tuple): the min and max jaccard overlaps
    Return:
        (img, boxes, classes)
            img (Image): the cropped image
            boxes (Tensor): the adjusted bounding boxes in pt form
            labels (Tensor): the class labels for each bbox
    """

    def __init__(self):
        self.sample_options = (
            # using entire original input image
            None,
            # sample a patch s.t. MIN jaccard w/ obj in .1,.3,.4,.7,.9
            (0.1, None),
            (0.3, None),
            (0.7, None),
            (0.9, None),
            # randomly sample a patch
            (None, None),
        )

    def __call__(self, image, boxes=None, labels=None):
        height, width, _ = image.shape
        while True:
            # randomly choose a mode
            mode = random.choice(self.sample_options)
            if mode is None:
                return image, boxes, labels

            min_iou, max_iou = mode
            if min_iou is None:
                min_iou = float('-inf')
            if max_iou is None:
                max_iou = float('inf')

            # max trails (50)
            for _ in range(50):
                current_image = image

                w = random.uniform(0.3 * width, width)
                h = random.uniform(0.3 * height, height)

                # aspect ratio constraint b/t .5 & 2
                if h / w < 0.5 or h / w > 2:
                    continue

                left = random.uniform(width - w)
                top = random.uniform(height - h)

                # convert to integer rect x1,y1,x2,y2
                rect = np.array([int(left), int(top), int(left + w), int(top + h)])

                # calculate IoU (jaccard overlap) b/t the cropped and gt boxes
                overlap = jaccard_numpy(boxes, rect)

                # is min and max overlap constraint satisfied? if not try again
                if overlap.max() < min_iou or overlap.min() > max_iou:
                    continue

                # cut the crop from the image
                current_image = current_image[rect[1]:rect[3], rect[0]:rect[2],
                                              :]

                # keep overlap with gt box IF center in sampled patch
                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0

                # mask in all gt boxes that above and to the left of centers
                m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])

                # mask in all gt boxes that under and to the right of centers
                m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])

                # mask in that both m1 and m2 are true
                mask = m1 * m2

                # have any valid boxes? try again if not
                if not mask.any():
                    continue

                # take only matching gt boxes
                current_boxes = boxes[mask, :].copy()

                # take only matching gt labels
                current_labels = labels[mask]

                # should we use the box left and top corner or the crop's
                current_boxes[:, :2] = np.maximum(current_boxes[:, :2],
                                                  rect[:2])
                # adjust to crop (by substracting crop's left,top)
                current_boxes[:, :2] -= rect[:2]

                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:],
                                                  rect[2:])
                # adjust to crop (by substracting crop's left,top)
                current_boxes[:, 2:] -= rect[:2]

                return current_image, current_boxes, current_labels


class RandomSampleCrop_v2(object):
    """Crop
    Arguments:
        img (Image): the image being input during training
        boxes (Tensor): the original bounding boxes in pt form
        labels (Tensor): the class labels for each bbox
        mode (float tuple): the min and max jaccard overlaps
    Return:
        (img, boxes, classes)
            img (Image): the cropped image
            boxes (Tensor): the adjusted bounding boxes in pt form
            labels (Tensor): the class labels for each bbox
    """

    def __init__(self):
        self.sample_options = (
            # using entire original input image
            None,
            # sample a patch s.t. MIN jaccard w/ obj in .1,.3,.4,.7,.9

            # randomly sample a patch
            (1, None),
            (1, None),
            (1, None),
            (1, None),
        )

    def __call__(self, image, boxes=None, labels=None):
        height, width, _ = image.shape
        while True:
            # randomly choose a mode
            mode = random.choice(self.sample_options)
            if mode is None:
                return image, boxes, labels

            min_iou, max_iou = mode
            if min_iou is None:
                min_iou = float('-inf')
            if max_iou is None:
                max_iou = float('inf')

            # max trails (50)
            for _ in range(50):
                current_image = image

                w = random.uniform(0.3 * width, width)
                h = random.uniform(0.3 * height, height)

                # aspect ratio constraint b/t .5 & 2
                if h / w != 1:
                    continue
                left = random.uniform(width - w)
                top = random.uniform(height - h)

                # convert to integer rect x1,y1,x2,y2
                rect = np.array([int(left), int(top), int(left + w), int(top + h)])

                # calculate IoU (jaccard overlap) b/t the cropped and gt boxes
                overlap = object_converage_numpy(boxes, rect)

                # is min and max overlap constraint satisfied? if not try again
                if overlap.max() < min_iou or overlap.min() > max_iou:
                    continue

                # cut the crop from the image
                current_image = current_image[rect[1]:rect[3], rect[0]:rect[2],
                                              :]

                # keep overlap with gt box IF center in sampled patch
                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0

                # mask in all gt boxes that above and to the left of centers
                m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])

                # mask in all gt boxes that under and to the right of centers
                m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])

                # mask in that both m1 and m2 are true
                mask = m1 * m2

                # have any valid boxes? try again if not
                if not mask.any():
                    continue

                # take only matching gt boxes
                current_boxes = boxes[mask, :].copy()

                # take only matching gt labels
                current_labels = labels[mask]

                # should we use the box left and top corner or the crop's
                current_boxes[:, :2] = np.maximum(current_boxes[:, :2],
                                                  rect[:2])
                # adjust to crop (by substracting crop's left,top)
                current_boxes[:, :2] -= rect[:2]

                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:],
                                                  rect[2:])
                # adjust to crop (by substracting crop's left,top)
                current_boxes[:, 2:] -= rect[:2]

                return current_image, current_boxes, current_labels


class Expand(object):
    def __init__(self, mean):
        self.mean = mean

    def __call__(self, image, boxes, labels):
        if random.randint(2):
            return image, boxes, labels

        height, width, depth = image.shape
        ratio = random.uniform(1, 4)
        left = random.uniform(0, width * ratio - width)
        top = random.uniform(0, height * ratio - height)

        expand_image = np.zeros(
            (int(height * ratio), int(width * ratio), depth),
            dtype=image.dtype)
        expand_image[:, :, :] = self.mean
        expand_image[int(top):int(top + height),
                     int(left):int(left + width)] = image
        image = expand_image

        boxes = boxes.copy()
        boxes[:, :2] += (int(left), int(top))
        boxes[:, 2:] += (int(left), int(top))

        return image, boxes, labels


class RandomMirror(object):
    def __call__(self, image, boxes, classes):
        _, width, _ = image.shape
        if random.randint(2):
            image = image[:, ::-1]
            boxes = boxes.copy()
            boxes[:, 0::2] = width - boxes[:, 2::-2]
        return image, boxes, classes


class SwapChannels(object):
    """Transforms a tensorized image by swapping the channels in the order
    specified in the swap tuple.
    Args:
        swaps (int triple): final order of channels
            eg: (2, 1, 0)
    """

    def __init__(self, swaps):
        self.swaps = swaps

    def __call__(self, image):
        """
        Args:
            image (Tensor): image tensor to be transformed
        Return:
            a tensor with channels swapped according to swap
        """
        # if torch.is_tensor(image):
        #     image = image.data.cpu().numpy()
        # else:
        #     image = np.array(image)
        image = image[:, :, self.swaps]
        return image


class PhotometricDistort(object):
    def __init__(self):
        self.pd = [
            RandomContrast(),  # RGB
            ConvertColor(current="RGB", transform='HSV'),  # HSV
            RandomSaturation(),  # HSV
            RandomHue(),  # HSV
            ConvertColor(current='HSV', transform='RGB'),  # RGB
            RandomContrast()  # RGB
        ]
        self.rand_brightness = RandomBrightness()
        self.rand_light_noise = RandomLightingNoise()

    def __call__(self, image, boxes, labels):
        im = image.copy()
        im, boxes, labels = self.rand_brightness(im, boxes, labels)
        if random.randint(2):
            distort = Compose(self.pd[:-1])
        else:
            distort = Compose(self.pd[1:])
        im, boxes, labels = distort(im, boxes, labels)
        return self.rand_light_noise(im, boxes, labels)

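For orientation, these augmentation classes are normally chained with Compose into a training-time pipeline. The sketch below is a rough approximation of such a pipeline; the exact ordering and the wrapper used during training live in the deleted data_preprocessing.py, and the input size, mean and std here are placeholders.

import numpy as np

from face_detect.vision.transforms.transforms import (
    Compose, ConvertFromInts, PhotometricDistort, Expand, RandomSampleCrop,
    RandomMirror, ToPercentCoords, Resize, SubtractMeans, imgprocess, ToTensor)

mean = np.array([127, 127, 127], dtype=np.float32)
augment = Compose([
    ConvertFromInts(),        # uint8 -> float32
    PhotometricDistort(),     # brightness/contrast/saturation/hue jitter
    Expand(mean),             # random zoom-out onto a mean-filled canvas
    RandomSampleCrop(),       # IoU-constrained random crop
    RandomMirror(),           # horizontal flip
    ToPercentCoords(),        # boxes back to relative coordinates
    Resize((320, 240)),       # placeholder network input size
    SubtractMeans(mean),
    imgprocess(128.0),        # divide by std
    ToTensor(),
])

image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
boxes = np.array([[100.0, 120.0, 260.0, 300.0]], dtype=np.float32)  # absolute corner form
labels = np.array([1], dtype=np.int64)
img_t, boxes_out, labels_out = augment(image, boxes, labels)
print(img_t.shape)  # torch.Size([3, 240, 320])
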
face_recognition/face_detect/vision/utils/__init__.py
DELETED
@@ -1 +0,0 @@
from .misc import *

face_recognition/face_detect/vision/utils/box_utils.py
DELETED
@@ -1,241 +0,0 @@
import math

import torch


def generate_priors(feature_map_list, shrinkage_list, image_size, min_boxes, clamp=True) -> torch.Tensor:
    priors = []
    for index in range(0, len(feature_map_list[0])):
        scale_w = image_size[0] / shrinkage_list[0][index]
        scale_h = image_size[1] / shrinkage_list[1][index]
        for j in range(0, feature_map_list[1][index]):
            for i in range(0, feature_map_list[0][index]):
                x_center = (i + 0.5) / scale_w
                y_center = (j + 0.5) / scale_h

                for min_box in min_boxes[index]:
                    w = min_box / image_size[0]
                    h = min_box / image_size[1]
                    priors.append([
                        x_center,
                        y_center,
                        w,
                        h
                    ])
    print("priors nums:{}".format(len(priors)))
    priors = torch.tensor(priors)
    if clamp:
        torch.clamp(priors, 0.0, 1.0, out=priors)
    return priors


def convert_locations_to_boxes(locations, priors, center_variance,
                               size_variance):
    """Convert regressional location results of SSD into boxes in the form of (center_x, center_y, h, w).

    The conversion:
        $$predicted\_center * center_variance = \frac {real\_center - prior\_center} {prior\_hw}$$
        $$exp(predicted\_hw * size_variance) = \frac {real\_hw} {prior\_hw}$$
    We do it in the inverse direction here.
    Args:
        locations (batch_size, num_priors, 4): the regression output of SSD. It will contain the outputs as well.
        priors (num_priors, 4) or (batch_size/1, num_priors, 4): prior boxes.
        center_variance: a float used to change the scale of center.
        size_variance: a float used to change of scale of size.
    Returns:
        boxes:  priors: [[center_x, center_y, h, w]]. All the values
            are relative to the image size.
    """
    # priors can have one dimension less.
    if priors.dim() + 1 == locations.dim():
        priors = priors.unsqueeze(0)
    return torch.cat([
        locations[..., :2] * center_variance * priors[..., 2:] + priors[..., :2],
        torch.exp(locations[..., 2:] * size_variance) * priors[..., 2:]
    ], dim=locations.dim() - 1)


def convert_boxes_to_locations(center_form_boxes, center_form_priors, center_variance, size_variance):
    # priors can have one dimension less
    if center_form_priors.dim() + 1 == center_form_boxes.dim():
        center_form_priors = center_form_priors.unsqueeze(0)
    return torch.cat([
        (center_form_boxes[..., :2] - center_form_priors[..., :2]) / center_form_priors[..., 2:] / center_variance,
        torch.log(center_form_boxes[..., 2:] / center_form_priors[..., 2:]) / size_variance
    ], dim=center_form_boxes.dim() - 1)


def area_of(left_top, right_bottom) -> torch.Tensor:
    """Compute the areas of rectangles given two corners.

    Args:
        left_top (N, 2): left top corner.
        right_bottom (N, 2): right bottom corner.

    Returns:
        area (N): return the area.
    """
    hw = torch.clamp(right_bottom - left_top, min=0.0)
    return hw[..., 0] * hw[..., 1]


def iou_of(boxes0, boxes1, eps=1e-5):
    """Return intersection-over-union (Jaccard index) of boxes.

    Args:
        boxes0 (N, 4): ground truth boxes.
        boxes1 (N or 1, 4): predicted boxes.
        eps: a small number to avoid 0 as denominator.
    Returns:
        iou (N): IoU values.
    """
    overlap_left_top = torch.max(boxes0[..., :2], boxes1[..., :2])
    overlap_right_bottom = torch.min(boxes0[..., 2:], boxes1[..., 2:])

    overlap_area = area_of(overlap_left_top, overlap_right_bottom)
    area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
    area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
    return overlap_area / (area0 + area1 - overlap_area + eps)


def assign_priors(gt_boxes, gt_labels, corner_form_priors,
                  iou_threshold):
    """Assign ground truth boxes and targets to priors.

    Args:
        gt_boxes (num_targets, 4): ground truth boxes.
        gt_labels (num_targets): labels of targets.
        priors (num_priors, 4): corner form priors
    Returns:
        boxes (num_priors, 4): real values for priors.
        labels (num_priros): labels for priors.
    """
    # size: num_priors x num_targets
    ious = iou_of(gt_boxes.unsqueeze(0), corner_form_priors.unsqueeze(1))
    # size: num_priors
    best_target_per_prior, best_target_per_prior_index = ious.max(1)
    # size: num_targets
    best_prior_per_target, best_prior_per_target_index = ious.max(0)

    for target_index, prior_index in enumerate(best_prior_per_target_index):
        best_target_per_prior_index[prior_index] = target_index
    # 2.0 is used to make sure every target has a prior assigned
    best_target_per_prior.index_fill_(0, best_prior_per_target_index, 2)
    # size: num_priors
    labels = gt_labels[best_target_per_prior_index]
    labels[best_target_per_prior < iou_threshold] = 0  # the backgournd id
    boxes = gt_boxes[best_target_per_prior_index]
    return boxes, labels


def hard_negative_mining(loss, labels, neg_pos_ratio):
    """
    It used to suppress the presence of a large number of negative prediction.
    It works on image level not batch level.
    For any example/image, it keeps all the positive predictions and
    cut the number of negative predictions to make sure the ratio
    between the negative examples and positive examples is no more
    the given ratio for an image.

    Args:
        loss (N, num_priors): the loss for each example.
        labels (N, num_priors): the labels.
        neg_pos_ratio: the ratio between the negative examples and positive examples.
    """
    pos_mask = labels > 0
    num_pos = pos_mask.long().sum(dim=1, keepdim=True)
    num_neg = num_pos * neg_pos_ratio

    loss[pos_mask] = -math.inf
    _, indexes = loss.sort(dim=1, descending=True)
    _, orders = indexes.sort(dim=1)
    neg_mask = orders < num_neg
    return pos_mask | neg_mask


def center_form_to_corner_form(locations):
    return torch.cat([locations[..., :2] - locations[..., 2:] / 2,
                      locations[..., :2] + locations[..., 2:] / 2], locations.dim() - 1)


def corner_form_to_center_form(boxes):
    return torch.cat([
        (boxes[..., :2] + boxes[..., 2:]) / 2,
        boxes[..., 2:] - boxes[..., :2]
    ], boxes.dim() - 1)


def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
    """

    Args:
        box_scores (N, 5): boxes in corner-form and probabilities.
        iou_threshold: intersection over union threshold.
        top_k: keep top_k results. If k <= 0, keep all the results.
        candidate_size: only consider the candidates with the highest scores.
    Returns:
        picked: a list of indexes of the kept boxes
    """
    scores = box_scores[:, -1]
    boxes = box_scores[:, :-1]
    picked = []
    _, indexes = scores.sort(descending=True)
    indexes = indexes[:candidate_size]
    while len(indexes) > 0:
        current = indexes[0]
        picked.append(current.item())
        if 0 < top_k == len(picked) or len(indexes) == 1:
            break
        current_box = boxes[current, :]
        indexes = indexes[1:]
        rest_boxes = boxes[indexes, :]
        iou = iou_of(
            rest_boxes,
            current_box.unsqueeze(0),
        )
        indexes = indexes[iou <= iou_threshold]

    return box_scores[picked, :]


def nms(box_scores, nms_method=None, score_threshold=None, iou_threshold=None,
        sigma=0.5, top_k=-1, candidate_size=200):
    if nms_method == "soft":
        return soft_nms(box_scores, score_threshold, sigma, top_k)
    else:
        return hard_nms(box_scores, iou_threshold, top_k, candidate_size=candidate_size)


def soft_nms(box_scores, score_threshold, sigma=0.5, top_k=-1):
    """Soft NMS implementation.

    References:
        https://arxiv.org/abs/1704.04503
        https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/cython_nms.pyx

    Args:
        box_scores (N, 5): boxes in corner-form and probabilities.
        score_threshold: boxes with scores less than value are not considered.
        sigma: the parameter in score re-computation.
            scores[i] = scores[i] * exp(-(iou_i)^2 / simga)
        top_k: keep top_k results. If k <= 0, keep all the results.
    Returns:
        picked_box_scores (K, 5): results of NMS.
    """
    picked_box_scores = []
    while box_scores.size(0) > 0:
        max_score_index = torch.argmax(box_scores[:, 4])
        cur_box_prob = torch.tensor(box_scores[max_score_index, :])
        picked_box_scores.append(cur_box_prob)
        if len(picked_box_scores) == top_k > 0 or box_scores.size(0) == 1:
            break
        cur_box = cur_box_prob[:-1]
        box_scores[max_score_index, :] = box_scores[-1, :]
        box_scores = box_scores[:-1, :]
        ious = iou_of(cur_box.unsqueeze(0), box_scores[:, :-1])
        box_scores[:, -1] = box_scores[:, -1] * torch.exp(-(ious * ious) / sigma)
        box_scores = box_scores[box_scores[:, -1] > score_threshold, :]
    if len(picked_box_scores) > 0:
        return torch.stack(picked_box_scores)
    else:
        return torch.tensor([])

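A small self-contained example of the hard-NMS helper above (illustrative only): three candidate boxes in corner form with scores in the last column are reduced to the two survivors that do not overlap heavily.

import torch

from face_detect.vision.utils.box_utils import hard_nms

# Each row is [x1, y1, x2, y2, score] in relative coordinates.
candidates = torch.tensor([
    [0.10, 0.10, 0.40, 0.40, 0.90],
    [0.12, 0.11, 0.42, 0.41, 0.75],   # heavy overlap with the first box -> suppressed
    [0.60, 0.55, 0.85, 0.90, 0.80],
])
kept = hard_nms(candidates, iou_threshold=0.3)
print(kept)  # the first and third rows remain
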
face_recognition/face_detect/vision/utils/box_utils_numpy.py
DELETED
@@ -1,119 +0,0 @@
import numpy as np


def convert_locations_to_boxes(locations, priors, center_variance,
                               size_variance):
    """Convert regressional location results of SSD into boxes in the form of (center_x, center_y, h, w).

    The conversion:
        $$predicted\_center * center_variance = \frac {real\_center - prior\_center} {prior\_hw}$$
        $$exp(predicted\_hw * size_variance) = \frac {real\_hw} {prior\_hw}$$
    We do it in the inverse direction here.
    Args:
        locations (batch_size, num_priors, 4): the regression output of SSD. It will contain the outputs as well.
        priors (num_priors, 4) or (batch_size/1, num_priors, 4): prior boxes.
        center_variance: a float used to change the scale of center.
        size_variance: a float used to change of scale of size.
    Returns:
        boxes:  priors: [[center_x, center_y, h, w]]. All the values
            are relative to the image size.
    """
    # priors can have one dimension less.
    if len(priors.shape) + 1 == len(locations.shape):
        priors = np.expand_dims(priors, 0)
    return np.concatenate([
        locations[..., :2] * center_variance * priors[..., 2:] + priors[..., :2],
        np.exp(locations[..., 2:] * size_variance) * priors[..., 2:]
    ], axis=len(locations.shape) - 1)


def convert_boxes_to_locations(center_form_boxes, center_form_priors, center_variance, size_variance):
    # priors can have one dimension less
    if len(center_form_priors.shape) + 1 == len(center_form_boxes.shape):
        center_form_priors = np.expand_dims(center_form_priors, 0)
    return np.concatenate([
        (center_form_boxes[..., :2] - center_form_priors[..., :2]) / center_form_priors[..., 2:] / center_variance,
        np.log(center_form_boxes[..., 2:] / center_form_priors[..., 2:]) / size_variance
    ], axis=len(center_form_boxes.shape) - 1)


def area_of(left_top, right_bottom):
    """Compute the areas of rectangles given two corners.

    Args:
        left_top (N, 2): left top corner.
        right_bottom (N, 2): right bottom corner.

    Returns:
        area (N): return the area.
    """
    hw = np.clip(right_bottom - left_top, 0.0, None)
    return hw[..., 0] * hw[..., 1]


def iou_of(boxes0, boxes1, eps=1e-5):
    """Return intersection-over-union (Jaccard index) of boxes.

    Args:
        boxes0 (N, 4): ground truth boxes.
        boxes1 (N or 1, 4): predicted boxes.
        eps: a small number to avoid 0 as denominator.
    Returns:
        iou (N): IoU values.
    """
    overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
    overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])

    overlap_area = area_of(overlap_left_top, overlap_right_bottom)
    area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
    area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
    return overlap_area / (area0 + area1 - overlap_area + eps)


def center_form_to_corner_form(locations):
    return np.concatenate([locations[..., :2] - locations[..., 2:] / 2,
                           locations[..., :2] + locations[..., 2:] / 2], len(locations.shape) - 1)


def corner_form_to_center_form(boxes):
    return np.concatenate([
        (boxes[..., :2] + boxes[..., 2:]) / 2,
        boxes[..., 2:] - boxes[..., :2]
    ], len(boxes.shape) - 1)


def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
    """

    Args:
        box_scores (N, 5): boxes in corner-form and probabilities.
        iou_threshold: intersection over union threshold.
        top_k: keep top_k results. If k <= 0, keep all the results.
        candidate_size: only consider the candidates with the highest scores.
    Returns:
        picked: a list of indexes of the kept boxes
    """
    scores = box_scores[:, -1]
    boxes = box_scores[:, :-1]
    picked = []
    # _, indexes = scores.sort(descending=True)
    indexes = np.argsort(scores)
    # indexes = indexes[:candidate_size]
    indexes = indexes[-candidate_size:]
    while len(indexes) > 0:
        # current = indexes[0]
        current = indexes[-1]
        picked.append(current)
        if 0 < top_k == len(picked) or len(indexes) == 1:
            break
        current_box = boxes[current, :]
        # indexes = indexes[1:]
        indexes = indexes[:-1]
        rest_boxes = boxes[indexes, :]
        iou = iou_of(
            rest_boxes,
            np.expand_dims(current_box, axis=0),
        )
        indexes = indexes[iou <= iou_threshold]

    return box_scores[picked, :]

face_recognition/face_detect/vision/utils/misc.py
DELETED
@@ -1,46 +0,0 @@
import datetime

import torch


def str2bool(s):
    return s.lower() in ('true', '1')


class Timer:
    def __init__(self):
        self.clock = {}

    def start(self, key="default"):
        self.clock[key] = datetime.datetime.now()

    def end(self, key="default"):
        if key not in self.clock:
            raise Exception(f"{key} is not in the clock.")
        interval = datetime.datetime.now() - self.clock[key]
        del self.clock[key]
        return interval.total_seconds()


def save_checkpoint(epoch, net_state_dict, optimizer_state_dict, best_score, checkpoint_path, model_path):
    torch.save({
        'epoch': epoch,
        'model': net_state_dict,
        'optimizer': optimizer_state_dict,
        'best_score': best_score
    }, checkpoint_path)
    torch.save(net_state_dict, model_path)


def load_checkpoint(checkpoint_path):
    return torch.load(checkpoint_path)


def freeze_net_layers(net):
    for param in net.parameters():
        param.requires_grad = False


def store_labels(path, labels):
    with open(path, "w") as f:
        f.write("\n".join(labels))

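The Timer above measures named wall-clock intervals; a quick illustrative usage sketch:

from face_detect.vision.utils.misc import Timer

timer = Timer()
timer.start("inference")
# ... run the detector here ...
elapsed = timer.end("inference")
print(f"inference took {elapsed:.3f}s")
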
face_recognition/face_detect/widerface_evaluate/box_overlaps.pyx
DELETED
@@ -1,55 +0,0 @@
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Sergey Karayev
# --------------------------------------------------------

cimport cython
import numpy as np
cimport numpy as np

DTYPE = np.float
ctypedef np.float_t DTYPE_t

def bbox_overlaps(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    for k in range(K):
        box_area = (
            (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
            (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        )
        for n in range(N):
            iw = (
                min(boxes[n, 2], query_boxes[k, 2]) -
                max(boxes[n, 0], query_boxes[k, 0]) + 1
            )
            if iw > 0:
                ih = (
                    min(boxes[n, 3], query_boxes[k, 3]) -
                    max(boxes[n, 1], query_boxes[k, 1]) + 1
                )
                if ih > 0:
                    ua = float(
                        (boxes[n, 2] - boxes[n, 0] + 1) *
                        (boxes[n, 3] - boxes[n, 1] + 1) +
                        box_area - iw * ih
                    )
                    overlaps[n, k] = iw * ih / ua
    return overlaps

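This Cython module has to be compiled before evaluation.py can import it as bbox. The deleted widerface_evaluate/setup.py (not shown in this view) handles that; an equivalent build sketch, assuming Cython, NumPy headers and a C compiler are available, might look like:

# setup.py (sketch): build box_overlaps.pyx into the extension imported as "bbox"
import numpy as np
from Cython.Build import cythonize
from setuptools import Extension, setup

package = Extension("bbox", sources=["box_overlaps.pyx"], include_dirs=[np.get_include()])
setup(ext_modules=cythonize([package]))
# build in place with:  python setup.py build_ext --inplace
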
face_recognition/face_detect/widerface_evaluate/evaluation.py
DELETED
@@ -1,302 +0,0 @@
"""
WiderFace evaluation code
author: wondervictor
mail: [email protected]
copyright@wondervictor
"""

import os
import tqdm
import pickle
import argparse
import numpy as np
from scipy.io import loadmat
from bbox import bbox_overlaps


def get_gt_boxes(gt_dir):
    """ gt dir: (wider_face_val.mat, wider_easy_val.mat, wider_medium_val.mat, wider_hard_val.mat)"""

    gt_mat = loadmat(os.path.join(gt_dir, 'wider_face_val.mat'))
    hard_mat = loadmat(os.path.join(gt_dir, 'wider_hard_val.mat'))
    medium_mat = loadmat(os.path.join(gt_dir, 'wider_medium_val.mat'))
    easy_mat = loadmat(os.path.join(gt_dir, 'wider_easy_val.mat'))

    facebox_list = gt_mat['face_bbx_list']
    event_list = gt_mat['event_list']
    file_list = gt_mat['file_list']

    hard_gt_list = hard_mat['gt_list']
    medium_gt_list = medium_mat['gt_list']
    easy_gt_list = easy_mat['gt_list']

    return facebox_list, event_list, file_list, hard_gt_list, medium_gt_list, easy_gt_list


def get_gt_boxes_from_txt(gt_path, cache_dir):

    cache_file = os.path.join(cache_dir, 'gt_cache.pkl')
    if os.path.exists(cache_file):
        f = open(cache_file, 'rb')
        boxes = pickle.load(f)
        f.close()
        return boxes

    f = open(gt_path, 'r')
    state = 0
    lines = f.readlines()
    lines = list(map(lambda x: x.rstrip('\r\n'), lines))
    boxes = {}
    print(len(lines))
    f.close()
    current_boxes = []
    current_name = None
    for line in lines:
        if state == 0 and '--' in line:
            state = 1
            current_name = line
            continue
        if state == 1:
            state = 2
            continue

        if state == 2 and '--' in line:
            state = 1
            boxes[current_name] = np.array(current_boxes).astype('float32')
            current_name = line
            current_boxes = []
            continue

        if state == 2:
            box = [float(x) for x in line.split(' ')[:4]]
            current_boxes.append(box)
            continue

    f = open(cache_file, 'wb')
    pickle.dump(boxes, f)
    f.close()
    return boxes


def read_pred_file(filepath):

    with open(filepath, 'r') as f:
        lines = f.readlines()
        img_file = lines[0].rstrip('\n\r')
        lines = lines[2:]

    # b = lines[0].rstrip('\r\n').split(' ')[:-1]
    # c = float(b)
    # a = map(lambda x: [[float(a[0]), float(a[1]), float(a[2]), float(a[3]), float(a[4])] for a in x.rstrip('\r\n').split(' ')], lines)
    boxes = []
    for line in lines:
        line = line.rstrip('\r\n').split(' ')
        if line[0] == '':
            continue
        # a = float(line[4])
        boxes.append([float(line[0]), float(line[1]), float(line[2]), float(line[3]), float(line[4])])
    boxes = np.array(boxes)
    # boxes = np.array(list(map(lambda x: [float(a) for a in x.rstrip('\r\n').split(' ')], lines))).astype('float')
    return img_file.split('/')[-1], boxes


def get_preds(pred_dir):
    events = os.listdir(pred_dir)
    boxes = dict()
    pbar = tqdm.tqdm(events)

    for event in pbar:
        pbar.set_description('Reading Predictions ')
        event_dir = os.path.join(pred_dir, event)
        event_images = os.listdir(event_dir)
        current_event = dict()
        for imgtxt in event_images:
            imgname, _boxes = read_pred_file(os.path.join(event_dir, imgtxt))
            current_event[imgname.rstrip('.jpg')] = _boxes
        boxes[event] = current_event
    return boxes


def norm_score(pred):
    """ norm score
    pred {key: [[x1,y1,x2,y2,s]]}
    """

    max_score = 0
    min_score = 1

    for _, k in pred.items():
        for _, v in k.items():
            if len(v) == 0:
                continue
            _min = np.min(v[:, -1])
            _max = np.max(v[:, -1])
            max_score = max(_max, max_score)
            min_score = min(_min, min_score)

    diff = max_score - min_score
    for _, k in pred.items():
        for _, v in k.items():
            if len(v) == 0:
                continue
            v[:, -1] = (v[:, -1] - min_score)/diff


def image_eval(pred, gt, ignore, iou_thresh):
    """ single image evaluation
    pred: Nx5
    gt: Nx4
    ignore:
    """

    _pred = pred.copy()
    _gt = gt.copy()
    pred_recall = np.zeros(_pred.shape[0])
    recall_list = np.zeros(_gt.shape[0])
    proposal_list = np.ones(_pred.shape[0])

    _pred[:, 2] = _pred[:, 2] + _pred[:, 0]
    _pred[:, 3] = _pred[:, 3] + _pred[:, 1]
    _gt[:, 2] = _gt[:, 2] + _gt[:, 0]
    _gt[:, 3] = _gt[:, 3] + _gt[:, 1]

    overlaps = bbox_overlaps(_pred[:, :4], _gt)

    for h in range(_pred.shape[0]):

        gt_overlap = overlaps[h]
        max_overlap, max_idx = gt_overlap.max(), gt_overlap.argmax()
        if max_overlap >= iou_thresh:
            if ignore[max_idx] == 0:
                recall_list[max_idx] = -1
                proposal_list[h] = -1
            elif recall_list[max_idx] == 0:
                recall_list[max_idx] = 1

        r_keep_index = np.where(recall_list == 1)[0]
        pred_recall[h] = len(r_keep_index)
    return pred_recall, proposal_list


def img_pr_info(thresh_num, pred_info, proposal_list, pred_recall):
    pr_info = np.zeros((thresh_num, 2)).astype('float')
    for t in range(thresh_num):

        thresh = 1 - (t+1)/thresh_num
        r_index = np.where(pred_info[:, 4] >= thresh)[0]
        if len(r_index) == 0:
            pr_info[t, 0] = 0
            pr_info[t, 1] = 0
        else:
            r_index = r_index[-1]
            p_index = np.where(proposal_list[:r_index+1] == 1)[0]
            pr_info[t, 0] = len(p_index)
            pr_info[t, 1] = pred_recall[r_index]
    return pr_info


def dataset_pr_info(thresh_num, pr_curve, count_face):
    _pr_curve = np.zeros((thresh_num, 2))
    for i in range(thresh_num):
        _pr_curve[i, 0] = pr_curve[i, 1] / pr_curve[i, 0]
        _pr_curve[i, 1] = pr_curve[i, 1] / count_face
    return _pr_curve


def voc_ap(rec, prec):

    # correct AP calculation
    # first append sentinel values at the end
    mrec = np.concatenate(([0.], rec, [1.]))
    mpre = np.concatenate(([0.], prec, [0.]))

    # compute the precision envelope
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    i = np.where(mrec[1:] != mrec[:-1])[0]

    # and sum (\Delta recall) * prec
    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap


def evaluation(pred, gt_path, iou_thresh=0.5):
    pred = get_preds(pred)
    norm_score(pred)
    facebox_list, event_list, file_list, hard_gt_list, medium_gt_list, easy_gt_list = get_gt_boxes(gt_path)
    event_num = len(event_list)
    thresh_num = 1000
    settings = ['easy', 'medium', 'hard']
    setting_gts = [easy_gt_list, medium_gt_list, hard_gt_list]
    aps = []
    for setting_id in range(3):
        # different setting
        gt_list = setting_gts[setting_id]
        count_face = 0
        pr_curve = np.zeros((thresh_num, 2)).astype('float')
        # [hard, medium, easy]
        pbar = tqdm.tqdm(range(event_num))
        for i in pbar:
            pbar.set_description('Processing {}'.format(settings[setting_id]))
            event_name = str(event_list[i][0][0])
            img_list = file_list[i][0]
            pred_list = pred[event_name]
            sub_gt_list = gt_list[i][0]
            # img_pr_info_list = np.zeros((len(img_list), thresh_num, 2))
            gt_bbx_list = facebox_list[i][0]

            for j in range(len(img_list)):
                pred_info = pred_list[str(img_list[j][0][0])]

                gt_boxes = gt_bbx_list[j][0].astype('float')
                keep_index = sub_gt_list[j][0]
                count_face += len(keep_index)

                if len(gt_boxes) == 0 or len(pred_info) == 0:
                    continue
                ignore = np.zeros(gt_boxes.shape[0])
                if len(keep_index) != 0:
                    ignore[keep_index-1] = 1
                pred_recall, proposal_list = image_eval(pred_info, gt_boxes, ignore, iou_thresh)

                _img_pr_info = img_pr_info(thresh_num, pred_info, proposal_list, pred_recall)

                pr_curve += _img_pr_info
        pr_curve = dataset_pr_info(thresh_num, pr_curve, count_face)

        propose = pr_curve[:, 0]
        recall = pr_curve[:, 1]

        ap = voc_ap(recall, propose)
        aps.append(ap)

    print("==================== Results ====================")
    print("Easy Val AP: {}".format(aps[0]))
    print("Medium Val AP: {}".format(aps[1]))
    print("Hard Val AP: {}".format(aps[2]))
    print("=================================================")


if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--pred', default="./widerface_txt/")
|
287 |
-
parser.add_argument('-g', '--gt', default='./ground_truth/')
|
288 |
-
|
289 |
-
args = parser.parse_args()
|
290 |
-
evaluation(args.pred, args.gt)
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
face_recognition/face_detect/widerface_evaluate/evaluation_on_widerface.py
DELETED
@@ -1,73 +0,0 @@
#!/usr/bin/ python3
# -*- coding: utf-8 -*-
# @Time : 2019-10-17
# @Author : vealocia
# @FileName: evaluation_on_widerface.py

import math
import os
import sys

import cv2
sys.path.append('../')
from vision.ssd.config.fd_config import define_img_size

input_img_size = 320  # define input size, optional: 128/160/320/480/640/1280
define_img_size(input_img_size)  # must put define_img_size() before 'import create_mb_tiny_fd, create_mb_tiny_fd_predictor'

from vision.ssd.mb_tiny_fd import create_mb_tiny_fd, create_mb_tiny_fd_predictor
from vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd, create_Mb_Tiny_RFB_fd_predictor

label_path = "../models/voc-model-labels.txt"

# net_type = "slim"  # faster inference, lower precision
net_type = "RFB"  # slower inference, higher precision

class_names = [name.strip() for name in open(label_path).readlines()]
num_classes = len(class_names)
test_device = "cuda:0"
# test_device = "cpu"
candidate_size = 800
threshold = 0.1

val_image_root = "/pic/linzai/1080Ti/home_linzai/PycharmProjects/insightface/RetinaFace/data/retinaface/val"  # path to the WIDER FACE validation image root
val_result_txt_save_root = "./widerface_evaluation/"  # result directory

if net_type == 'slim':
    model_path = "../models/pretrained/version-slim-320.pth"
    # model_path = "../models/pretrained/version-slim-640.pth"
    net = create_mb_tiny_fd(len(class_names), is_test=True, device=test_device)
    predictor = create_mb_tiny_fd_predictor(net, candidate_size=candidate_size, device=test_device)
elif net_type == 'RFB':
    model_path = "../models/pretrained/version-RFB-320.pth"
    # model_path = "../models/pretrained/version-RFB-640.pth"
    net = create_Mb_Tiny_RFB_fd(len(class_names), is_test=True, device=test_device)
    predictor = create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=candidate_size, device=test_device)
else:
    print("The net type is wrong!")
    sys.exit(1)
net.load(model_path)

counter = 0
for parent, dir_names, file_names in os.walk(val_image_root):
    for file_name in file_names:
        if not file_name.lower().endswith('jpg'):
            continue
        im = cv2.imread(os.path.join(parent, file_name), cv2.IMREAD_COLOR)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        boxes, labels, probs = predictor.predict(im, candidate_size / 2, threshold)

        event_name = parent.split('/')[-1]
        if not os.path.exists(os.path.join(val_result_txt_save_root, event_name)):
            os.makedirs(os.path.join(val_result_txt_save_root, event_name))
        fout = open(os.path.join(val_result_txt_save_root, event_name, file_name.split('.')[0] + '.txt'), 'w')
        fout.write(file_name.split('.')[0] + '\n')
        fout.write(str(boxes.size(0)) + '\n')
        for i in range(boxes.size(0)):
            bbox = boxes[i, :]
            fout.write('%d %d %d %d %.03f' % (math.floor(bbox[0]), math.floor(bbox[1]), math.ceil(bbox[2] - bbox[0]), math.ceil(bbox[3] - bbox[1]), probs[i] if probs[i] <= 1 else 1) + '\n')
        fout.close()
        counter += 1
        print('[%d] %s is processed.' % (counter, file_name))

# note: with score_threshold = 0.11 and hard_nms, MAP of 320-input model on widerface val set is: 0.785/0.695/0.431
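The per-image text files written above are exactly what evaluation.py's read_pred_file/get_preds expect: one sub-directory per WIDER FACE event, one .txt per image, with the image name on the first line, the detection count on the second, and one "x y w h score" row per detection. A minimal sketch of that layout (the event and image names below are illustrative only):

import os

pred_root = "./widerface_txt"                      # passed to evaluation.py via --pred
event_dir = os.path.join(pred_root, "0--Parade")   # one sub-directory per event
os.makedirs(event_dir, exist_ok=True)

with open(os.path.join(event_dir, "0_Parade_marchingband_1_465.txt"), "w") as f:
    f.write("0_Parade_marchingband_1_465\n")   # image name
    f.write("1\n")                              # number of detections
    f.write("10 20 30 40 0.998\n")              # x y w h score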
face_recognition/face_detect/widerface_evaluate/ground_truth/wider_easy_val.mat
DELETED
Binary file (409 kB)
face_recognition/face_detect/widerface_evaluate/ground_truth/wider_face_val.mat
DELETED
Binary file (398 kB)
face_recognition/face_detect/widerface_evaluate/ground_truth/wider_hard_val.mat
DELETED
Binary file (424 kB)
face_recognition/face_detect/widerface_evaluate/ground_truth/wider_medium_val.mat
DELETED
Binary file (413 kB)
face_recognition/face_detect/widerface_evaluate/setup.py
DELETED
@@ -1,13 +0,0 @@
"""
WiderFace evaluation code
author: wondervictor
mail: [email protected]
copyright@wondervictor
"""

from distutils.core import setup, Extension
from Cython.Build import cythonize
import numpy

package = Extension('bbox', ['box_overlaps.pyx'], include_dirs=[numpy.get_include()])
setup(ext_modules=cythonize([package]))
face_recognition/face_feature/GetFeature.py
DELETED
@@ -1,24 +0,0 @@
import cv2
import numpy as np
import torch
from face_feature.irn50_pytorch import irn50_pytorch
from face_util.faceutil import align_vertical

import ctypes

model_feature = irn50_pytorch("./face_recognition/face_feature/irn50_pytorch.npy")
model_feature.eval()
feature_align_image = np.zeros([128, 128, 3], dtype=np.uint8)

def get_face_feature(image, landmark):
    landmark_vec = (ctypes.c_float * len(landmark))(*landmark)
    align_vertical(image, image.shape[1], image.shape[0], feature_align_image, 128, 128, 3, landmark_vec, 48, 64, 40)
    # cv2.imwrite("D:/align.png", feature_align_image)
    feature_align_image_proc = feature_align_image / 256
    feature_align_image_proc = torch.from_numpy(feature_align_image_proc.astype(np.float32))
    feature_align_image_proc = feature_align_image_proc.permute(2, 0, 1)
    feature_align_image_proc = feature_align_image_proc.unsqueeze(0)
    feature_out = model_feature(feature_align_image_proc)
    feature_out = torch.nn.functional.normalize(feature_out)[0, :]
    return feature_align_image, feature_out.data.numpy()
-
return feature_align_image, feature_out.data.numpy()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
face_recognition/face_feature/irn50_pytorch.npy
DELETED
@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:56a68eb98fe85c1db44947d0ee1715659a7b9b9decce7e3b7d0a819e07b68511
size 56273641
face_recognition/face_feature/irn50_pytorch.py
DELETED
@@ -1,288 +0,0 @@
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

_weights_dict = dict()

def load_weights(weight_file):
    if weight_file == None:
        return

    try:
        weights_dict = np.load(weight_file, allow_pickle=True).item()
    except:
        weights_dict = np.load(weight_file, allow_pickle=True, encoding='bytes').item()

    return weights_dict

class irn50_pytorch(nn.Module):
    def __init__(self, weight_file):
        super(irn50_pytorch, self).__init__()
        global _weights_dict
        _weights_dict = load_weights(weight_file)

        self.Convolution1 = self.__conv(2, name='Convolution1', in_channels=3, out_channels=32, kernel_size=(3, 3), stride=(2, 2), groups=1, bias=False)
        self.BatchNorm1 = self.__batch_normalization(2, 'BatchNorm1', num_features=32, eps=9.999999747378752e-06, momentum=0.0)
        self.Convolution2 = self.__conv(2, name='Convolution2', in_channels=32, out_channels=32, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=False)
        self.BatchNorm2 = self.__batch_normalization(2, 'BatchNorm2', num_features=32, eps=9.999999747378752e-06, momentum=0.0)
        self.Convolution3 = self.__conv(2, name='Convolution3', in_channels=32, out_channels=64, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=False)
        self.BatchNorm3 = self.__batch_normalization(2, 'BatchNorm3', num_features=64, eps=9.999999747378752e-06, momentum=0.0)
        self.Convolution4 = self.__conv(2, name='Convolution4', in_channels=64, out_channels=80, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.BatchNorm4 = self.__batch_normalization(2, 'BatchNorm4', num_features=80, eps=9.999999747378752e-06, momentum=0.0)
        self.Convolution5 = self.__conv(2, name='Convolution5', in_channels=80, out_channels=192, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=False)
        self.BatchNorm5 = self.__batch_normalization(2, 'BatchNorm5', num_features=192, eps=9.999999747378752e-06, momentum=0.0)
        self.Convolution6 = self.__conv(2, name='Convolution6', in_channels=192, out_channels=256, kernel_size=(3, 3), stride=(2, 2), groups=1, bias=False)
        self.BatchNorm6 = self.__batch_normalization(2, 'BatchNorm6', num_features=256, eps=9.999999747378752e-06, momentum=0.0)
        self.conv2_res1_proj = self.__conv(2, name='conv2_res1_proj', in_channels=256, out_channels=256, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.conv2_res1_conv1 = self.__conv(2, name='conv2_res1_conv1', in_channels=256, out_channels=64, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.conv2_res1_conv1_bn = self.__batch_normalization(2, 'conv2_res1_conv1_bn', num_features=64, eps=9.999999747378752e-06, momentum=0.0)
        self.conv2_res1_conv2 = self.__conv(2, name='conv2_res1_conv2', in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=False)
        self.conv2_res1_conv2_bn = self.__batch_normalization(2, 'conv2_res1_conv2_bn', num_features=64, eps=9.999999747378752e-06, momentum=0.0)
        self.conv2_res1_conv3 = self.__conv(2, name='conv2_res1_conv3', in_channels=64, out_channels=256, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.conv2_res2_pre_bn = self.__batch_normalization(2, 'conv2_res2_pre_bn', num_features=256, eps=9.999999747378752e-06, momentum=0.0)
        self.conv2_res2_conv1 = self.__conv(2, name='conv2_res2_conv1', in_channels=256, out_channels=64, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.conv2_res2_conv1_bn = self.__batch_normalization(2, 'conv2_res2_conv1_bn', num_features=64, eps=9.999999747378752e-06, momentum=0.0)
        self.conv2_res2_conv2 = self.__conv(2, name='conv2_res2_conv2', in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=False)
        self.conv2_res2_conv2_bn = self.__batch_normalization(2, 'conv2_res2_conv2_bn', num_features=64, eps=9.999999747378752e-06, momentum=0.0)
        self.conv2_res2_conv3 = self.__conv(2, name='conv2_res2_conv3', in_channels=64, out_channels=256, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.conv2_res3_pre_bn = self.__batch_normalization(2, 'conv2_res3_pre_bn', num_features=256, eps=9.999999747378752e-06, momentum=0.0)
        self.conv2_res3_conv1 = self.__conv(2, name='conv2_res3_conv1', in_channels=256, out_channels=64, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.conv2_res3_conv1_bn = self.__batch_normalization(2, 'conv2_res3_conv1_bn', num_features=64, eps=9.999999747378752e-06, momentum=0.0)
        self.conv2_res3_conv2 = self.__conv(2, name='conv2_res3_conv2', in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=False)
        self.conv2_res3_conv2_bn = self.__batch_normalization(2, 'conv2_res3_conv2_bn', num_features=64, eps=9.999999747378752e-06, momentum=0.0)
        self.conv2_res3_conv3 = self.__conv(2, name='conv2_res3_conv3', in_channels=64, out_channels=256, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.conv3_res1_pre_bn = self.__batch_normalization(2, 'conv3_res1_pre_bn', num_features=256, eps=9.999999747378752e-06, momentum=0.0)
        self.conv3_res1_proj = self.__conv(2, name='conv3_res1_proj', in_channels=256, out_channels=512, kernel_size=(1, 1), stride=(2, 2), groups=1, bias=False)
        self.conv3_res1_conv1 = self.__conv(2, name='conv3_res1_conv1', in_channels=256, out_channels=128, kernel_size=(1, 1), stride=(2, 2), groups=1, bias=False)
        self.conv3_res1_conv1_bn = self.__batch_normalization(2, 'conv3_res1_conv1_bn', num_features=128, eps=9.999999747378752e-06, momentum=0.0)
        self.conv3_res1_conv2 = self.__conv(2, name='conv3_res1_conv2', in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=False)
        self.conv3_res1_conv2_bn = self.__batch_normalization(2, 'conv3_res1_conv2_bn', num_features=128, eps=9.999999747378752e-06, momentum=0.0)
        self.conv3_res1_conv3 = self.__conv(2, name='conv3_res1_conv3', in_channels=128, out_channels=512, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.conv3_res2_pre_bn = self.__batch_normalization(2, 'conv3_res2_pre_bn', num_features=512, eps=9.999999747378752e-06, momentum=0.0)
        self.conv3_res2_conv1 = self.__conv(2, name='conv3_res2_conv1', in_channels=512, out_channels=128, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.conv3_res2_conv1_bn = self.__batch_normalization(2, 'conv3_res2_conv1_bn', num_features=128, eps=9.999999747378752e-06, momentum=0.0)
        self.conv3_res2_conv2 = self.__conv(2, name='conv3_res2_conv2', in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=False)
        self.conv3_res2_conv2_bn = self.__batch_normalization(2, 'conv3_res2_conv2_bn', num_features=128, eps=9.999999747378752e-06, momentum=0.0)
        self.conv3_res2_conv3 = self.__conv(2, name='conv3_res2_conv3', in_channels=128, out_channels=512, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.conv3_res3_pre_bn = self.__batch_normalization(2, 'conv3_res3_pre_bn', num_features=512, eps=9.999999747378752e-06, momentum=0.0)
        self.conv3_res3_conv1 = self.__conv(2, name='conv3_res3_conv1', in_channels=512, out_channels=128, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.conv3_res3_conv1_bn = self.__batch_normalization(2, 'conv3_res3_conv1_bn', num_features=128, eps=9.999999747378752e-06, momentum=0.0)
        self.conv3_res3_conv2 = self.__conv(2, name='conv3_res3_conv2', in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=False)
        self.conv3_res3_conv2_bn = self.__batch_normalization(2, 'conv3_res3_conv2_bn', num_features=128, eps=9.999999747378752e-06, momentum=0.0)
        self.conv3_res3_conv3 = self.__conv(2, name='conv3_res3_conv3', in_channels=128, out_channels=512, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.conv3_res4_pre_bn = self.__batch_normalization(2, 'conv3_res4_pre_bn', num_features=512, eps=9.999999747378752e-06, momentum=0.0)
        self.conv3_res4_conv1 = self.__conv(2, name='conv3_res4_conv1', in_channels=512, out_channels=128, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.conv3_res4_conv1_bn = self.__batch_normalization(2, 'conv3_res4_conv1_bn', num_features=128, eps=9.999999747378752e-06, momentum=0.0)
        self.conv3_res4_conv2 = self.__conv(2, name='conv3_res4_conv2', in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=False)
        self.conv3_res4_conv2_bn = self.__batch_normalization(2, 'conv3_res4_conv2_bn', num_features=128, eps=9.999999747378752e-06, momentum=0.0)
        self.conv3_res4_conv3 = self.__conv(2, name='conv3_res4_conv3', in_channels=128, out_channels=512, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.conv4_res1_pre_bn = self.__batch_normalization(2, 'conv4_res1_pre_bn', num_features=512, eps=9.999999747378752e-06, momentum=0.0)
        self.conv4_res1_proj = self.__conv(2, name='conv4_res1_proj', in_channels=512, out_channels=512, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.conv4_res1_conv1 = self.__conv(2, name='conv4_res1_conv1', in_channels=512, out_channels=128, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=False)
        self.conv4_res1_conv1_bn = self.__batch_normalization(2, 'conv4_res1_conv1_bn', num_features=128, eps=9.999999747378752e-06, momentum=0.0)
        self.conv4_res1_conv2 = self.__conv(2, name='conv4_res1_conv2', in_channels=128, out_channels=512, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.conv4_res2_pre_bn = self.__batch_normalization(2, 'conv4_res2_pre_bn', num_features=512, eps=9.999999747378752e-06, momentum=0.0)
        self.conv4_res2_conv1_proj = self.__conv(2, name='conv4_res2_conv1_proj', in_channels=512, out_channels=1024, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.conv4_res2_conv1 = self.__conv(2, name='conv4_res2_conv1', in_channels=512, out_channels=256, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.conv4_res2_conv1_bn = self.__batch_normalization(2, 'conv4_res2_conv1_bn', num_features=256, eps=9.999999747378752e-06, momentum=0.0)
        self.conv4_res2_conv2 = self.__conv(2, name='conv4_res2_conv2', in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=False)
        self.conv4_res2_conv2_bn = self.__batch_normalization(2, 'conv4_res2_conv2_bn', num_features=256, eps=9.999999747378752e-06, momentum=0.0)
        self.conv4_res2_conv3 = self.__conv(2, name='conv4_res2_conv3', in_channels=256, out_channels=1024, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.conv4_res3_pre_bn = self.__batch_normalization(2, 'conv4_res3_pre_bn', num_features=1024, eps=9.999999747378752e-06, momentum=0.0)
        self.conv4_res3_conv1 = self.__conv(2, name='conv4_res3_conv1', in_channels=1024, out_channels=256, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.conv4_res3_conv1_bn = self.__batch_normalization(2, 'conv4_res3_conv1_bn', num_features=256, eps=9.999999747378752e-06, momentum=0.0)
        self.conv4_res3_conv2 = self.__conv(2, name='conv4_res3_conv2', in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=False)
        self.conv4_res3_conv2_bn = self.__batch_normalization(2, 'conv4_res3_conv2_bn', num_features=256, eps=9.999999747378752e-06, momentum=0.0)
        self.conv4_res3_conv3 = self.__conv(2, name='conv4_res3_conv3', in_channels=256, out_channels=1024, kernel_size=(1, 1), stride=(1, 1), groups=1, bias=False)
        self.conv5_bn = self.__batch_normalization(2, 'conv5_bn', num_features=1024, eps=9.999999747378752e-06, momentum=0.0)
        self.fc1_1 = self.__dense(name = 'fc1_1', in_features = 16384, out_features = 512, bias = False)
        self.bn_fc1 = self.__batch_normalization(1, 'bn_fc1', num_features=512, eps=9.999999747378752e-06, momentum=0.0)

    def forward(self, x):
        Convolution1 = self.Convolution1(x)
        BatchNorm1 = self.BatchNorm1(Convolution1)
        ReLU1 = F.relu(BatchNorm1)
        Convolution2 = self.Convolution2(ReLU1)
        BatchNorm2 = self.BatchNorm2(Convolution2)
        ReLU2 = F.relu(BatchNorm2)
        Convolution3_pad = F.pad(ReLU2, (1, 1, 1, 1))
        Convolution3 = self.Convolution3(Convolution3_pad)
        BatchNorm3 = self.BatchNorm3(Convolution3)
        ReLU3 = F.relu(BatchNorm3)
        Pooling1_pad = F.pad(ReLU3, (0, 1, 0, 1), value=float('-inf'))
        Pooling1, Pooling1_idx = F.max_pool2d(Pooling1_pad, kernel_size=(3, 3), stride=(2, 2), padding=0, ceil_mode=False, return_indices=True)
        Convolution4 = self.Convolution4(Pooling1)
        BatchNorm4 = self.BatchNorm4(Convolution4)
        ReLU4 = F.relu(BatchNorm4)
        Convolution5 = self.Convolution5(ReLU4)
        BatchNorm5 = self.BatchNorm5(Convolution5)
        ReLU5 = F.relu(BatchNorm5)
        Convolution6_pad = F.pad(ReLU5, (1, 1, 1, 1))
        Convolution6 = self.Convolution6(Convolution6_pad)
        BatchNorm6 = self.BatchNorm6(Convolution6)
        ReLU6 = F.relu(BatchNorm6)
        conv2_res1_proj = self.conv2_res1_proj(ReLU6)
        conv2_res1_conv1 = self.conv2_res1_conv1(ReLU6)
        conv2_res1_conv1_bn = self.conv2_res1_conv1_bn(conv2_res1_conv1)
        conv2_res1_conv1_relu = F.relu(conv2_res1_conv1_bn)
        conv2_res1_conv2_pad = F.pad(conv2_res1_conv1_relu, (1, 1, 1, 1))
        conv2_res1_conv2 = self.conv2_res1_conv2(conv2_res1_conv2_pad)
        conv2_res1_conv2_bn = self.conv2_res1_conv2_bn(conv2_res1_conv2)
        conv2_res1_conv2_relu = F.relu(conv2_res1_conv2_bn)
        conv2_res1_conv3 = self.conv2_res1_conv3(conv2_res1_conv2_relu)
        conv2_res1 = conv2_res1_proj + conv2_res1_conv3
        conv2_res2_pre_bn = self.conv2_res2_pre_bn(conv2_res1)
        conv2_res2_pre_relu = F.relu(conv2_res2_pre_bn)
        conv2_res2_conv1 = self.conv2_res2_conv1(conv2_res2_pre_relu)
        conv2_res2_conv1_bn = self.conv2_res2_conv1_bn(conv2_res2_conv1)
        conv2_res2_conv1_relu = F.relu(conv2_res2_conv1_bn)
        conv2_res2_conv2_pad = F.pad(conv2_res2_conv1_relu, (1, 1, 1, 1))
        conv2_res2_conv2 = self.conv2_res2_conv2(conv2_res2_conv2_pad)
        conv2_res2_conv2_bn = self.conv2_res2_conv2_bn(conv2_res2_conv2)
        conv2_res2_conv2_relu = F.relu(conv2_res2_conv2_bn)
        conv2_res2_conv3 = self.conv2_res2_conv3(conv2_res2_conv2_relu)
        conv2_res2 = conv2_res1 + conv2_res2_conv3
        conv2_res3_pre_bn = self.conv2_res3_pre_bn(conv2_res2)
        conv2_res3_pre_relu = F.relu(conv2_res3_pre_bn)
        conv2_res3_conv1 = self.conv2_res3_conv1(conv2_res3_pre_relu)
        conv2_res3_conv1_bn = self.conv2_res3_conv1_bn(conv2_res3_conv1)
        conv2_res3_conv1_relu = F.relu(conv2_res3_conv1_bn)
        conv2_res3_conv2_pad = F.pad(conv2_res3_conv1_relu, (1, 1, 1, 1))
        conv2_res3_conv2 = self.conv2_res3_conv2(conv2_res3_conv2_pad)
        conv2_res3_conv2_bn = self.conv2_res3_conv2_bn(conv2_res3_conv2)
        conv2_res3_conv2_relu = F.relu(conv2_res3_conv2_bn)
        conv2_res3_conv3 = self.conv2_res3_conv3(conv2_res3_conv2_relu)
        conv2_res3 = conv2_res2 + conv2_res3_conv3
        conv3_res1_pre_bn = self.conv3_res1_pre_bn(conv2_res3)
        conv3_res1_pre_relu = F.relu(conv3_res1_pre_bn)
        conv3_res1_proj = self.conv3_res1_proj(conv3_res1_pre_relu)
        conv3_res1_conv1 = self.conv3_res1_conv1(conv3_res1_pre_relu)
        conv3_res1_conv1_bn = self.conv3_res1_conv1_bn(conv3_res1_conv1)
        conv3_res1_conv1_relu = F.relu(conv3_res1_conv1_bn)
        conv3_res1_conv2_pad = F.pad(conv3_res1_conv1_relu, (1, 1, 1, 1))
        conv3_res1_conv2 = self.conv3_res1_conv2(conv3_res1_conv2_pad)
        conv3_res1_conv2_bn = self.conv3_res1_conv2_bn(conv3_res1_conv2)
        conv3_res1_conv2_relu = F.relu(conv3_res1_conv2_bn)
        conv3_res1_conv3 = self.conv3_res1_conv3(conv3_res1_conv2_relu)
        conv3_res1 = conv3_res1_proj + conv3_res1_conv3
        conv3_res2_pre_bn = self.conv3_res2_pre_bn(conv3_res1)
        conv3_res2_pre_relu = F.relu(conv3_res2_pre_bn)
        conv3_res2_conv1 = self.conv3_res2_conv1(conv3_res2_pre_relu)
        conv3_res2_conv1_bn = self.conv3_res2_conv1_bn(conv3_res2_conv1)
        conv3_res2_conv1_relu = F.relu(conv3_res2_conv1_bn)
        conv3_res2_conv2_pad = F.pad(conv3_res2_conv1_relu, (1, 1, 1, 1))
        conv3_res2_conv2 = self.conv3_res2_conv2(conv3_res2_conv2_pad)
        conv3_res2_conv2_bn = self.conv3_res2_conv2_bn(conv3_res2_conv2)
        conv3_res2_conv2_relu = F.relu(conv3_res2_conv2_bn)
        conv3_res2_conv3 = self.conv3_res2_conv3(conv3_res2_conv2_relu)
        conv3_res2 = conv3_res1 + conv3_res2_conv3
        conv3_res3_pre_bn = self.conv3_res3_pre_bn(conv3_res2)
        conv3_res3_pre_relu = F.relu(conv3_res3_pre_bn)
        conv3_res3_conv1 = self.conv3_res3_conv1(conv3_res3_pre_relu)
        conv3_res3_conv1_bn = self.conv3_res3_conv1_bn(conv3_res3_conv1)
        conv3_res3_conv1_relu = F.relu(conv3_res3_conv1_bn)
        conv3_res3_conv2_pad = F.pad(conv3_res3_conv1_relu, (1, 1, 1, 1))
        conv3_res3_conv2 = self.conv3_res3_conv2(conv3_res3_conv2_pad)
        conv3_res3_conv2_bn = self.conv3_res3_conv2_bn(conv3_res3_conv2)
        conv3_res3_conv2_relu = F.relu(conv3_res3_conv2_bn)
        conv3_res3_conv3 = self.conv3_res3_conv3(conv3_res3_conv2_relu)
        conv3_res3 = conv3_res2 + conv3_res3_conv3
        conv3_res4_pre_bn = self.conv3_res4_pre_bn(conv3_res3)
        conv3_res4_pre_relu = F.relu(conv3_res4_pre_bn)
        conv3_res4_conv1 = self.conv3_res4_conv1(conv3_res4_pre_relu)
        conv3_res4_conv1_bn = self.conv3_res4_conv1_bn(conv3_res4_conv1)
        conv3_res4_conv1_relu = F.relu(conv3_res4_conv1_bn)
        conv3_res4_conv2_pad = F.pad(conv3_res4_conv1_relu, (1, 1, 1, 1))
        conv3_res4_conv2 = self.conv3_res4_conv2(conv3_res4_conv2_pad)
        conv3_res4_conv2_bn = self.conv3_res4_conv2_bn(conv3_res4_conv2)
        conv3_res4_conv2_relu = F.relu(conv3_res4_conv2_bn)
        conv3_res4_conv3 = self.conv3_res4_conv3(conv3_res4_conv2_relu)
        conv3_res4 = conv3_res3 + conv3_res4_conv3
        conv4_res1_pre_bn = self.conv4_res1_pre_bn(conv3_res4)
        conv4_res1_pre_relu = F.relu(conv4_res1_pre_bn)
        conv4_res1_proj = self.conv4_res1_proj(conv4_res1_pre_relu)
        conv4_res1_conv1_pad = F.pad(conv4_res1_pre_relu, (1, 1, 1, 1))
        conv4_res1_conv1 = self.conv4_res1_conv1(conv4_res1_conv1_pad)
        conv4_res1_conv1_bn = self.conv4_res1_conv1_bn(conv4_res1_conv1)
        conv4_res1_conv1_relu = F.relu(conv4_res1_conv1_bn)
        conv4_res1_conv2 = self.conv4_res1_conv2(conv4_res1_conv1_relu)
        conv4_res1 = conv4_res1_proj + conv4_res1_conv2
        conv4_res2_pre_bn = self.conv4_res2_pre_bn(conv4_res1)
        conv4_res2_pre_relu = F.relu(conv4_res2_pre_bn)
        conv4_res2_conv1_proj = self.conv4_res2_conv1_proj(conv4_res2_pre_relu)
        conv4_res2_conv1 = self.conv4_res2_conv1(conv4_res2_pre_relu)
        conv4_res2_conv1_bn = self.conv4_res2_conv1_bn(conv4_res2_conv1)
        conv4_res2_conv1_relu = F.relu(conv4_res2_conv1_bn)
        conv4_res2_conv2_pad = F.pad(conv4_res2_conv1_relu, (1, 1, 1, 1))
        conv4_res2_conv2 = self.conv4_res2_conv2(conv4_res2_conv2_pad)
        conv4_res2_conv2_bn = self.conv4_res2_conv2_bn(conv4_res2_conv2)
        conv4_res2_conv2_relu = F.relu(conv4_res2_conv2_bn)
        conv4_res2_conv3 = self.conv4_res2_conv3(conv4_res2_conv2_relu)
        conv4_res2 = conv4_res2_conv1_proj + conv4_res2_conv3
        conv4_res3_pre_bn = self.conv4_res3_pre_bn(conv4_res2)
        conv4_res3_pre_relu = F.relu(conv4_res3_pre_bn)
        conv4_res3_conv1 = self.conv4_res3_conv1(conv4_res3_pre_relu)
        conv4_res3_conv1_bn = self.conv4_res3_conv1_bn(conv4_res3_conv1)
        conv4_res3_conv1_relu = F.relu(conv4_res3_conv1_bn)
        conv4_res3_conv2_pad = F.pad(conv4_res3_conv1_relu, (1, 1, 1, 1))
        conv4_res3_conv2 = self.conv4_res3_conv2(conv4_res3_conv2_pad)
        conv4_res3_conv2_bn = self.conv4_res3_conv2_bn(conv4_res3_conv2)
        conv4_res3_conv2_relu = F.relu(conv4_res3_conv2_bn)
        conv4_res3_conv3 = self.conv4_res3_conv3(conv4_res3_conv2_relu)
        conv4_res3 = conv4_res2 + conv4_res3_conv3
        conv5_bn = self.conv5_bn(conv4_res3)
        conv5_relu = F.relu(conv5_bn)
        pool5 = F.avg_pool2d(conv5_relu, kernel_size=(4, 4), stride=(1, 1), padding=(0,), ceil_mode=False, count_include_pad=False)
        fc1_0 = pool5.view(pool5.size(0), -1)
        fc1_1 = self.fc1_1(fc1_0)
        bn_fc1 = self.bn_fc1(fc1_1)
        #return bn_fc1
        bn_fc1 = bn_fc1.reshape(bn_fc1.size()[0], bn_fc1.size()[1])
        slice_fc1, slice_fc2 = bn_fc1[:, :256], bn_fc1[:, 256:]
        eltwise_fc1 = torch.max(slice_fc1, slice_fc2)

        return eltwise_fc1

    @staticmethod
    def __conv(dim, name, **kwargs):
        if dim == 1: layer = nn.Conv1d(**kwargs)
        elif dim == 2: layer = nn.Conv2d(**kwargs)
        elif dim == 3: layer = nn.Conv3d(**kwargs)
        else: raise NotImplementedError()

        layer.state_dict()['weight'].copy_(torch.from_numpy(_weights_dict[name]['weights']))
        if 'bias' in _weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(_weights_dict[name]['bias']))
        return layer

    @staticmethod
    def __batch_normalization(dim, name, **kwargs):
        if dim == 0 or dim == 1: layer = nn.BatchNorm1d(**kwargs)
        elif dim == 2: layer = nn.BatchNorm2d(**kwargs)
        elif dim == 3: layer = nn.BatchNorm3d(**kwargs)
        else: raise NotImplementedError()

        if 'scale' in _weights_dict[name]:
            layer.state_dict()['weight'].copy_(torch.from_numpy(_weights_dict[name]['scale']))
        else:
            layer.weight.data.fill_(1)

        if 'bias' in _weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(_weights_dict[name]['bias']))
        else:
            layer.bias.data.fill_(0)

        layer.state_dict()['running_mean'].copy_(torch.from_numpy(_weights_dict[name]['mean']))
        layer.state_dict()['running_var'].copy_(torch.from_numpy(_weights_dict[name]['var']))
        return layer

    @staticmethod
    def __dense(name, **kwargs):
        layer = nn.Linear(**kwargs)
        layer.state_dict()['weight'].copy_(torch.from_numpy(_weights_dict[name]['weights']))
        if 'bias' in _weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(_weights_dict[name]['bias']))
        return layer
face_recognition/face_landmark/GetLandmark.py
DELETED
@@ -1,62 +0,0 @@
import cv2
import numpy as np
import torch
from face_landmark.MobileFaceNet import MobileFaceNet

model_landmark = MobileFaceNet(input_size=64, embedding_size=136)
model_landmark.load_state_dict(torch.load("./face_recognition/face_landmark/vfl_1.02_578_6.734591484069824.pth.tar", map_location=torch.device('cpu'))['state_dict'])
model_landmark.eval()

def get_face_landmark(gray_img, bounding_box):
    image = gray_img
    box = bounding_box

    nHeight, nWidth = image.shape

    rLeftMargin = 0.05
    rTopMargin = 0.00
    rRightMargin = 0.05
    rBottomMargin = 0.10

    rW = box[2] - box[0]
    rH = box[3] - box[1]
    cx = (box[0] + box[2]) / 2
    cy = (box[1] + box[3]) / 2
    sz = pow(rW * rH, 0.5)
    rX = cx - sz / 2
    rY = cy - sz / 2
    rW = sz
    rH = sz

    # get the image range used for landmark detection from the face rect
    iExFaceX = int(rX - rLeftMargin * rW)
    iExFaceY = int(rY - rTopMargin * rH)
    iExFaceW = int((1 + (rLeftMargin + rRightMargin)) * rW)
    iExFaceH = int((1 + (rTopMargin + rBottomMargin)) * rH)

    iExFaceX = np.clip(iExFaceX, 0, nWidth - 1)
    iExFaceY = np.clip(iExFaceY, 0, nHeight - 1)
    iExFaceW = np.clip(iExFaceX + iExFaceW, 0, nWidth - 1) - iExFaceX
    iExFaceH = np.clip(iExFaceY + iExFaceH, 0, nHeight - 1) - iExFaceY

    # crop the face image in that range
    image = image[iExFaceY:iExFaceY+iExFaceH, iExFaceX:iExFaceX+iExFaceW]
    # normalize the cropped face image
    image = cv2.resize(image, (64, 64), cv2.INTER_LINEAR)
    # cv2.imwrite("D:/crop.png", image)
    image = image / 256
    image = torch.from_numpy(image.astype(np.float32))
    # convert the image from shape [n, n] to [1, 1, n, n]
    image = image.unsqueeze(0).unsqueeze(0)

    # get landmarks from the cropped face image
    landmark = model_landmark(image)
    # reshape the landmarks and convert them to image coordinates
    landmark = landmark.reshape(68, 2)
    landmark[:,0] = landmark[:,0] * iExFaceW + iExFaceX
    landmark[:,1] = landmark[:,1] * iExFaceH + iExFaceY

    landmark = landmark.reshape(-1)

    return landmark
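The 136-value vector returned here (68 x/y pairs in image coordinates) is exactly what get_face_feature() above consumes. A hedged sketch of chaining the two on one detected face; the image path is illustrative and box is assumed to come from the face detector as [x1, y1, x2, y2]:

import cv2
from face_landmark.GetLandmark import get_face_landmark
from face_feature.GetFeature import get_face_feature

image = cv2.imread("person.jpg")                      # illustrative path
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
landmark = get_face_landmark(gray, box)               # 136 values: 68 (x, y) pairs
aligned, feature = get_face_feature(image, landmark)  # 128x128 crop and 256-d embedding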
face_recognition/face_landmark/MobileFaceNet.py
DELETED
@@ -1,123 +0,0 @@
from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, ReLU, Sigmoid, Dropout2d, Dropout, AvgPool2d, MaxPool2d, AdaptiveAvgPool2d, Sequential, Module, Parameter
import torch.nn.functional as F
import torch
import torch.nn as nn
from collections import namedtuple
import math
import pdb

################################## Original Arcface Model #############################################################

class Flatten(Module):
    def forward(self, input):
        return input.view(input.size(0), -1)

################################## MobileFaceNet #############################################################

class Conv_block(Module):
    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
        super(Conv_block, self).__init__()
        self.conv = Conv2d(in_c, out_channels=out_c, kernel_size=kernel, groups=groups, stride=stride, padding=padding, bias=False)
        self.bn = BatchNorm2d(out_c)
        self.relu = ReLU(out_c)
    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x

class Linear_block(Module):
    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
        super(Linear_block, self).__init__()
        self.conv = Conv2d(in_c, out_channels=out_c, kernel_size=kernel, groups=groups, stride=stride, padding=padding, bias=False)
        self.bn = BatchNorm2d(out_c)
    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x

class Depth_Wise(Module):
    def __init__(self, in_c, out_c, residual = False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1):
        super(Depth_Wise, self).__init__()
        self.conv = Conv_block(in_c, out_c=groups, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
        self.conv_dw = Conv_block(groups, groups, groups=groups, kernel=kernel, padding=padding, stride=stride)
        self.project = Linear_block(groups, out_c, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
        self.residual = residual
    def forward(self, x):
        if self.residual:
            short_cut = x
        x = self.conv(x)
        x = self.conv_dw(x)
        x = self.project(x)
        if self.residual:
            output = short_cut + x
        else:
            output = x
        return output

class Residual(Module):
    def __init__(self, c, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)):
        super(Residual, self).__init__()
        modules = []
        for _ in range(num_block):
            modules.append(Depth_Wise(c, c, residual=True, kernel=kernel, padding=padding, stride=stride, groups=groups))
        self.model = Sequential(*modules)
    def forward(self, x):
        return self.model(x)

class GDC(Module):
    def __init__(self, embedding_size):
        super(GDC, self).__init__()
        self.conv_6_dw = Linear_block(512, 512, groups=512, kernel=(4,4), stride=(1, 1), padding=(0, 0))
        self.linear = Linear(512, embedding_size, bias=True)
        self.bn = BatchNorm1d(embedding_size)

    def forward(self, x):
        x = self.conv_6_dw(x)
        x = torch.flatten(x, 1)
        x = self.linear(x)
        x = self.bn(x)
        return x

class MobileFaceNet(Module):
    def __init__(self, input_size, embedding_size = 512):
        super(MobileFaceNet, self).__init__()
        self.conv1 = Conv_block(1, 32, kernel=(3, 3), stride=(2, 2), padding=(1, 1))
        self.conv2_dw = Conv_block(32, 32, kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)
        self.conv_23 = Depth_Wise(32, 32, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=64)
        self.conv_3 = Residual(32, num_block=3, groups=64, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv_34 = Depth_Wise(32, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=128)
        self.conv_4 = Residual(64, num_block=4, groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv_45 = Depth_Wise(64, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=256)
        self.conv_5 = Residual(64, num_block=2, groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv_6_sep = Conv_block(64, 512, kernel=(1, 1), stride=(1, 1), padding=(0, 0))
        self.output_layer = GDC(embedding_size)
        self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    m.bias.data.zero_()

    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2_dw(out)
        out = self.conv_23(out)
        out = self.conv_3(out)
        out = self.conv_34(out)
        out = self.conv_4(out)
        out = self.conv_45(out)
        out = self.conv_5(out)
        conv_features = self.conv_6_sep(out)
        out = self.output_layer(conv_features)
        return out
face_recognition/face_landmark/vfl_1.02_578_6.734591484069824.pth.tar
DELETED
@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:956d0864c51166f1e02cfc1e24c2b9426340a7bcbcfcad789b0a3317f0b2470d
size 3723879
face_recognition/face_manage/manage.py
DELETED
@@ -1,161 +0,0 @@
import sqlite3
import sys
import os
import os.path
import numpy as np

database_base_name = os.path.abspath(os.path.dirname(__file__)) + '\\person'
table_name = "feature"
sqlite_insert_blob_query = "INSERT INTO " + table_name + " (id, filename, count, boxes, landmarks, alignimgs, features) VALUES (?, ?, ?, ?, ?, ?, ?)"
sqlite_create_table_query = "CREATE TABLE " + table_name + " ( id INTEGER PRIMARY KEY, filename TEXT, count INTEGER, boxes BLOB NOT NULL, landmarks BLOB NOT NULL, alignimgs BLOB NOT NULL, features BLOB NOT NULL)"

sqlite_update_all_query = "UPDATE " + table_name + " set filename = ?, count = ?, boxes = ?, landmarks = ?, alignimgs = ?, features = ? where id = ?"
sqlite_search_query = "SELECT * FROM " + table_name
sqlite_delete_all = "DELETE FROM " + table_name

data_all = []
threshold = 68
max_feat_count = 8
max_id = -1
feature_update = False

face_database = None

# open database
def open_database(db_no):

    global max_id
    global face_database

    db_name = database_base_name + str(db_no) + ".db"
    face_database = sqlite3.connect(db_name)
    cursor = face_database.execute("SELECT name FROM sqlite_master WHERE type='table';")
    # check which tables exist in the database
    tables = [
        v[0] for v in cursor.fetchall()
        if v[0] != "sqlite_sequence"
    ]
    cursor.close()

    if not "feature" in tables:
        face_database.execute(sqlite_create_table_query)

    cursor = face_database.execute(sqlite_search_query)

    # load the ids and features stored in the "feature" table
    for row in cursor.fetchall():
        id = row[0]
        filename = row[1]
        count = row[2]
        boxes = np.fromstring(row[3], dtype=np.float32)
        landmarks = np.fromstring(row[4], dtype=np.float32)
        alignimgs = np.fromstring(row[5], dtype=np.uint8)
        features = np.fromstring(row[6], dtype=np.float32)

        if not boxes.shape[0] == count * 4:
            continue
        if not landmarks.shape[0] == count * 136:
            continue
        if not alignimgs.shape[0] == count * 49152:
            continue
        if not features.shape[0] == count * 256:
            continue

        boxes = boxes.reshape(count, 4)
        landmarks = landmarks.reshape(count, 136)
        alignimgs = alignimgs.reshape(count, 49152)
        features = features.reshape(count, 256)

        data_all.append({'id':id, 'filename':filename, 'count':count, 'boxes':boxes, 'landmarks':landmarks, 'alignimgs':alignimgs, 'features':features})
        if id > max_id:
            max_id = id
    cursor.close()

# create database
def create_database():
    db_no = 0
    db_name = ""
    while True:
        db_name = database_base_name + str(db_no) + ".db"
        if not os.path.isfile(db_name):
            break
        db_no += 1
    open_database(db_no)

def clear_database():
    global face_database

    data_all.clear()
    cursor = face_database.cursor()
    cursor.execute(sqlite_delete_all)
    face_database.commit()
    cursor.close()
    return

def register_face(filename, count, boxes, landmarks, alignimgs, features):

    # boxes = boxes.reshape(count, 4)
    # landmarks = landmarks.reshape(count, 136)
    # alignimgs = alignimgs.reshape(count, 49152)
    # features = features.reshape(count, 256)

    global face_database
    global max_id
    max_id = max_id + 1
    id = max_id
    cursor = face_database.cursor()
    cursor.execute(sqlite_insert_blob_query, (id, filename, count, boxes.tostring(), landmarks.tostring(), alignimgs.tostring(), features.tostring()))
    face_database.commit()
    cursor.close()
    data_all.append({'id':id, 'filename':filename, 'count':count, 'boxes':boxes, 'landmarks':landmarks, 'alignimgs':alignimgs, 'features':features})
    print('id = ', id)
    return id

def update_face(id = None, filename = None, count = None, boxes = None, landmarks = None, alignimgs = None, features = None):
    global face_database
    cursor = face_database.cursor()
    cursor.execute(sqlite_update_all_query, (filename, count, boxes.tostring(), landmarks.tostring(), alignimgs.tostring(), features.tostring(), id))
    face_database.commit()
    cursor.close()

def get_similarity(feat1, feat2):
    return (np.sum(feat1 * feat2) + 1) * 50

def verify_face(feat):

    global max_id
    max_score = 0

    for data in data_all:
        id = data['id']
        sub_id = data['count']
        features = data['features']

        # for sub_id in range(count):
        score = get_similarity(feat, features)
        if score >= max_score:
            max_score = score

        if score >= threshold:
            print("score = ", score)
            return id, data['filename'], sub_id

    return -1, None, None

def get_info(id, sub_id):
    for data in data_all:
        nid = data['id']
        if nid == id:
            count = data['count']
            if sub_id < count:
                return data['filename'], data['boxes'][sub_id], data['landmarks'][sub_id], data['alignimgs'][sub_id], data['features'][sub_id]
            else:
                return None, None, None, None, None

    return None, None, None, None, None

def set_threshold(th):
    global threshold
    threshold = th

def get_threshold():
    return threshold
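A hedged end-to-end sketch of how this module is meant to be driven; the function names are the ones defined above, while count, boxes, landmarks, alignimgs, features and query_feature are assumed to come from the detection, landmark and feature modules:

import numpy as np
from face_manage import manage

manage.create_database()          # or manage.open_database(0) to reuse an existing one

# register everything extracted from "person1.jpg"
manage.register_face("person1.jpg", count, boxes, landmarks, alignimgs, features)

# later: match a freshly extracted 256-d feature against all registered faces
face_id, filename, sub_id = manage.verify_face(query_feature)
if face_id >= 0:
    print("matched", filename)
else:
    print("unknown face")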