Spaces: ManhHoDinh (Sleeping)

Commit eb06a89
Committed by ManhHoDinh
1 Parent(s): 0fddb14

up code

Files changed:
- .gitignore +18 -0
- README.md +6 -5
- __init__.py +0 -0
- accountService.json +13 -0
- app.py +172 -0
- deep_sort/__init__.py +0 -0
- deep_sort/detection.py +55 -0
- deep_sort/iou_matching.py +81 -0
- deep_sort/kalman_filter.py +229 -0
- deep_sort/linear_assignment.py +191 -0
- deep_sort/nn_matching.py +177 -0
- deep_sort/preprocessing.py +74 -0
- deep_sort/track.py +170 -0
- deep_sort/tracker.py +139 -0
- image/test +0 -0
- model_data/best.pt +3 -0
- model_data/yolov8m.pt +3 -0
- object_tracker_demo.py +89 -0
- out_video/.gitkeep +0 -0
- output_video.avi +0 -0
- requirements.txt +8 -0
- testAPI.py +0 -0
- tools/freeze_model.py +219 -0
- tools/generate_detections.py +218 -0
- tracker.py +67 -0
- utils.py +22 -0
- video/.gitkeep +0 -0
.gitignore
ADDED
@@ -0,0 +1,18 @@
# Virtual environment
/myenv
/venv

# Data
/data
video/*
!video/.gitkeep

out_video/*
!out_video/.gitkeep

# Cache
__pycache__/

# Ignore all logs
*.log
/flagged
README.md
CHANGED
@@ -1,12 +1,13 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: TrafficDetection
+emoji: 🐠
+colorFrom: red
+colorTo: pink
 sdk: gradio
-sdk_version: 4.
+sdk_version: 4.12.0
 app_file: app.py
 pinned: false
+license: mit
 ---

 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__init__.py
ADDED
File without changes
accountService.json
ADDED
@@ -0,0 +1,13 @@
{
"type": "service_account",
"project_id": "trafficsolution-395806",
"private_key_id": "d734f0c912aa65cad15281d8a3fda8bd73cfefb7",
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQC96Qujkh6pxk1B\nb0OMihmUZIu0h+HoSS9brkmz2bkH6K/+mnSlM9QKawvcnHSlZCZZrtozorjnNJUG\nW+kFFHvgCsr0uPMO71zDGCASnd2eK1TLYnPwd7iATYdhv/JdkzsQq6NDePUN4ErT\nw3hDW52gZ9AOvfNPjdUvGEPCG+MmeGu85hbe6RNhCQFrUbZDrFr2qBTbgeaUx5/h\nD107ofbyLEcLg1AFHaZW4mc7g58jCxnBiZY1Vp06TGPkHi0hfg9JZ+vw5FIBrizR\nvHoz0hrGwyUYLgIUhrNkQElWyidmmWqFnPVaa9EznJBDdxEOFjy5XPvQGLbsGZzu\n/YcpRLJjAgMBAAECggEAAPyXe2OUdjRaDWd7Fd1X0a4ODcOQ+Gh87wCCUIVmkTRz\nyaJazcrtpJo2xxAnCP7ytg+TAaasSEM9LPnIRab8F8dmk4vsc1zqAfd7ny0jzCEU\ngTQDPzIYCs6cT/Uz2KmdqnIB2KVlvIs4SE5hLxRoRr+TVKhG/dhSTbrWC0mizf3a\n4qp0yA10YiJzaTvR30vP737lLIfwTfN/BLVLXZwwTOcY9RtndkxHwoBevakcN/w+\nibVftQykh7vKbsuAIdvPe5ZBxWmd/7rcj1ckDqQYKZC/Lzyg6lMX9fK1886OfUwi\no7fGSr2xKgEOtsqKOG1o58QW9AJ6tvdWsKslrVF8wQKBgQDizpwbDHh/x2HPT87n\nA41asW7j1MJ4+kx/pce2v2wX9Y9mJNIT7SvMIpuDV1Hda7CMsxG6o4OMDg7R7lAx\nZKDf43MPKdKXBcLgZKGeYpQKmFBfRrTZTuc5s3ZpEGD3mrQqXfbPE+FHR3LkgXMS\n9NUKkXA+jf7Ey4tZJ/FBkROyIwKBgQDWWqwjSW2eDxBrG7abSWFdRDkQYIHmu1Xu\n1SvG4suNuEXFTix72Pv1bzLIzWAV2ZcOaGD9db5U7llDVDXKmTHiycU6QPNY/AYl\njgjkQGZvKyIjbx+TkV0ZbJ5YLfgwSDsB3OftgBGQiqItL/f0WXBPuKgoUfDNVAQv\nePq4Rn5iwQKBgQCgoI6yIVzdsgk1V2JC/aMIjgzo0e+A2lN/O5t7c4pMVZ7fSkh1\n0ExqYEAe/qJ93BzHKLLvqYoqkHIyf0LQGGJO7bi5DneUeYIQhXRoxYvvDPwu3daK\n6HSdytmbgvwJn8jlEHMRKRUs9A8MghlLXZXbzDDgPJQoL7daxwz30JVQXQKBgCpX\nyv1FegPpEeM2nEHcowEbgvI7Pi3n/7eoIHevQWqDZjTtllf4qvz2tF96QmrOispr\n0A0Bf1Gjq7pjHiUhUftsxCsXvXpsfoJYpmUAW7vGF35Maz5pTqzBCh59JjPTcOST\nULVXwSB+Yj7u5No9+LOm1IDyftDwscr26QAR3NGBAoGAaIGiv1TakFzTf5KS/Vuc\nSVOlE4Dx2FG5V37W9ee8sFFjreOsm/2Bfw62tpt+pjO94gtTfh0Gfe5Eu8tr/xHl\nnDKzXbyv+LKZLdrYMF++zI6b00U48AXz4G2LzgQVgb+ItK6MsU4GyWH2/tQY0LEV\n9noIYbdc7Q3vs8OfnOkK4RQ=\n-----END PRIVATE KEY-----\n",
"client_email": "firebase-adminsdk-m3k6v@trafficsolution-395806.iam.gserviceaccount.com",
"client_id": "114258949821756902394",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/firebase-adminsdk-m3k6v%40trafficsolution-395806.iam.gserviceaccount.com",
"universe_domain": "googleapis.com"
}
app.py
ADDED
@@ -0,0 +1,172 @@
import os
import gradio as gr
import cv2
import pandas as pd
import random
from datetime import datetime

import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore

from ultralytics import YOLO
from tracker import Tracker
from utils import ID2LABEL, MODEL_PATH, AUTHEN_ACCOUNT, compute_color_for_labels


cred = credentials.Certificate(AUTHEN_ACCOUNT)
firebase_admin.initialize_app(cred)
db = firestore.client()

colors = [(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
          for j in range(10)]

detection_threshold = 0.1
model = YOLO(MODEL_PATH)

def addToDatabase(ss_id, obj_ids):
    try:
        new_doc = db.collection("TrafficData").document()
        print(new_doc.id)
        data = {
            "SS_ID": ss_id,
            "TF_COUNT_CAR": len(obj_ids['car']),
            "TF_COUNT_MOTOBIKE": len(obj_ids['bicycle']) + len(obj_ids['motocycle']),
            "TF_COUNT_OTHERS": len(obj_ids['bus']) + len(obj_ids['truck']) + len(obj_ids['other']),
            "TF_ID": new_doc.id,
            "TF_TIME": datetime.utcnow()
        }
        try:
            db.collection("TrafficData").document(new_doc.id).set(data)
            print("Successfully saved to database")
        except:
            print("Can't upload new data")
    except:
        print("Can't create new data")


def traffic_counting(video):

    obj_ids = {"person": [],
               "bicycle": [],
               "car": [],
               "motocycle": [],
               "bus": [],
               "truck": [],
               "other": []}

    cap = cv2.VideoCapture(video)
    ret, frame = cap.read()

    tracker = Tracker()
    while ret:
        results = model.predict(frame)

        for result in results:
            detections = []
            for r in result.boxes.data.tolist():
                x1, y1, x2, y2, score, class_id = r
                x1 = int(x1)
                x2 = int(x2)
                y1 = int(y1)
                y2 = int(y2)
                class_id = int(class_id)
                if score > detection_threshold:
                    detections.append([x1, y1, x2, y2, class_id, score])

            tracker.update(frame, detections)

            for track in tracker.tracks:
                bbox = track.bbox
                x1, y1, x2, y2 = bbox
                track_id = track.track_id
                class_id = track.class_id

                cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), compute_color_for_labels(class_id), 3)
                label_name = ID2LABEL[class_id] if class_id in ID2LABEL.keys() else "other"
                if track_id not in obj_ids[label_name]:
                    obj_ids[label_name].append(track_id)

                cv2.putText(frame, f"{label_name}-{track_id}",
                            (int(x1) + 5, int(y1) - 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)

        # Count each type of traffic
        output_data = {key: len(value) for key, value in obj_ids.items()}
        df = pd.DataFrame(list(output_data.items()), columns=['Type', 'Number'])

        yield frame, df
        ret, frame = cap.read()

    cap.release()
    cv2.destroyAllWindows()
    video_path = video.replace("\\", "/")
    # addToDatabase(video_path.split("/")[-1][:-4], obj_ids)


# input_video = gr.Video(label="Input Video")
# output_video = gr.outputs.Video(label="Processing Video")
# output_data = gr.Dataframe(interactive=False, label="Traffic's Frequency")

# demo = gr.Interface(traffic_counting,
#                     inputs=input_video,
#                     outputs=[output_video, output_data],
#                     examples=[os.path.join('video', x) for x in os.listdir('video') if x != ".gitkeep"],
#                     allow_flagging='never'
#                     )
def traffic_detection(image):

    results = model.predict(image)
    detections = []
    obj_ids = {"person": [],
               "bicycle": [],
               "car": [],
               "motocycle": [],
               "bus": [],
               "truck": [],
               "other": []}

    for result in results:
        for r in result.boxes.data.tolist():
            x1, y1, x2, y2, score, class_id = r
            x1 = int(x1)
            x2 = int(x2)
            y1 = int(y1)
            y2 = int(y2)
            class_id = int(class_id)
            if score > detection_threshold:
                detections.append([x1, y1, x2, y2, class_id, score])
                cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), compute_color_for_labels(class_id), 1)
                label_name = ID2LABEL[class_id] if class_id in ID2LABEL.keys() else "other"
                cv2.putText(image, f"{label_name}",
                            (int(x1) + 5, int(y1) - 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.3, compute_color_for_labels(class_id), 1, cv2.LINE_AA)

    # Count each type of traffic
    output_data = {key: len(value) for key, value in obj_ids.items()}
    df = pd.DataFrame(list(output_data.items()), columns=['Type', 'Number'])
    yield image, df


# Input is an image
input_image = gr.Image(label="Input Image")
output_image = gr.Image(type="filepath", label="Processing Image")
output_data = gr.Dataframe(interactive=False, label="Traffic's Frequency")
demo = gr.Interface(traffic_detection,
                    inputs=input_image,
                    outputs=[output_image, output_data],
                    examples=[os.path.join('image', x) for x in os.listdir('image') if x != ".gitkeep"],
                    allow_flagging='never'
                    )


if __name__ == "__main__":
    demo.queue()
    demo.launch(share=False)
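
Both Gradio handlers are written as generators (`yield frame, df`), which is what lets the interface stream results once `demo.queue()` is enabled. Note that in this revision nothing appends to `obj_ids` inside `traffic_detection`, so its dataframe counts stay at zero. A minimal local smoke-test sketch, driving the generator directly; the sample path `image/sample.jpg` is hypothetical, and importing `app` also initializes Firebase, so `accountService.json` must be readable:

import cv2
from app import traffic_detection  # hypothetical local import of the module above

# The handler yields exactly one (annotated image, counts dataframe) pair.
image, df = next(traffic_detection(cv2.imread("image/sample.jpg")))
print(df)                            # all-zero counts in this revision
cv2.imwrite("annotated.jpg", image)  # boxes and labels drawn by cv2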
deep_sort/__init__.py
ADDED
File without changes
deep_sort/detection.py
ADDED
@@ -0,0 +1,55 @@
# vim: expandtab:ts=4:sw=4
import numpy as np


class Detection(object):
    """
    This class represents a bounding box detection in a single image.

    Parameters
    ----------
    tlwh : array_like
        Bounding box in format `(x, y, w, h)`.
    confidence : float
        Detector confidence score.
    feature : array_like
        A feature vector that describes the object contained in this image.

    Attributes
    ----------
    tlwh : ndarray
        Bounding box in format `(top left x, top left y, width, height)`.
    confidence : float
        Detector confidence score.
    class_name : str
        Detector class.
    feature : ndarray | NoneType
        A feature vector that describes the object contained in this image.

    """

    def __init__(self, tlwh, confidence, class_name, feature):
        self.tlwh = np.asarray(tlwh, dtype=np.float32)
        self.confidence = float(confidence)
        self.class_name = class_name
        self.feature = np.asarray(feature, dtype=np.float32)

    def get_class(self):
        return self.class_name

    def to_tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    def to_xyah(self):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
        """
        ret = self.tlwh.copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret
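
The two converters are easy to check by hand. A minimal sketch with a made-up 100x50 box at (10, 20) and a dummy appearance feature:

import numpy as np
from deep_sort.detection import Detection

det = Detection(tlwh=[10, 20, 100, 50], confidence=0.9,
                class_name="car", feature=np.zeros(128, dtype=np.float32))
print(det.to_tlbr())  # [ 10.  20. 110.  70.] -> (min x, min y, max x, max y)
print(det.to_xyah())  # [60. 45.  2. 50.]     -> center (60, 45), a = 100/50, h = 50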
deep_sort/iou_matching.py
ADDED
@@ -0,0 +1,81 @@
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
from . import linear_assignment


def iou(bbox, candidates):
    """Compute intersection over union.

    Parameters
    ----------
    bbox : ndarray
        A bounding box in format `(top left x, top left y, width, height)`.
    candidates : ndarray
        A matrix of candidate bounding boxes (one per row) in the same format
        as `bbox`.

    Returns
    -------
    ndarray
        The intersection over union in [0, 1] between the `bbox` and each
        candidate. A higher score means a larger fraction of the `bbox` is
        occluded by the candidate.

    """
    bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
    candidates_tl = candidates[:, :2]
    candidates_br = candidates[:, :2] + candidates[:, 2:]

    tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
               np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
    br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
               np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
    wh = np.maximum(0., br - tl)

    area_intersection = wh.prod(axis=1)
    area_bbox = bbox[2:].prod()
    area_candidates = candidates[:, 2:].prod(axis=1)
    return area_intersection / (area_bbox + area_candidates - area_intersection)


def iou_cost(tracks, detections, track_indices=None,
             detection_indices=None):
    """An intersection over union distance metric.

    Parameters
    ----------
    tracks : List[deep_sort.track.Track]
        A list of tracks.
    detections : List[deep_sort.detection.Detection]
        A list of detections.
    track_indices : Optional[List[int]]
        A list of indices to tracks that should be matched. Defaults to
        all `tracks`.
    detection_indices : Optional[List[int]]
        A list of indices to detections that should be matched. Defaults
        to all `detections`.

    Returns
    -------
    ndarray
        Returns a cost matrix of shape
        len(track_indices), len(detection_indices) where entry (i, j) is
        `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.

    """
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))

    cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
    for row, track_idx in enumerate(track_indices):
        if tracks[track_idx].time_since_update > 1:
            cost_matrix[row, :] = linear_assignment.INFTY_COST
            continue

        bbox = tracks[track_idx].to_tlwh()
        candidates = np.asarray([detections[i].tlwh for i in detection_indices])
        cost_matrix[row, :] = 1. - iou(bbox, candidates)
    return cost_matrix
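
As a worked example of `iou`: two 10x10 boxes offset by (5, 5) intersect in a 5x5 region, so IoU = 25 / (100 + 100 - 25) ≈ 0.143. A minimal sketch:

import numpy as np
from deep_sort.iou_matching import iou

bbox = np.array([0., 0., 10., 10.])          # (top left x, top left y, w, h)
candidates = np.array([[5., 5., 10., 10.]])  # one candidate box per row
print(iou(bbox, candidates))                  # [0.14285714]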
deep_sort/kalman_filter.py
ADDED
@@ -0,0 +1,229 @@
# vim: expandtab:ts=4:sw=4
import numpy as np
import scipy.linalg


"""
Table for the 0.95 quantile of the chi-square distribution with N degrees of
freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
function and used as Mahalanobis gating threshold.
"""
chi2inv95 = {
    1: 3.8415,
    2: 5.9915,
    3: 7.8147,
    4: 9.4877,
    5: 11.070,
    6: 12.592,
    7: 14.067,
    8: 15.507,
    9: 16.919}


class KalmanFilter(object):
    """
    A simple Kalman filter for tracking bounding boxes in image space.

    The 8-dimensional state space

        x, y, a, h, vx, vy, va, vh

    contains the bounding box center position (x, y), aspect ratio a, height h,
    and their respective velocities.

    Object motion follows a constant velocity model. The bounding box location
    (x, y, a, h) is taken as direct observation of the state space (linear
    observation model).

    """

    def __init__(self):
        ndim, dt = 4, 1.

        # Create Kalman filter model matrices.
        self._motion_mat = np.eye(2 * ndim, 2 * ndim)
        for i in range(ndim):
            self._motion_mat[i, ndim + i] = dt
        self._update_mat = np.eye(ndim, 2 * ndim)

        # Motion and observation uncertainty are chosen relative to the current
        # state estimate. These weights control the amount of uncertainty in
        # the model. This is a bit hacky.
        self._std_weight_position = 1. / 20
        self._std_weight_velocity = 1. / 160

    def initiate(self, measurement):
        """Create track from unassociated measurement.

        Parameters
        ----------
        measurement : ndarray
            Bounding box coordinates (x, y, a, h) with center position (x, y),
            aspect ratio a, and height h.

        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector (8 dimensional) and covariance matrix (8x8
            dimensional) of the new track. Unobserved velocities are initialized
            to 0 mean.

        """
        mean_pos = measurement
        mean_vel = np.zeros_like(mean_pos)
        mean = np.r_[mean_pos, mean_vel]

        std = [
            2 * self._std_weight_position * measurement[3],
            2 * self._std_weight_position * measurement[3],
            1e-2,
            2 * self._std_weight_position * measurement[3],
            10 * self._std_weight_velocity * measurement[3],
            10 * self._std_weight_velocity * measurement[3],
            1e-5,
            10 * self._std_weight_velocity * measurement[3]]
        covariance = np.diag(np.square(std))
        return mean, covariance

    def predict(self, mean, covariance):
        """Run Kalman filter prediction step.

        Parameters
        ----------
        mean : ndarray
            The 8 dimensional mean vector of the object state at the previous
            time step.
        covariance : ndarray
            The 8x8 dimensional covariance matrix of the object state at the
            previous time step.

        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector and covariance matrix of the predicted
            state. Unobserved velocities are initialized to 0 mean.

        """
        std_pos = [
            self._std_weight_position * mean[3],
            self._std_weight_position * mean[3],
            1e-2,
            self._std_weight_position * mean[3]]
        std_vel = [
            self._std_weight_velocity * mean[3],
            self._std_weight_velocity * mean[3],
            1e-5,
            self._std_weight_velocity * mean[3]]
        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))

        mean = np.dot(self._motion_mat, mean)
        covariance = np.linalg.multi_dot((
            self._motion_mat, covariance, self._motion_mat.T)) + motion_cov

        return mean, covariance

    def project(self, mean, covariance):
        """Project state distribution to measurement space.

        Parameters
        ----------
        mean : ndarray
            The state's mean vector (8 dimensional array).
        covariance : ndarray
            The state's covariance matrix (8x8 dimensional).

        Returns
        -------
        (ndarray, ndarray)
            Returns the projected mean and covariance matrix of the given state
            estimate.

        """
        std = [
            self._std_weight_position * mean[3],
            self._std_weight_position * mean[3],
            1e-1,
            self._std_weight_position * mean[3]]
        innovation_cov = np.diag(np.square(std))

        mean = np.dot(self._update_mat, mean)
        covariance = np.linalg.multi_dot((
            self._update_mat, covariance, self._update_mat.T))
        return mean, covariance + innovation_cov

    def update(self, mean, covariance, measurement):
        """Run Kalman filter correction step.

        Parameters
        ----------
        mean : ndarray
            The predicted state's mean vector (8 dimensional).
        covariance : ndarray
            The state's covariance matrix (8x8 dimensional).
        measurement : ndarray
            The 4 dimensional measurement vector (x, y, a, h), where (x, y)
            is the center position, a the aspect ratio, and h the height of the
            bounding box.

        Returns
        -------
        (ndarray, ndarray)
            Returns the measurement-corrected state distribution.

        """
        projected_mean, projected_cov = self.project(mean, covariance)

        chol_factor, lower = scipy.linalg.cho_factor(
            projected_cov, lower=True, check_finite=False)
        kalman_gain = scipy.linalg.cho_solve(
            (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
            check_finite=False).T
        innovation = measurement - projected_mean

        new_mean = mean + np.dot(innovation, kalman_gain.T)
        new_covariance = covariance - np.linalg.multi_dot((
            kalman_gain, projected_cov, kalman_gain.T))
        return new_mean, new_covariance

    def gating_distance(self, mean, covariance, measurements,
                        only_position=False):
        """Compute gating distance between state distribution and measurements.

        A suitable distance threshold can be obtained from `chi2inv95`. If
        `only_position` is False, the chi-square distribution has 4 degrees of
        freedom, otherwise 2.

        Parameters
        ----------
        mean : ndarray
            Mean vector over the state distribution (8 dimensional).
        covariance : ndarray
            Covariance of the state distribution (8x8 dimensional).
        measurements : ndarray
            An Nx4 dimensional matrix of N measurements, each in
            format (x, y, a, h) where (x, y) is the bounding box center
            position, a the aspect ratio, and h the height.
        only_position : Optional[bool]
            If True, distance computation is done with respect to the bounding
            box center position only.

        Returns
        -------
        ndarray
            Returns an array of length N, where the i-th element contains the
            squared Mahalanobis distance between (mean, covariance) and
            `measurements[i]`.

        """
        mean, covariance = self.project(mean, covariance)
        if only_position:
            mean, covariance = mean[:2], covariance[:2, :2]
            measurements = measurements[:, :2]

        cholesky_factor = np.linalg.cholesky(covariance)
        d = measurements - mean
        z = scipy.linalg.solve_triangular(
            cholesky_factor, d.T, lower=True, check_finite=False,
            overwrite_b=True)
        squared_maha = np.sum(z * z, axis=0)
        return squared_maha
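
The intended call pattern is `initiate` once per new track, then alternating `predict` and `update` each frame. A minimal sketch with made-up measurements in (center x, center y, aspect ratio, height) format:

import numpy as np
from deep_sort.kalman_filter import KalmanFilter

kf = KalmanFilter()
mean, cov = kf.initiate(np.array([60., 45., 2., 50.]))           # new track
mean, cov = kf.predict(mean, cov)                                 # propagate one frame
mean, cov = kf.update(mean, cov, np.array([62., 46., 2., 50.]))   # correct with new box
print(mean[:4])  # filtered (x, y, a, h); the last 4 entries are velocities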
deep_sort/linear_assignment.py
ADDED
@@ -0,0 +1,191 @@
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
from scipy.optimize import linear_sum_assignment
from . import kalman_filter


INFTY_COST = 1e+5


def min_cost_matching(
        distance_metric, max_distance, tracks, detections, track_indices=None,
        detection_indices=None):
    """Solve linear assignment problem.

    Parameters
    ----------
    distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
        The distance metric is given a list of tracks and detections as well as
        a list of N track indices and M detection indices. The metric should
        return the NxM dimensional cost matrix, where element (i, j) is the
        association cost between the i-th track in the given track indices and
        the j-th detection in the given detection_indices.
    max_distance : float
        Gating threshold. Associations with cost larger than this value are
        disregarded.
    tracks : List[track.Track]
        A list of predicted tracks at the current time step.
    detections : List[detection.Detection]
        A list of detections at the current time step.
    track_indices : List[int]
        List of track indices that maps rows in `cost_matrix` to tracks in
        `tracks` (see description above).
    detection_indices : List[int]
        List of detection indices that maps columns in `cost_matrix` to
        detections in `detections` (see description above).

    Returns
    -------
    (List[(int, int)], List[int], List[int])
        Returns a tuple with the following three entries:
        * A list of matched track and detection indices.
        * A list of unmatched track indices.
        * A list of unmatched detection indices.

    """
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))

    if len(detection_indices) == 0 or len(track_indices) == 0:
        return [], track_indices, detection_indices  # Nothing to match.

    cost_matrix = distance_metric(
        tracks, detections, track_indices, detection_indices)
    cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
    indices = linear_sum_assignment(cost_matrix)
    indices = np.asarray(indices)
    indices = np.transpose(indices)
    matches, unmatched_tracks, unmatched_detections = [], [], []
    for col, detection_idx in enumerate(detection_indices):
        if col not in indices[:, 1]:
            unmatched_detections.append(detection_idx)
    for row, track_idx in enumerate(track_indices):
        if row not in indices[:, 0]:
            unmatched_tracks.append(track_idx)
    for row, col in indices:
        track_idx = track_indices[row]
        detection_idx = detection_indices[col]
        if cost_matrix[row, col] > max_distance:
            unmatched_tracks.append(track_idx)
            unmatched_detections.append(detection_idx)
        else:
            matches.append((track_idx, detection_idx))
    return matches, unmatched_tracks, unmatched_detections


def matching_cascade(
        distance_metric, max_distance, cascade_depth, tracks, detections,
        track_indices=None, detection_indices=None):
    """Run matching cascade.

    Parameters
    ----------
    distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
        The distance metric is given a list of tracks and detections as well as
        a list of N track indices and M detection indices. The metric should
        return the NxM dimensional cost matrix, where element (i, j) is the
        association cost between the i-th track in the given track indices and
        the j-th detection in the given detection indices.
    max_distance : float
        Gating threshold. Associations with cost larger than this value are
        disregarded.
    cascade_depth: int
        The cascade depth, should be set to the maximum track age.
    tracks : List[track.Track]
        A list of predicted tracks at the current time step.
    detections : List[detection.Detection]
        A list of detections at the current time step.
    track_indices : Optional[List[int]]
        List of track indices that maps rows in `cost_matrix` to tracks in
        `tracks` (see description above). Defaults to all tracks.
    detection_indices : Optional[List[int]]
        List of detection indices that maps columns in `cost_matrix` to
        detections in `detections` (see description above). Defaults to all
        detections.

    Returns
    -------
    (List[(int, int)], List[int], List[int])
        Returns a tuple with the following three entries:
        * A list of matched track and detection indices.
        * A list of unmatched track indices.
        * A list of unmatched detection indices.

    """
    if track_indices is None:
        track_indices = list(range(len(tracks)))
    if detection_indices is None:
        detection_indices = list(range(len(detections)))

    unmatched_detections = detection_indices
    matches = []
    for level in range(cascade_depth):
        if len(unmatched_detections) == 0:  # No detections left
            break

        track_indices_l = [
            k for k in track_indices
            if tracks[k].time_since_update == 1 + level
        ]
        if len(track_indices_l) == 0:  # Nothing to match at this level
            continue

        matches_l, _, unmatched_detections = \
            min_cost_matching(
                distance_metric, max_distance, tracks, detections,
                track_indices_l, unmatched_detections)
        matches += matches_l
    unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
    return matches, unmatched_tracks, unmatched_detections


def gate_cost_matrix(
        kf, cost_matrix, tracks, detections, track_indices, detection_indices,
        gated_cost=INFTY_COST, only_position=False):
    """Invalidate infeasible entries in cost matrix based on the state
    distributions obtained by Kalman filtering.

    Parameters
    ----------
    kf : The Kalman filter.
    cost_matrix : ndarray
        The NxM dimensional cost matrix, where N is the number of track indices
        and M is the number of detection indices, such that entry (i, j) is the
        association cost between `tracks[track_indices[i]]` and
        `detections[detection_indices[j]]`.
    tracks : List[track.Track]
        A list of predicted tracks at the current time step.
    detections : List[detection.Detection]
        A list of detections at the current time step.
    track_indices : List[int]
        List of track indices that maps rows in `cost_matrix` to tracks in
        `tracks` (see description above).
    detection_indices : List[int]
        List of detection indices that maps columns in `cost_matrix` to
        detections in `detections` (see description above).
    gated_cost : Optional[float]
        Entries in the cost matrix corresponding to infeasible associations are
        set to this value. Defaults to a very large value.
    only_position : Optional[bool]
        If True, only the x, y position of the state distribution is considered
        during gating. Defaults to False.

    Returns
    -------
    ndarray
        Returns the modified cost matrix.

    """
    gating_dim = 2 if only_position else 4
    gating_threshold = kalman_filter.chi2inv95[gating_dim]
    measurements = np.asarray(
        [detections[i].to_xyah() for i in detection_indices])
    for row, track_idx in enumerate(track_indices):
        track = tracks[track_idx]
        gating_distance = kf.gating_distance(
            track.mean, track.covariance, measurements, only_position)
        cost_matrix[row, gating_distance > gating_threshold] = gated_cost
    return cost_matrix
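
`min_cost_matching` only touches tracks and detections through the metric callback, so a toy metric is enough to see the gating behaviour: costs above `max_distance` are pushed to `max_distance + 1e-5` and the corresponding pairs end up unmatched. A minimal sketch with hypothetical 1-D "positions":

import numpy as np
from deep_sort.linear_assignment import min_cost_matching

def abs_distance(tracks, dets, t_idx, d_idx):
    # Toy metric: tracks/detections are plain floats, cost = |difference|.
    return np.array([[abs(tracks[i] - dets[j]) for j in d_idx] for i in t_idx])

matches, unmatched_t, unmatched_d = min_cost_matching(
    abs_distance, 2.0, tracks=[0.0, 10.0], detections=[0.5, 30.0])
print(matches)      # track 0 <-> detection 0 is the only pair inside the gate
print(unmatched_t)  # track 1 falls outside the gate
print(unmatched_d)  # detection 1 falls outside the gate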
deep_sort/nn_matching.py
ADDED
@@ -0,0 +1,177 @@
# vim: expandtab:ts=4:sw=4
import numpy as np


def _pdist(a, b):
    """Compute pair-wise squared distance between points in `a` and `b`.

    Parameters
    ----------
    a : array_like
        An NxM matrix of N samples of dimensionality M.
    b : array_like
        An LxM matrix of L samples of dimensionality M.

    Returns
    -------
    ndarray
        Returns a matrix of size len(a), len(b) such that element (i, j)
        contains the squared distance between `a[i]` and `b[j]`.

    """
    a, b = np.asarray(a), np.asarray(b)
    if len(a) == 0 or len(b) == 0:
        return np.zeros((len(a), len(b)))
    a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)
    r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]
    r2 = np.clip(r2, 0., float(np.inf))
    return r2


def _cosine_distance(a, b, data_is_normalized=False):
    """Compute pair-wise cosine distance between points in `a` and `b`.

    Parameters
    ----------
    a : array_like
        An NxM matrix of N samples of dimensionality M.
    b : array_like
        An LxM matrix of L samples of dimensionality M.
    data_is_normalized : Optional[bool]
        If True, assumes rows in a and b are unit length vectors.
        Otherwise, a and b are explicitly normalized to length 1.

    Returns
    -------
    ndarray
        Returns a matrix of size len(a), len(b) such that element (i, j)
        contains the cosine distance between `a[i]` and `b[j]`.

    """
    if not data_is_normalized:
        a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
        b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
    return 1. - np.dot(a, b.T)


def _nn_euclidean_distance(x, y):
    """ Helper function for nearest neighbor distance metric (Euclidean).

    Parameters
    ----------
    x : ndarray
        A matrix of N row-vectors (sample points).
    y : ndarray
        A matrix of M row-vectors (query points).

    Returns
    -------
    ndarray
        A vector of length M that contains for each entry in `y` the
        smallest Euclidean distance to a sample in `x`.

    """
    distances = _pdist(x, y)
    return np.maximum(0.0, distances.min(axis=0))


def _nn_cosine_distance(x, y):
    """ Helper function for nearest neighbor distance metric (cosine).

    Parameters
    ----------
    x : ndarray
        A matrix of N row-vectors (sample points).
    y : ndarray
        A matrix of M row-vectors (query points).

    Returns
    -------
    ndarray
        A vector of length M that contains for each entry in `y` the
        smallest cosine distance to a sample in `x`.

    """
    distances = _cosine_distance(x, y)
    return distances.min(axis=0)


class NearestNeighborDistanceMetric(object):
    """
    A nearest neighbor distance metric that, for each target, returns
    the closest distance to any sample that has been observed so far.

    Parameters
    ----------
    metric : str
        Either "euclidean" or "cosine".
    matching_threshold: float
        The matching threshold. Samples with larger distance are considered an
        invalid match.
    budget : Optional[int]
        If not None, fix samples per class to at most this number. Removes
        the oldest samples when the budget is reached.

    Attributes
    ----------
    samples : Dict[int -> List[ndarray]]
        A dictionary that maps from target identities to the list of samples
        that have been observed so far.

    """

    def __init__(self, metric, matching_threshold, budget=None):

        if metric == "euclidean":
            self._metric = _nn_euclidean_distance
        elif metric == "cosine":
            self._metric = _nn_cosine_distance
        else:
            raise ValueError(
                "Invalid metric; must be either 'euclidean' or 'cosine'")
        self.matching_threshold = matching_threshold
        self.budget = budget
        self.samples = {}

    def partial_fit(self, features, targets, active_targets):
        """Update the distance metric with new data.

        Parameters
        ----------
        features : ndarray
            An NxM matrix of N features of dimensionality M.
        targets : ndarray
            An integer array of associated target identities.
        active_targets : List[int]
            A list of targets that are currently present in the scene.

        """
        for feature, target in zip(features, targets):
            self.samples.setdefault(target, []).append(feature)
            if self.budget is not None:
                self.samples[target] = self.samples[target][-self.budget:]
        self.samples = {k: self.samples[k] for k in active_targets}

    def distance(self, features, targets):
        """Compute distance between features and targets.

        Parameters
        ----------
        features : ndarray
            An NxM matrix of N features of dimensionality M.
        targets : List[int]
            A list of targets to match the given `features` against.

        Returns
        -------
        ndarray
            Returns a cost matrix of shape len(targets), len(features), where
            element (i, j) contains the closest squared distance between
            `targets[i]` and `features[j]`.

        """
        cost_matrix = np.zeros((len(targets), len(features)))
        for i, target in enumerate(targets):
            cost_matrix[i, :] = self._metric(self.samples[target], features)
        return cost_matrix
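
A minimal sketch of the metric's bookkeeping: `partial_fit` accumulates a per-target feature gallery (trimmed to `budget`), and `distance` scores each query feature against each target's nearest gallery sample. With orthogonal unit vectors, cosine distance is 1 between different axes and 0 for an exact repeat:

import numpy as np
from deep_sort.nn_matching import NearestNeighborDistanceMetric

metric = NearestNeighborDistanceMetric("cosine", matching_threshold=0.2, budget=100)
gallery = np.eye(3, dtype=np.float32)  # three orthogonal unit features
metric.partial_fit(gallery, targets=np.array([1, 1, 2]), active_targets=[1, 2])

query = np.array([[0., 0., 1.]], dtype=np.float32)  # identical to target 2's sample
print(metric.distance(query, targets=[1, 2]))       # row 0 ~ 1.0, row 1 ~ 0.0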
deep_sort/preprocessing.py
ADDED
@@ -0,0 +1,74 @@
# vim: expandtab:ts=4:sw=4
import numpy as np
import cv2


def non_max_suppression(boxes, classes, max_bbox_overlap, scores=None):
    """Suppress overlapping detections.

    Original code from [1]_ has been adapted to include confidence score.

    .. [1] http://www.pyimagesearch.com/2015/02/16/
           faster-non-maximum-suppression-python/

    Examples
    --------

        >>> boxes = [d.roi for d in detections]
        >>> classes = [d.classes for d in detections]
        >>> scores = [d.confidence for d in detections]
        >>> indices = non_max_suppression(boxes, classes, max_bbox_overlap, scores)
        >>> detections = [detections[i] for i in indices]

    Parameters
    ----------
    boxes : ndarray
        Array of ROIs (x, y, width, height).
    max_bbox_overlap : float
        ROIs that overlap more than this value are suppressed.
    scores : Optional[array_like]
        Detector confidence score.

    Returns
    -------
    List[int]
        Returns indices of detections that have survived non-maxima suppression.

    """
    if len(boxes) == 0:
        return []

    boxes = boxes.astype(float)  # np.float is removed in NumPy >= 1.24
    pick = []

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2] + boxes[:, 0]
    y2 = boxes[:, 3] + boxes[:, 1]

    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    if scores is not None:
        idxs = np.argsort(scores)
    else:
        idxs = np.argsort(y2)

    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])

        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        overlap = (w * h) / area[idxs[:last]]

        idxs = np.delete(
            idxs, np.concatenate(
                ([last], np.where(overlap > max_bbox_overlap)[0])))

    return pick
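
A small numeric check of the suppression logic: of two heavily overlapping boxes the lower-scored one is dropped, while a distant box survives. (Note `classes` is accepted but unused by this implementation.) A minimal sketch:

import numpy as np
from deep_sort.preprocessing import non_max_suppression

boxes = np.array([[0, 0, 10, 10], [1, 1, 10, 10], [50, 50, 10, 10]], dtype=float)
scores = np.array([0.9, 0.8, 0.7])
keep = non_max_suppression(boxes, None, max_bbox_overlap=0.5, scores=scores)
print(keep)  # indices 0 and 2 survive; the near-duplicate at index 1 is suppressed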
deep_sort/track.py
ADDED
@@ -0,0 +1,170 @@
# vim: expandtab:ts=4:sw=4


class TrackState:
    """
    Enumeration type for the single target track state. Newly created tracks are
    classified as `tentative` until enough evidence has been collected. Then,
    the track state is changed to `confirmed`. Tracks that are no longer alive
    are classified as `deleted` to mark them for removal from the set of active
    tracks.

    """

    Tentative = 1
    Confirmed = 2
    Deleted = 3


class Track:
    """
    A single target track with state space `(x, y, a, h)` and associated
    velocities, where `(x, y)` is the center of the bounding box, `a` is the
    aspect ratio and `h` is the height.

    Parameters
    ----------
    mean : ndarray
        Mean vector of the initial state distribution.
    covariance : ndarray
        Covariance matrix of the initial state distribution.
    track_id : int
        A unique track identifier.
    n_init : int
        Number of consecutive detections before the track is confirmed. The
        track state is set to `Deleted` if a miss occurs within the first
        `n_init` frames.
    max_age : int
        The maximum number of consecutive misses before the track state is
        set to `Deleted`.
    feature : Optional[ndarray]
        Feature vector of the detection this track originates from. If not None,
        this feature is added to the `features` cache.

    Attributes
    ----------
    mean : ndarray
        Mean vector of the initial state distribution.
    covariance : ndarray
        Covariance matrix of the initial state distribution.
    track_id : int
        A unique track identifier.
    hits : int
        Total number of measurement updates.
    age : int
        Total number of frames since first occurrence.
    time_since_update : int
        Total number of frames since last measurement update.
    state : TrackState
        The current track state.
    features : List[ndarray]
        A cache of features. On each measurement update, the associated feature
        vector is added to this list.

    """

    def __init__(self, mean, covariance, track_id, n_init, max_age,
                 feature=None, class_name=None):
        self.mean = mean
        self.covariance = covariance
        self.track_id = track_id
        self.hits = 1
        self.age = 1
        self.time_since_update = 0

        self.state = TrackState.Tentative
        self.features = []
        if feature is not None:
            self.features.append(feature)

        self._n_init = n_init
        self._max_age = max_age
        self.class_name = class_name

    def to_tlwh(self):
        """Get current position in bounding box format `(top left x, top left y,
        width, height)`.

        Returns
        -------
        ndarray
            The bounding box.

        """
        ret = self.mean[:4].copy()
        ret[2] *= ret[3]
        ret[:2] -= ret[2:] / 2
        return ret

    def to_tlbr(self):
        """Get current position in bounding box format `(min x, min y, max x,
        max y)`.

        Returns
        -------
        ndarray
            The bounding box.

        """
        ret = self.to_tlwh()
        ret[2:] = ret[:2] + ret[2:]
        return ret

    def get_class(self):
        return self.class_name

    def predict(self, kf):
        """Propagate the state distribution to the current time step using a
        Kalman filter prediction step.

        Parameters
        ----------
        kf : kalman_filter.KalmanFilter
            The Kalman filter.

        """
        self.mean, self.covariance = kf.predict(self.mean, self.covariance)
        self.age += 1
        self.time_since_update += 1

    def update(self, kf, detection):
        """Perform Kalman filter measurement update step and update the feature
        cache.

        Parameters
        ----------
        kf : kalman_filter.KalmanFilter
            The Kalman filter.
        detection : Detection
            The associated detection.

        """
        self.mean, self.covariance = kf.update(
            self.mean, self.covariance, detection.to_xyah())
        self.features.append(detection.feature)

        self.hits += 1
        self.time_since_update = 0
        if self.state == TrackState.Tentative and self.hits >= self._n_init:
            self.state = TrackState.Confirmed

    def mark_missed(self):
        """Mark this track as missed (no association at the current time step).
        """
        if self.state == TrackState.Tentative:
            self.state = TrackState.Deleted
        elif self.time_since_update > self._max_age:
            self.state = TrackState.Deleted

    def is_tentative(self):
        """Returns True if this track is tentative (unconfirmed).
        """
        return self.state == TrackState.Tentative

    def is_confirmed(self):
        """Returns True if this track is confirmed."""
        return self.state == TrackState.Confirmed

    def is_deleted(self):
        """Returns True if this track is dead and should be deleted."""
        return self.state == TrackState.Deleted
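
The tentative-to-confirmed transition is driven purely by `hits` versus `n_init`. A minimal sketch, reusing the hypothetical detection from the Detection example above:

import numpy as np
from deep_sort.kalman_filter import KalmanFilter
from deep_sort.detection import Detection
from deep_sort.track import Track

kf = KalmanFilter()
det = Detection([10, 20, 100, 50], 0.9, "car", np.zeros(128, dtype=np.float32))
mean, cov = kf.initiate(det.to_xyah())
track = Track(mean, cov, track_id=1, n_init=3, max_age=60,
              feature=det.feature, class_name=det.get_class())
print(track.is_tentative())   # True: created with hits == 1

for _ in range(2):            # two more predict/update cycles reach n_init
    track.predict(kf)
    track.update(kf, det)
print(track.is_confirmed())   # True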
deep_sort/tracker.py
ADDED
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# vim: expandtab:ts=4:sw=4
|
2 |
+
from __future__ import absolute_import
|
3 |
+
import numpy as np
|
4 |
+
from . import kalman_filter
|
5 |
+
from . import linear_assignment
|
6 |
+
from . import iou_matching
|
7 |
+
from .track import Track
|
8 |
+
|
9 |
+
|
10 |
+
class Tracker:
|
11 |
+
"""
|
12 |
+
This is the multi-target tracker.
|
13 |
+
|
14 |
+
Parameters
|
15 |
+
----------
|
16 |
+
metric : nn_matching.NearestNeighborDistanceMetric
|
17 |
+
A distance metric for measurement-to-track association.
|
18 |
+
max_age : int
|
19 |
+
Maximum number of missed misses before a track is deleted.
|
20 |
+
n_init : int
|
21 |
+
Number of consecutive detections before the track is confirmed. The
|
22 |
+
track state is set to `Deleted` if a miss occurs within the first
|
23 |
+
`n_init` frames.
|
24 |
+
|
25 |
+
Attributes
|
26 |
+
----------
|
27 |
+
metric : nn_matching.NearestNeighborDistanceMetric
|
28 |
+
The distance metric used for measurement to track association.
|
29 |
+
max_age : int
|
30 |
+
        Maximum number of consecutive misses before a track is deleted.
    n_init : int
        Number of frames that a track remains in initialization phase.
    kf : kalman_filter.KalmanFilter
        A Kalman filter to filter target trajectories in image space.
    tracks : List[Track]
        The list of active tracks at the current time step.

    """

    def __init__(self, metric, max_iou_distance=0.7, max_age=60, n_init=3):
        self.metric = metric
        self.max_iou_distance = max_iou_distance
        self.max_age = max_age
        self.n_init = n_init

        self.kf = kalman_filter.KalmanFilter()
        self.tracks = []
        self._next_id = 1

    def predict(self):
        """Propagate track state distributions one time step forward.

        This function should be called once every time step, before `update`.
        """
        for track in self.tracks:
            track.predict(self.kf)

    def update(self, detections):
        """Perform measurement update and track management.

        Parameters
        ----------
        detections : List[deep_sort.detection.Detection]
            A list of detections at the current time step.

        """
        # Run matching cascade.
        matches, unmatched_tracks, unmatched_detections = \
            self._match(detections)

        # Update track set.
        for track_idx, detection_idx in matches:
            self.tracks[track_idx].update(
                self.kf, detections[detection_idx])
        for track_idx in unmatched_tracks:
            self.tracks[track_idx].mark_missed()
        for detection_idx in unmatched_detections:
            self._initiate_track(detections[detection_idx])
        self.tracks = [t for t in self.tracks if not t.is_deleted()]

        # Update distance metric.
        active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
        features, targets = [], []
        for track in self.tracks:
            if not track.is_confirmed():
                continue
            features += track.features
            targets += [track.track_id for _ in track.features]
            track.features = []
        self.metric.partial_fit(
            np.asarray(features), np.asarray(targets), active_targets)

    def _match(self, detections):

        def gated_metric(tracks, dets, track_indices, detection_indices):
            features = np.array([dets[i].feature for i in detection_indices])
            targets = np.array([tracks[i].track_id for i in track_indices])
            cost_matrix = self.metric.distance(features, targets)
            cost_matrix = linear_assignment.gate_cost_matrix(
                self.kf, cost_matrix, tracks, dets, track_indices,
                detection_indices)

            return cost_matrix

        # Split track set into confirmed and unconfirmed tracks.
        confirmed_tracks = [
            i for i, t in enumerate(self.tracks) if t.is_confirmed()]
        unconfirmed_tracks = [
            i for i, t in enumerate(self.tracks) if not t.is_confirmed()]

        # Associate confirmed tracks using appearance features.
        matches_a, unmatched_tracks_a, unmatched_detections = \
            linear_assignment.matching_cascade(
                gated_metric, self.metric.matching_threshold, self.max_age,
                self.tracks, detections, confirmed_tracks)

        # Associate remaining tracks together with unconfirmed tracks using IOU.
        iou_track_candidates = unconfirmed_tracks + [
            k for k in unmatched_tracks_a if
            self.tracks[k].time_since_update == 1]
        unmatched_tracks_a = [
            k for k in unmatched_tracks_a if
            self.tracks[k].time_since_update != 1]
        matches_b, unmatched_tracks_b, unmatched_detections = \
            linear_assignment.min_cost_matching(
                iou_matching.iou_cost, self.max_iou_distance, self.tracks,
                detections, iou_track_candidates, unmatched_detections)

        matches = matches_a + matches_b
        unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
        return matches, unmatched_tracks, unmatched_detections

    def _initiate_track(self, detection):
        mean, covariance = self.kf.initiate(detection.to_xyah())
        class_name = detection.get_class()
        self.tracks.append(Track(
            mean, covariance, self._next_id, self.n_init, self.max_age,
            detection.feature, class_name))
        self._next_id += 1
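For orientation, here is a minimal sketch (not part of the commit) of how this Tracker class is driven: build a nearest-neighbor cosine metric, then alternate predict() and update() once per frame. The frame loop and the per-detection (tlwh, score, class_name, feature) tuples are hypothetical placeholders for whatever the detector and appearance encoder produce.

import numpy as np
from deep_sort import nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker

metric = nn_matching.NearestNeighborDistanceMetric(
    "cosine", matching_threshold=0.4, budget=None)
tracker = Tracker(metric)

for frame_detections in all_frames:  # hypothetical per-frame detection tuples
    dets = [Detection(tlwh, score, class_name, feature)
            for tlwh, score, class_name, feature in frame_detections]
    tracker.predict()     # propagate every track's Kalman state one step
    tracker.update(dets)  # matching cascade, then initiate/mark/delete
    for t in tracker.tracks:
        if t.is_confirmed() and t.time_since_update == 0:
            print(t.track_id, t.to_tlwh())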
image/test
ADDED
File without changes
model_data/best.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:834cf683bc111bfe901203be83b65b1aa7267c07e8e75ee74ac359408c6ea46a
size 22552601
model_data/yolov8m.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6c25b0b63b1a433843f06d821a9ac1deb8d5805f74f0f38772c7308c5adc55a5
size 52117635
object_tracker_demo.py
ADDED
@@ -0,0 +1,89 @@
import os
import random

import cv2
from ultralytics import YOLO
from tracker import Tracker

id2label = {0: "person", 1: "bicycle", 2: "car", 3: "motorcycle", 5: "bus", 7: "truck"}
palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)
# object_counter = {"person": 0, "car": 0, "motorcycle": 0, "bus": 0, "truck": 0, "other": 0}
object_ids = {"person": [], "bicycle": [], "car": [], "motorcycle": [], "bus": [], "truck": [], "other": []}

video_path = os.path.join('.', 'video', 'NgaTu_01.mp4')
# video_out_path = os.path.join('.', 'out.mp4')

cap = cv2.VideoCapture(video_path)
ret, frame = cap.read()  # assumes the input video exists and has at least one frame

cap_out = cv2.VideoWriter('out.avi', cv2.VideoWriter_fourcc(*'MJPG'), cap.get(cv2.CAP_PROP_FPS), (frame.shape[1], frame.shape[0]))

model = YOLO('model_data/yolov8m.pt')

tracker = Tracker()

colors = [(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) for j in range(10)]

detection_threshold = 0.7


def compute_color_for_labels(label):
    if label == 0:  # person
        color = (85, 45, 255)
    elif label == 2:  # car
        color = (222, 82, 175)
    elif label == 3:  # motorcycle
        color = (0, 204, 255)
    elif label == 5:  # bus
        color = (0, 149, 255)
    else:
        color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette]
    return tuple(color)


while ret:
    results = model.predict(frame)

    for result in results:
        detections = []
        for r in result.boxes.data.tolist():
            x1, y1, x2, y2, score, class_id = r
            x1 = int(x1)
            x2 = int(x2)
            y1 = int(y1)
            y2 = int(y2)
            class_id = int(class_id)
            if score > detection_threshold:
                detections.append([x1, y1, x2, y2, class_id, score])

        tracker.update(frame, detections)

        for track in tracker.tracks:
            bbox = track.bbox
            x1, y1, x2, y2 = bbox
            track_id = track.track_id
            class_id = track.class_id

            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), compute_color_for_labels(class_id), 3)

            label_name = id2label[class_id] if class_id in id2label else "other"
            if track_id not in object_ids[label_name]:
                object_ids[label_name].append(track_id)

            cv2.putText(frame, f"{label_name}-{track_id}",
                        (int(x1) + 5, int(y1) - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)

    cap_out.write(frame)
    cv2.imshow('frame', frame)
    cv2.waitKey(2)

    ret, frame = cap.read()

cap.release()
cv2.destroyAllWindows()


print(object_ids)
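Because object_ids keeps one list of unique track IDs per label, turning it into per-class counts is just taking list lengths. A small follow-on sketch, assuming the loop above has finished:

counts = {label: len(ids) for label, ids in object_ids.items()}
for label, n in sorted(counts.items(), key=lambda kv: -kv[1]):
    print(f"{label:>10}: {n}")
print(f"{'total':>10}: {sum(counts.values())}")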
out_video/.gitkeep
ADDED
File without changes
output_video.avi
ADDED
Binary file (47.7 kB)
requirements.txt
ADDED
@@ -0,0 +1,8 @@
gradio==3.41.2
gradio_client==0.5.0
opencv-python==4.8.0.76
pandas==2.0.3
ultralytics==8.0.178
scipy==1.11.2
tensorflow==2.11.0
firebase_admin
testAPI.py
ADDED
File without changes
tools/freeze_model.py
ADDED
@@ -0,0 +1,219 @@
# vim: expandtab:ts=4:sw=4
import argparse
import tensorflow.compat.v1 as tf  # tf.contrib was removed in TF 2.x; use the v1 compat API
import tf_slim as slim  # assumes the tf_slim package (pip install tf-slim) replaces tensorflow.contrib.slim

tf.disable_eager_execution()  # the script builds a TF1-style static graph


def _batch_norm_fn(x, scope=None):
    if scope is None:
        scope = tf.get_variable_scope().name + "/bn"
    return slim.batch_norm(x, scope=scope)


def create_link(
        incoming, network_builder, scope, nonlinearity=tf.nn.elu,
        weights_initializer=tf.truncated_normal_initializer(stddev=1e-3),
        regularizer=None, is_first=False, summarize_activations=True):
    if is_first:
        network = incoming
    else:
        network = _batch_norm_fn(incoming, scope=scope + "/bn")
        network = nonlinearity(network)
        if summarize_activations:
            tf.summary.histogram(scope + "/activations", network)

    pre_block_network = network
    post_block_network = network_builder(pre_block_network, scope)

    incoming_dim = pre_block_network.get_shape().as_list()[-1]
    outgoing_dim = post_block_network.get_shape().as_list()[-1]
    if incoming_dim != outgoing_dim:
        assert outgoing_dim == 2 * incoming_dim, \
            "%d != %d" % (outgoing_dim, 2 * incoming_dim)
        projection = slim.conv2d(
            incoming, outgoing_dim, 1, 2, padding="SAME", activation_fn=None,
            scope=scope + "/projection", weights_initializer=weights_initializer,
            biases_initializer=None, weights_regularizer=regularizer)
        network = projection + post_block_network
    else:
        network = incoming + post_block_network
    return network


def create_inner_block(
        incoming, scope, nonlinearity=tf.nn.elu,
        weights_initializer=tf.truncated_normal_initializer(stddev=1e-3),
        bias_initializer=tf.zeros_initializer(), regularizer=None,
        increase_dim=False, summarize_activations=True):
    n = incoming.get_shape().as_list()[-1]
    stride = 1
    if increase_dim:
        n *= 2
        stride = 2

    incoming = slim.conv2d(
        incoming, n, [3, 3], stride, activation_fn=nonlinearity, padding="SAME",
        normalizer_fn=_batch_norm_fn, weights_initializer=weights_initializer,
        biases_initializer=bias_initializer, weights_regularizer=regularizer,
        scope=scope + "/1")
    if summarize_activations:
        tf.summary.histogram(incoming.name + "/activations", incoming)

    incoming = slim.dropout(incoming, keep_prob=0.6)

    incoming = slim.conv2d(
        incoming, n, [3, 3], 1, activation_fn=None, padding="SAME",
        normalizer_fn=None, weights_initializer=weights_initializer,
        biases_initializer=bias_initializer, weights_regularizer=regularizer,
        scope=scope + "/2")
    return incoming


def residual_block(incoming, scope, nonlinearity=tf.nn.elu,
                   weights_initializer=tf.truncated_normal_initializer(stddev=1e-3),
                   bias_initializer=tf.zeros_initializer(), regularizer=None,
                   increase_dim=False, is_first=False,
                   summarize_activations=True):

    def network_builder(x, s):
        return create_inner_block(
            x, s, nonlinearity, weights_initializer, bias_initializer,
            regularizer, increase_dim, summarize_activations)

    return create_link(
        incoming, network_builder, scope, nonlinearity, weights_initializer,
        regularizer, is_first, summarize_activations)


def _create_network(incoming, reuse=None, weight_decay=1e-8):
    nonlinearity = tf.nn.elu
    conv_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
    conv_bias_init = tf.zeros_initializer()
    conv_regularizer = slim.l2_regularizer(weight_decay)
    fc_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
    fc_bias_init = tf.zeros_initializer()
    fc_regularizer = slim.l2_regularizer(weight_decay)

    def batch_norm_fn(x):
        return slim.batch_norm(x, scope=tf.get_variable_scope().name + "/bn")

    network = incoming
    network = slim.conv2d(
        network, 32, [3, 3], stride=1, activation_fn=nonlinearity,
        padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_1",
        weights_initializer=conv_weight_init, biases_initializer=conv_bias_init,
        weights_regularizer=conv_regularizer)
    network = slim.conv2d(
        network, 32, [3, 3], stride=1, activation_fn=nonlinearity,
        padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_2",
        weights_initializer=conv_weight_init, biases_initializer=conv_bias_init,
        weights_regularizer=conv_regularizer)

    # NOTE(nwojke): This is missing a padding="SAME" to match the CNN
    # architecture in Table 1 of the paper. Information on how this affects
    # performance on MOT 16 training sequences can be found in
    # issue 10 https://github.com/nwojke/deep_sort/issues/10
    network = slim.max_pool2d(network, [3, 3], [2, 2], scope="pool1")

    network = residual_block(
        network, "conv2_1", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=False, is_first=True)
    network = residual_block(
        network, "conv2_3", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=False)

    network = residual_block(
        network, "conv3_1", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=True)
    network = residual_block(
        network, "conv3_3", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=False)

    network = residual_block(
        network, "conv4_1", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=True)
    network = residual_block(
        network, "conv4_3", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=False)

    feature_dim = network.get_shape().as_list()[-1]
    network = slim.flatten(network)

    network = slim.dropout(network, keep_prob=0.6)
    network = slim.fully_connected(
        network, feature_dim, activation_fn=nonlinearity,
        normalizer_fn=batch_norm_fn, weights_regularizer=fc_regularizer,
        scope="fc1", weights_initializer=fc_weight_init,
        biases_initializer=fc_bias_init)

    features = network

    # Features in rows, normalize axis 1.
    features = slim.batch_norm(features, scope="ball", reuse=reuse)
    feature_norm = tf.sqrt(
        tf.constant(1e-8, tf.float32) +
        tf.reduce_sum(tf.square(features), [1], keepdims=True))
    features = features / feature_norm
    return features, None


def _network_factory(weight_decay=1e-8):

    def factory_fn(image, reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=False):
            with slim.arg_scope([slim.conv2d, slim.fully_connected,
                                 slim.batch_norm, slim.layer_norm],
                                reuse=reuse):
                features, logits = _create_network(
                    image, reuse=reuse, weight_decay=weight_decay)
                return features, logits

    return factory_fn


def _preprocess(image):
    image = image[:, :, ::-1]  # BGR to RGB
    return image


def parse_args():
    """Parse command line arguments.
    """
    parser = argparse.ArgumentParser(description="Freeze old model")
    parser.add_argument(
        "--checkpoint_in",
        default="resources/networks/mars-small128.ckpt-68577",
        help="Path to checkpoint file")
    parser.add_argument(
        "--graphdef_out",
        default="resources/networks/mars-small128.pb")
    return parser.parse_args()


def main():
    args = parse_args()

    with tf.Session(graph=tf.Graph()) as session:
        input_var = tf.placeholder(
            tf.uint8, (None, 128, 64, 3), name="images")
        image_var = tf.map_fn(
            lambda x: _preprocess(x), tf.cast(input_var, tf.float32),
            back_prop=False)

        factory_fn = _network_factory()
        features, _ = factory_fn(image_var, reuse=None)
        features = tf.identity(features, name="features")

        saver = tf.train.Saver(slim.get_variables_to_restore())
        saver.restore(session, args.checkpoint_in)

        output_graph_def = tf.graph_util.convert_variables_to_constants(
            session, tf.get_default_graph().as_graph_def(),
            [features.name.split(":")[0]])
        with tf.gfile.GFile(args.graphdef_out, "wb") as file_handle:
            file_handle.write(output_graph_def.SerializeToString())


if __name__ == "__main__":
    main()
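A quick way to sanity-check the exported graph (a sketch, not part of the commit) is to load the .pb that main() writes and confirm the named input and output tensors exist with the expected shapes; the path below is the script's default --graphdef_out:

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()
with tf.gfile.GFile("resources/networks/mars-small128.pb", "rb") as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
tf.import_graph_def(graph_def, name="net")
graph = tf.get_default_graph()
print(graph.get_tensor_by_name("net/images:0").shape)    # expected (?, 128, 64, 3)
print(graph.get_tensor_by_name("net/features:0").shape)  # expected (?, 128)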
tools/generate_detections.py
ADDED
@@ -0,0 +1,218 @@
# vim: expandtab:ts=4:sw=4
import os
import errno
import argparse
import numpy as np
import cv2
import tensorflow.compat.v1 as tf

#tf.compat.v1.disable_eager_execution()

physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)


def _run_in_batches(f, data_dict, out, batch_size):
    data_len = len(out)
    num_batches = int(data_len / batch_size)

    s, e = 0, 0
    for i in range(num_batches):
        s, e = i * batch_size, (i + 1) * batch_size
        batch_data_dict = {k: v[s:e] for k, v in data_dict.items()}
        out[s:e] = f(batch_data_dict)
    if e < len(out):
        batch_data_dict = {k: v[e:] for k, v in data_dict.items()}
        out[e:] = f(batch_data_dict)


def extract_image_patch(image, bbox, patch_shape):
    """Extract image patch from bounding box.

    Parameters
    ----------
    image : ndarray
        The full image.
    bbox : array_like
        The bounding box in format (x, y, width, height).
    patch_shape : Optional[array_like]
        This parameter can be used to enforce a desired patch shape
        (height, width). First, the `bbox` is adapted to the aspect ratio
        of the patch shape, then it is clipped at the image boundaries.
        If None, the shape is computed from :arg:`bbox`.

    Returns
    -------
    ndarray | NoneType
        An image patch showing the :arg:`bbox`, optionally reshaped to
        :arg:`patch_shape`.
        Returns None if the bounding box is empty or fully outside of the image
        boundaries.

    """
    bbox = np.array(bbox)
    if patch_shape is not None:
        # correct aspect ratio to patch shape
        target_aspect = float(patch_shape[1]) / patch_shape[0]
        new_width = target_aspect * bbox[3]
        bbox[0] -= (new_width - bbox[2]) / 2
        bbox[2] = new_width

    # convert to top left, bottom right
    bbox[2:] += bbox[:2]
    bbox = bbox.astype(np.int32)

    # clip at image boundaries
    bbox[:2] = np.maximum(0, bbox[:2])
    bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:])
    if np.any(bbox[:2] >= bbox[2:]):
        return None
    sx, sy, ex, ey = bbox
    image = image[sy:ey, sx:ex]
    image = cv2.resize(image, tuple(patch_shape[::-1]))
    return image


class ImageEncoder(object):

    def __init__(self, checkpoint_filename, input_name="images",
                 output_name="features"):
        self.session = tf.Session()
        with tf.gfile.GFile(checkpoint_filename, "rb") as file_handle:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(file_handle.read())
        tf.import_graph_def(graph_def, name="net")
        # The graph is imported under the "net" scope, so tensor names must
        # carry that prefix.
        self.input_var = tf.get_default_graph().get_tensor_by_name(
            "net/%s:0" % input_name)
        self.output_var = tf.get_default_graph().get_tensor_by_name(
            "net/%s:0" % output_name)

        assert len(self.output_var.get_shape()) == 2
        assert len(self.input_var.get_shape()) == 4
        self.feature_dim = self.output_var.get_shape().as_list()[-1]
        self.image_shape = self.input_var.get_shape().as_list()[1:]

    def __call__(self, data_x, batch_size=32):
        out = np.zeros((len(data_x), self.feature_dim), np.float32)
        _run_in_batches(
            lambda x: self.session.run(self.output_var, feed_dict=x),
            {self.input_var: data_x}, out, batch_size)
        return out


def create_box_encoder(model_filename, input_name="images",
                       output_name="features", batch_size=32):
    image_encoder = ImageEncoder(model_filename, input_name, output_name)
    image_shape = image_encoder.image_shape

    def encoder(image, boxes):
        image_patches = []
        for box in boxes:
            patch = extract_image_patch(image, box, image_shape[:2])
            if patch is None:
                print("WARNING: Failed to extract image patch: %s." % str(box))
                patch = np.random.uniform(
                    0., 255., image_shape).astype(np.uint8)
            image_patches.append(patch)
        image_patches = np.asarray(image_patches)
        return image_encoder(image_patches, batch_size)

    return encoder


def generate_detections(encoder, mot_dir, output_dir, detection_dir=None):
    """Generate detections with features.

    Parameters
    ----------
    encoder : Callable[image, ndarray] -> ndarray
        The encoder function takes as input a BGR color image and a matrix of
        bounding boxes in format `(x, y, w, h)` and returns a matrix of
        corresponding feature vectors.
    mot_dir : str
        Path to the MOTChallenge directory (can be either train or test).
    output_dir
        Path to the output directory. Will be created if it does not exist.
    detection_dir
        Path to custom detections. The directory structure should be the
        default MOTChallenge structure: `[sequence]/det/det.txt`. If None,
        uses the standard MOTChallenge detections.

    """
    if detection_dir is None:
        detection_dir = mot_dir
    try:
        os.makedirs(output_dir)
    except OSError as exception:
        if exception.errno == errno.EEXIST and os.path.isdir(output_dir):
            pass
        else:
            raise ValueError(
                "Failed to create output directory '%s'" % output_dir)

    for sequence in os.listdir(mot_dir):
        print("Processing %s" % sequence)
        sequence_dir = os.path.join(mot_dir, sequence)

        image_dir = os.path.join(sequence_dir, "img1")
        image_filenames = {
            int(os.path.splitext(f)[0]): os.path.join(image_dir, f)
            for f in os.listdir(image_dir)}

        detection_file = os.path.join(
            detection_dir, sequence, "det/det.txt")
        detections_in = np.loadtxt(detection_file, delimiter=',')
        detections_out = []

        frame_indices = detections_in[:, 0].astype(np.int32)
        min_frame_idx = frame_indices.astype(np.int32).min()
        max_frame_idx = frame_indices.astype(np.int32).max()
        for frame_idx in range(min_frame_idx, max_frame_idx + 1):
            print("Frame %05d/%05d" % (frame_idx, max_frame_idx))
            mask = frame_indices == frame_idx
            rows = detections_in[mask]

            if frame_idx not in image_filenames:
                print("WARNING could not find image for frame %d" % frame_idx)
                continue
            bgr_image = cv2.imread(
                image_filenames[frame_idx], cv2.IMREAD_COLOR)
            features = encoder(bgr_image, rows[:, 2:6].copy())
            detections_out += [np.r_[(row, feature)] for row, feature
                               in zip(rows, features)]

        output_filename = os.path.join(output_dir, "%s.npy" % sequence)
        np.save(
            output_filename, np.asarray(detections_out), allow_pickle=False)


def parse_args():
    """Parse command line arguments.
    """
    parser = argparse.ArgumentParser(description="Re-ID feature extractor")
    parser.add_argument(
        "--model",
        default="resources/networks/mars-small128.pb",
        help="Path to frozen inference graph protobuf.")
    parser.add_argument(
        "--mot_dir", help="Path to MOTChallenge directory (train or test)",
        required=True)
    parser.add_argument(
        "--detection_dir", help="Path to custom detections. Defaults to "
        "standard MOT detections. Directory structure should be the default "
        "MOTChallenge structure: [sequence]/det/det.txt", default=None)
    parser.add_argument(
        "--output_dir", help="Output directory. Will be created if it does not"
        " exist.", default="detections")
    return parser.parse_args()


def main():
    args = parse_args()
    encoder = create_box_encoder(args.model, batch_size=32)
    generate_detections(encoder, args.mot_dir, args.output_dir,
                        args.detection_dir)


if __name__ == "__main__":
    main()
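The encoder returned by create_box_encoder is also usable outside the MOTChallenge pipeline: it takes one BGR frame plus a matrix of (x, y, w, h) boxes and returns one feature row per box. A sketch under that assumption (the image path and boxes are made up; the .pb path matches the one tracker.py loads):

import cv2
import numpy as np
from tools.generate_detections import create_box_encoder

encoder = create_box_encoder("model_data/mars-small128.pb", batch_size=8)
frame = cv2.imread("image/test.jpg")  # hypothetical frame
boxes = np.array([[100, 120, 60, 140], [300, 90, 55, 130]])  # tlwh boxes
features = encoder(frame, boxes)
print(features.shape)  # (2, 128) for the mars-small128 model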
tracker.py
ADDED
@@ -0,0 +1,67 @@
from deep_sort.tracker import Tracker as DeepSortTracker
from tools import generate_detections as gdet
from deep_sort import nn_matching
from deep_sort.detection import Detection
import numpy as np


class Tracker:
    tracker = None
    encoder = None
    tracks = None

    def __init__(self):
        max_cosine_distance = 0.4
        nn_budget = None

        encoder_model_filename = 'model_data/mars-small128.pb'

        metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
        self.tracker = DeepSortTracker(metric)
        self.encoder = gdet.create_box_encoder(encoder_model_filename, batch_size=1)

    def update(self, frame, detections):
        if len(detections) == 0:
            self.tracker.predict()
            self.tracker.update([])
            self.update_tracks()
            return

        bboxes = np.asarray([d[:-2] for d in detections])
        bboxes[:, 2:] = bboxes[:, 2:] - bboxes[:, 0:2]  # (x1, y1, x2, y2) -> (x, y, w, h)
        scores = [d[-1] for d in detections]
        class_ids = [d[-2] for d in detections]
        features = self.encoder(frame, bboxes)

        dets = []
        for bbox_id, bbox in enumerate(bboxes):
            dets.append(Detection(bbox, scores[bbox_id], class_ids[bbox_id], features[bbox_id]))

        self.tracker.predict()
        self.tracker.update(dets)
        self.update_tracks()

    def update_tracks(self):
        tracks = []
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_id = track.get_class()
            id = track.track_id

            tracks.append(Track(id, bbox, class_id))

        self.tracks = tracks


class Track:
    track_id = None
    bbox = None
    class_id = None

    def __init__(self, id, bbox, class_id):
        self.track_id = id
        self.bbox = bbox
        self.class_id = class_id
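This wrapper expects each detection as a flat [x1, y1, x2, y2, class_id, score] row, exactly as object_tracker_demo.py builds them; update() strips the last two fields for the box, converts corners to top-left/width/height, and runs the appearance encoder. A minimal sketch with hand-written boxes (the frame path is a placeholder):

import cv2
from tracker import Tracker

tracker = Tracker()
frame = cv2.imread("image/test.jpg")  # hypothetical frame
detections = [
    [100, 120, 160, 260, 0, 0.91],  # person
    [300, 90, 355, 220, 2, 0.88],   # car
]
tracker.update(frame, detections)
for track in tracker.tracks:
    print(track.track_id, track.class_id, track.bbox)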
utils.py
ADDED
@@ -0,0 +1,22 @@
ID2LABEL = {0: "person", 1: "bicycle", 2: "car", 3: "motorcycle", 5: "bus", 7: "truck"}
PALETTE = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)

MODEL_PATH = "model_data/yolov8m.pt"

# MODEL_PATH = "model_data/best.pt"

AUTHEN_ACCOUNT = 'accountService.json'

def compute_color_for_labels(label):
    if label == 0:  # person
        color = (85, 45, 255)
    elif label == 2:  # car
        color = (222, 82, 175)
    elif label == 3:  # motorcycle
        color = (0, 204, 255)
    elif label == 5:  # bus
        color = (0, 149, 255)
    else:
        color = [int((p * (label ** 2 - label + 1)) % 255) for p in PALETTE]
    return tuple(color)
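Usage is a one-liner per class id; compute_color_for_labels returns a fixed BGR tuple for the hard-coded classes and a deterministic palette-derived color for everything else:

from utils import ID2LABEL, compute_color_for_labels

for cid, name in ID2LABEL.items():
    print(name, compute_color_for_labels(cid))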
video/.gitkeep
ADDED
File without changes