Akjava committed on
Commit 661ec13 · 1 Parent(s): e20aa9a

broken opencv-version
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.task filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
+ __pycache__
+ files
app.py ADDED
@@ -0,0 +1,436 @@
+ import spaces
+ import gradio as gr
+ import subprocess
+ from PIL import Image,ImageOps,ImageDraw,ImageFilter
+ import json
+ import os
+ import time
+ import mp_box
+ from mp_utils import get_pixel_cordinate_list,extract_landmark,get_pixel_cordinate,get_pixel_xyz
+ from glibvision.draw_utils import points_to_box,box_to_xy,plus_point
+
+ from glibvision.cv2_utils import plot_points,create_color_image,pil_to_bgr_image,set_plot_text,copy_image
+
+ from gradio_utils import save_image,save_buffer,clear_old_files,read_file
+
+ import cv2
+ from cv2_pose_estimate import estimate_head_pose,draw_head_pose
+
+ import numpy as np
+ from numpy.typing import NDArray
+
+ '''
+ inner_eyes_blur - inner eyes blur
+ iris_mask_blur - final iris edge blur
+ '''
+
+ set_plot_text(False,0.5,(200,200,200))
+
+ depath_ratio = 1.0
+
+ model_cordinates = [ (0.0, 0.0, 0.0),                        # nose tip
+                      (0.0, 344.0, -40.0 * depath_ratio),     # chin
+                      #(0.0, -160.0, -50.0),                  # center of eye
+                      # INNER
+                      (-110.0, -215.0, -60.0 * depath_ratio), # inner left eye corner
+                      (110.0, -215.0, -60.0 * depath_ratio),  # inner right eye corner
+
+                      (-300.0, -250.0, -90.0 * depath_ratio), # left eye left corner
+                      (300.0, -250.0, -90.0 * depath_ratio),  # right eye right corner
+
+                      (-125.0, 180.0, -70.0 * depath_ratio),  # left mouth corner
+                      (125.0, 180.0, -70.0 * depath_ratio) ]  # right mouth corner
+
+ def fit_cordinates(cordinates,center_x=512,center_y=512,base_distance=344):
+     ratio = base_distance/(cordinates[1][1])
+     fitted_cordinates = []
+     for cordinate in cordinates:
+         fitted_cordinate = [
+             cordinate[0]*ratio+center_x,
+             cordinate[1]*ratio+center_y,
+             cordinate[2]*ratio
+         ]
+         fitted_cordinates.append(fitted_cordinate)
+     return fitted_cordinates
+
+
+ def plot_model(cv2_image=None,center_x=512,center_y=512,base_distance=344):
+     if cv2_image is None:
+         #TODO add arg
+         cv2_image = create_color_image(np.zeros((1024, 1024,3),dtype=np.uint8))
+     fitted_cordinates = fit_cordinates(model_cordinates,center_x,center_y,base_distance)
+     ratio = base_distance/model_cordinates[1][1]
+
+     def adjust_cordinate(point):
+         return point
+
+     plot_points(cv2_image,[adjust_cordinate(fitted_cordinates[0])],False,6,(0,0,255),3,(255,0,0))
+     plot_points(cv2_image,[adjust_cordinate(fitted_cordinates[1])],False,6,(0,0,255),3,(255,0,0))
+
+     plot_points(cv2_image,[adjust_cordinate(fitted_cordinates[2]),adjust_cordinate(fitted_cordinates[4])],False,6,(0,0,255),3,(255,0,0))
+     plot_points(cv2_image,[adjust_cordinate(fitted_cordinates[3]),adjust_cordinate(fitted_cordinates[5])],False,6,(0,0,255),3,(255,0,0))
+     plot_points(cv2_image,[adjust_cordinate(fitted_cordinates[6]),adjust_cordinate(fitted_cordinates[7])],False,6,(0,0,255),3,(255,0,0))
+
+     return cv2_image
+
+
+ def set_model_cordinates(cordinates):
+     global model_cordinates
+     model_cordinates = cordinates
+
+ def process_images(image,base_image,
+                    camera_fov,double_check_offset_center,
+                    draw_base_model,fit_base_model,
+                    first_pnp,second_refine,final_iterative,debug_process,draw_mediapipe_mesh,draw_mediapipe_result,z_multiply=0.8,
+                    progress=gr.Progress(track_tqdm=True)):
+     clear_old_files()
+
+     image_indices = [4,199, #6, # center of eye
+                      133,362, # inner eye
+                      33,263,  # outer eye
+                      61,291]  # mouth
+
+     chin = 344
+     global model_cordinates
+
+     """ normalize ?
+     model_cordinates = [
+         [pt[0]/chin,pt[1]/chin,pt[2]/chin] for pt in model_cordinates
+     ]
+     """
+
+     def landmarks_to_model_corsinates(face_landmarks,indices,w,h):
+         cordinates = []
+         z_depth = w if w<h else h
+         z_depth *= z_multiply
+         for index in indices:
+             xyz = get_pixel_xyz(face_landmarker_result.face_landmarks,index,w,h)
+             #print(xyz,xyz[2]*z_multiply) #TODO choose?
+             cordinates.append([
+                 xyz[0],xyz[1],xyz[2]*z_depth
+             ])
+         return cordinates
+
+     if image is None:
+         raise gr.Error("Need Image")
+     cv2_image = pil_to_bgr_image(image)
+     size = cv2_image.shape
+     center: tuple[float, float] = (size[1] / 2, size[0] / 2)
+
+     if base_image is not None: # additional base image
+         base_image_indices = [
+             6,197,195,5,4, # nose center
+             122,196, 3, 51, 45,
+             351,419,248,281,275,
+
+             122,245,244,243,133, # eyes
+             351,465,464,463,362  # eyes
+         ]
+         # TODO check same?
+         cv2_base_image = pil_to_bgr_image(base_image)
+         mp_image,face_landmarker_result = extract_landmark(cv2_base_image,"face_landmarker.task",0,0,True)
+         h,w = cv2_base_image.shape[:2]
+
+         image_indices = base_image_indices
+         set_model_cordinates(landmarks_to_model_corsinates(face_landmarker_result.face_landmarks,image_indices,w,h))
+         print(image_indices)
+
+     import math
+     def calculate_distance(xy, xy2):
+         return math.sqrt((xy2[0] - xy[0])**2 + (xy2[1] - xy[1])**2)
+
+     mp_image,face_landmarker_result = extract_landmark(cv2_image,"face_landmarker.task",0,0,True)
+     im = mp_image.numpy_view()
+     h,w = im.shape[:2]
+
+     first_landmarker_result = None
+     if double_check_offset_center:
+         root_cordinate = get_pixel_cordinate(face_landmarker_result.face_landmarks,image_indices[0],w,h) # nose tip
+         diff_center_x = center[0] - root_cordinate[0]
+         diff_center_y = center[1] - root_cordinate[1]
+         base = np.zeros_like(cv2_image)
+         copy_image(base,cv2_image,diff_center_x,diff_center_y)
+         first_landmarker_result = face_landmarker_result
+         mp_image,face_landmarker_result = extract_landmark(base,"face_landmarker.task",0,0,True)
+         im = mp_image.numpy_view()
+     else:
+         diff_center_x = 0
+         diff_center_y = 0
+     #return base,"",""
+
+     cordinates = get_pixel_cordinate_list(face_landmarker_result.face_landmarks,image_indices,w,h)
+
+     if draw_mediapipe_mesh:
+         image = mp_box.draw_landmarks_on_image(face_landmarker_result,image)
+         cv2_image = pil_to_bgr_image(image)
+
+     chin_distance = calculate_distance(cordinates[0],cordinates[1])
+     # tried to run PnP from the fitted pose, but it does not seem to work
+     #fitted_cordinates = fit_cordinates(model_cordinates,cordinates[0][0],cordinates[0][1],chin_distance)
+     if fit_base_model:
+         # did not get a good result
+         #model_points: NDArray = np.array(fitted_cordinates, dtype="double")
+         model_points: NDArray = np.array(model_cordinates, dtype="double")
+     else:
+         model_points: NDArray = np.array(model_cordinates, dtype="double")
+
+     focal_length: float = calculate_distance(cordinates[0],cordinates[1])
+     focal_length = focal_length*camera_fov
+
+     #image_size = size[0] #TODO
+     #f = (image_size / 2) / np.tan(np.deg2rad(camera_fov / 2))
+     #focal_length = f
+     #print(f"fov ={camera_fov} size = {image_size} focal_length = {focal_length}")
+
+     camera_matrix: NDArray = np.array([
+         [focal_length, 0, center[0]],
+         [0, focal_length, center[1]],
+         [0, 0, 1]
+     ], dtype="double")
+     dist_coeffs: NDArray = np.zeros((4, 1))
+
+     # offsetting the center usually improves the result
+
+     image_points: NDArray = np.array(cordinates, dtype="double")
+
+     from scipy.spatial.transform import Rotation as R
+     def print_euler(rotation_vector,label=""):
+         order = "yxz"
+         rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
+         r = R.from_matrix(rotation_matrix)
+         euler_angles = r.as_euler(order, degrees=True)
+         label = f"{label} Euler Angles {order} (degrees): {euler_angles}"
+         return label
+
+     rotation_vector = None
+     translation_vector = None
+     im_with_pose = cv2_image
+     result_label = None
+     mediapipe_text = None
+     # keep the MediaPipe matrices so the "Mediapipe" PnP option can reuse them
+     mp_rotation_matrix = None
+     mp_scaled_translation_vector = None
+
+     def face_landmarker_result_to_angle_label(face_landmarker_result,order="yxz"):
+         nonlocal im_with_pose,mp_rotation_matrix,mp_scaled_translation_vector
+         if len(face_landmarker_result.facial_transformation_matrixes)>0:
+             transformation_matrix = face_landmarker_result.facial_transformation_matrixes[0]
+             rotation_matrix, translation_vector = transformation_matrix[:3, :3],transformation_matrix[:3, 3]
+             #TODO change base-size
+             scaled_translation_vector = (translation_vector[0]*1024,translation_vector[1]*1024,translation_vector[2]*1024)
+             #scaled_translation_vector = (-512,-512,-1024)
+             mp_rotation_matrix = rotation_matrix
+             mp_scaled_translation_vector = scaled_translation_vector
+             if draw_mediapipe_result:
+                 im_with_pose = draw_head_pose(im_with_pose, image_points, rotation_matrix, scaled_translation_vector, camera_matrix, dist_coeffs,32,-diff_center_x,-diff_center_y)
+             #print("mediapipe",scaled_translation_vector)
+             r = R.from_matrix(rotation_matrix)
+             euler_angles = r.as_euler(order, degrees=True)
+             label = f"MediaPipe Euler Angles {order} (degrees): {euler_angles}"
+             return label
+
+     if first_landmarker_result is not None:
+         mediapipe_first_text = face_landmarker_result_to_angle_label(first_landmarker_result)
+     else:
+         mediapipe_first_text = ""
+
+     mediapipe_second_text = face_landmarker_result_to_angle_label(face_landmarker_result)
+
+     if first_pnp!="None":
+         if first_pnp == "EPNP":
+             flags = cv2.SOLVEPNP_EPNP
+         elif first_pnp == "ITERATIVE":
+             flags = cv2.SOLVEPNP_ITERATIVE
+         elif first_pnp == "IPPE":
+             flags = cv2.SOLVEPNP_IPPE
+         else:
+             flags = cv2.SOLVEPNP_SQPNP
+         if first_pnp == "Mediapipe":
+             # reuse the pose MediaPipe already estimated
+             rotation_vector, _ = cv2.Rodrigues(mp_rotation_matrix)
+             translation_vector = mp_scaled_translation_vector
+         else:
+             translation_vector = None
+             #translation_vector = np.array([cordinates[0][0],cordinates[0][1],focal_length],dtype="double")
+             #print("initial",translation_vector)
+             rotation_vector, translation_vector = estimate_head_pose(cv2_image, model_points,image_points, camera_matrix, dist_coeffs,flags,None,translation_vector)
+         #print(translation_vector)
+         im_with_pose = cv2_image
+         result_label = print_euler(rotation_vector,first_pnp)
+         print("firstpnp",translation_vector)
+         if debug_process:
+             im_with_pose = draw_head_pose(cv2_image, image_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs,128,-diff_center_x,-diff_center_y)
+
+     if first_pnp!="None" and second_refine!="None":
+         criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 1000, 1e-8) # iteration termination criteria
+         if second_refine == "LM":
+             rotation_vector, translation_vector = cv2.solvePnPRefineLM(model_points, image_points, camera_matrix, dist_coeffs, rotation_vector, translation_vector, criteria=criteria)
+         else:
+             rotation_vector, translation_vector = cv2.solvePnPRefineVVS(model_points, image_points, camera_matrix, dist_coeffs, rotation_vector, translation_vector, criteria=criteria)
+         if debug_process:
+             im_with_pose = draw_head_pose(im_with_pose, image_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs,128+64,-diff_center_x,-diff_center_y)
+         result_label = print_euler(rotation_vector,second_refine)
+         #print("refine",translation_vector)
+
+     if final_iterative:
+         (success, rotation_vector, translation_vector) = cv2.solvePnP(
+             model_points, image_points, camera_matrix, dist_coeffs,rotation_vector,translation_vector,flags=cv2.SOLVEPNP_ITERATIVE)
+         if success:
+             result_label = print_euler(rotation_vector,"SOLVEPNP_ITERATIVE")
+         else:
+             gr.Warning("final_iterative failed")
+
+     # draw the final one
+     if rotation_vector is not None:
+         im_with_pose = draw_head_pose(im_with_pose, image_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs,255,-diff_center_x,-diff_center_y)
+
+     #print("opencv",translation_vector)
+
+     if draw_base_model:
+         if fit_base_model:
+             im_with_pose = plot_model(im_with_pose,cordinates[0][0],cordinates[0][1],chin_distance)
+         else:
+             im_with_pose = plot_model(im_with_pose)
+
+     return cv2.cvtColor(im_with_pose,cv2.COLOR_BGR2RGB),result_label,mediapipe_first_text,mediapipe_second_text
+
+
+ css="""
+ #col-left {
+     margin: 0 auto;
+     max-width: 640px;
+ }
+ #col-right {
+     margin: 0 auto;
+     max-width: 640px;
+ }
+ .grid-container {
+     display: flex;
+     align-items: center;
+     justify-content: center;
+     gap:10px
+ }
+
+ .image {
+     width: 128px;
+     height: 128px;
+     object-fit: cover;
+ }
+
+ .text {
+     font-size: 16px;
+ }
+ """
+
+ with gr.Blocks(css=css, elem_id="demo-container") as demo:
+     with gr.Column():
+         gr.HTML(read_file("demo_header.html"))
+         gr.HTML(read_file("demo_tools.html"))
+         with gr.Row():
+             with gr.Column():
+                 image = gr.Image(height=800,sources=['upload','clipboard'],image_mode='RGB',elem_id="image_upload", type="pil", label="Image")
+
+                 with gr.Row(elem_id="prompt-container", equal_height=False):
+                     with gr.Row():
+                         btn = gr.Button("Pose Estimate", elem_id="run_button",variant="primary")
+
+                 with gr.Accordion(label="Advanced Settings", open=True):
+                     # needs a better landmarker
+                     base_image = gr.Image(sources=['upload','clipboard'],image_mode='RGB',elem_id="image_upload", type="pil", label="Image",visible=False)
+
+                     with gr.Row(equal_height=True):
+                         camera_fov = gr.Slider(info="does not affect MediaPipe; multiplies the nose-chin distance",
+                                                label="Multiply value",
+                                                minimum=0.1,
+                                                maximum=2.0,
+                                                step=0.01,
+                                                value=1.2)
+                         double_check_offset_center = gr.Checkbox(label="offset center point",value=True,info="move the center and detect again (usually more accurate)")
+                         z_multiply = gr.Slider(info="nose depth",
+                                                label="Z-Multiply",
+                                                minimum=0.1,
+                                                maximum=1.5,
+                                                step=0.01,
+                                                value=0.8)
+                     with gr.Row(equal_height=True):
+                         draw_base_model = gr.Checkbox(label="draw base model",value=False,info="draw base model")
+                         fit_base_model = gr.Checkbox(label="fit base model",value=False,info="visual only, not used as the model")
+
+                     first_pnp = gr.Radio(label="PnP",choices=["None","EPNP","SQPNP","IPPE","ITERATIVE","Mediapipe"],value="EPNP")
+                     second_refine = gr.Radio(label="PnP refine",choices=["None","LM","VVS"],value="LM")
+                     with gr.Row(equal_height=True):
+                         final_iterative = gr.Checkbox(label="PnP final iterative",value=False,info="sometimes good")
+                         debug_process = gr.Checkbox(label="Debug Process",value=False)
+                         draw_mediapipe_mesh = gr.Checkbox(label="Draw mediapipe mesh",value=False)
+                         draw_mediapipe_result = gr.Checkbox(label="Draw mediapipe result",value=False)
+                     plot_button = gr.Button("Plot Model", elem_id="run_button")
+
+             with gr.Column():
+                 result_image = gr.Image(height=760,label="Result", elem_id="output-animation",image_mode='RGB')
+                 result_text = gr.Textbox(label="cv2 result")
+                 mediapipe_first_text = gr.Textbox(label="first mediapipe result")
+                 mediapipe_last_text = gr.Textbox(label="2nd or last mediapipe result")
+
+         btn.click(fn=process_images, inputs=[image,base_image,
+                                              camera_fov,double_check_offset_center,
+                                              draw_base_model,fit_base_model,
+                                              first_pnp,second_refine,final_iterative,debug_process,draw_mediapipe_mesh,draw_mediapipe_result,
+                                              z_multiply # Z-Multiply slider
+                                              ],outputs=[result_image,result_text,mediapipe_first_text,mediapipe_last_text],api_name='infer')
+         plot_button.click(fn=plot_model,inputs=[],outputs=[result_image])
+
+         example_images = [
+             ["examples/02316230.jpg"],
+             ["examples/00003245_00.jpg"],
+             ["examples/00827009.jpg"],
+             ["examples/00002062.jpg"],
+             ["examples/00824008.jpg"],
+             ["examples/00825000.jpg"],
+             ["examples/00826007.jpg"],
+             ["examples/00824006.jpg"],
+             ["examples/00828003.jpg"],
+             ["examples/00002200.jpg"],
+             ["examples/00005259.jpg"],
+             ["examples/00018022.jpg"],
+             ["examples/img-above.jpg"],
+             ["examples/00100265.jpg"],
+             ["examples/00039259.jpg"],
+         ]
+         example1 = gr.Examples(
+             examples=example_images,label="Image",
+             inputs=[image],examples_per_page=8
+         )
+         gr.HTML(read_file("demo_footer.html"))
+
+ if __name__ == "__main__":
+     demo.launch()
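For reference, a minimal sketch of the PnP step that process_images performs: build a camera matrix from the nose-chin pixel distance, solve with EPNP, refine with LM, then convert to Euler angles. The 2D points below are made up for illustration, not taken from a real detection.

import cv2
import numpy as np
from scipy.spatial.transform import Rotation as R

# 3D model points (same layout app.py uses) and fabricated 2D detections
model_points = np.array([(0.0, 0.0, 0.0), (0.0, 344.0, -40.0),
                         (-110.0, -215.0, -60.0), (110.0, -215.0, -60.0),
                         (-300.0, -250.0, -90.0), (300.0, -250.0, -90.0),
                         (-125.0, 180.0, -70.0), (125.0, 180.0, -70.0)], dtype="double")
image_points = np.array([(512, 512), (512, 700), (450, 400), (574, 400),
                         (350, 390), (674, 390), (450, 610), (574, 610)], dtype="double")

# focal length approximated from the nose-chin distance, camera_fov slider = 1.2
focal_length = np.linalg.norm(image_points[0] - image_points[1]) * 1.2
camera_matrix = np.array([[focal_length, 0, 512], [0, focal_length, 512], [0, 0, 1]], dtype="double")
dist_coeffs = np.zeros((4, 1))

ok, rvec, tvec = cv2.solvePnP(model_points, image_points, camera_matrix,
                              dist_coeffs, flags=cv2.SOLVEPNP_EPNP)
rvec, tvec = cv2.solvePnPRefineLM(model_points, image_points, camera_matrix,
                                  dist_coeffs, rvec, tvec)
rot, _ = cv2.Rodrigues(rvec)
print(R.from_matrix(rot).as_euler("yxz", degrees=True))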
cv2_pose_estimate.py ADDED
@@ -0,0 +1,273 @@
+ import cv2
+ import numpy as np
+ from numpy.typing import NDArray
+ import sys
+ from mp_utils import get_pixel_cordinate_list,extract_landmark
+
+ def estimate_head_pose(im: NDArray, model_points: NDArray, image_points, camera_matrix: NDArray, dist_coeffs: NDArray, flags=cv2.SOLVEPNP_ITERATIVE, rotation_vector=None, translation_vector=None) -> tuple[NDArray, NDArray]:
+     """
+     Estimates the head pose from an image.
+
+     Args:
+         im: Input image (only its shape is used).
+         model_points: 3D model points.
+         image_points: 2D image points.
+         camera_matrix: Camera intrinsic matrix.
+         dist_coeffs: Lens distortion coefficients.
+
+     Returns:
+         rotation_vector: Estimated rotation vector.
+         translation_vector: Estimated translation vector.
+     """
+     size = im.shape
+
+     '''
+     image_points: NDArray = np.array([
+         (359, 391), # nose tip
+         (399, 561), # chin
+         (337, 297), # left eye left corner
+         (513, 301), # right eye right corner
+         (345, 465), # left mouth corner
+         (453, 469)  # right mouth corner
+     ], dtype="double")
+     '''
+
+     model_points = model_points + 500
+     (success, rotation_vector, translation_vector) = cv2.solvePnP(
+         model_points, image_points, camera_matrix, dist_coeffs, flags=flags,
+     )
+     print(model_points)
+     print(image_points)
+     print(camera_matrix)
+
+     if not success:
+         raise RuntimeError("solvePnP failed.")
+
+     return rotation_vector, translation_vector
+
+
+ def draw_head_pose(image: NDArray, image_points: NDArray, rotation_vector: NDArray, translation_vector: NDArray, camera_matrix: NDArray, dist_coeffs: NDArray, color_max=255, offset_x=0, offset_y=0) -> NDArray:
+     """
+     Draws the head pose (XYZ axes) on the image.
+
+     Args:
+         image: Input image.
+         image_points: 2D image points.
+         rotation_vector: Estimated rotation vector.
+         translation_vector: Estimated translation vector.
+         camera_matrix: Camera intrinsic matrix.
+         dist_coeffs: Lens distortion coefficients.
+
+     Returns:
+         Image with head pose drawn.
+     """
+
+     # Define the 3D points for the XYZ axes
+     axis_length = 500.0  # length of the axes
+     axis_points_3D: NDArray = np.array([
+         [0, 0, 0],            # origin
+         [axis_length, 0, 0],  # X axis
+         [0, axis_length, 0],  # Y axis
+         [0, 0, axis_length]   # Z axis
+     ], dtype='float32')
+
+     # Project the 3D points to the 2D image plane
+     (axis_points_2D, _) = cv2.projectPoints(
+         axis_points_3D, rotation_vector, translation_vector, camera_matrix, dist_coeffs
+     )
+     axis_points_2D = axis_points_2D.astype(int)
+
+     # Draw the axes on the image
+     origin = tuple(axis_points_2D[0].ravel())
+     cv2.line(image, origin, tuple(axis_points_2D[1].ravel()), (0, 0, color_max), 3)  # X axis (red)
+     cv2.line(image, origin, tuple(axis_points_2D[2].ravel()), (0, color_max, 0), 3)  # Y axis (green)
+     cv2.line(image, origin, tuple(axis_points_2D[3].ravel()), (color_max, 0, 0), 3)  # Z axis (blue)
+
+     for p in image_points:
+         cv2.circle(image, (int(p[0]+offset_x), int(p[1]+offset_y)), 3, (0, 0, 255), -1)
+
+     return image
+
+
+ def main():
+     # 3D model points.
+     '''
+     model_points: NDArray = np.array([
+         (0.0, 0.0, 0.0),          # nose tip
+         (0.0, 300.0, -65.0),      # chin
+         (-225.0, -170.0, -135.0), # left eye left corner
+         (225.0, -170.0, -135.0),  # right eye right corner
+         (-150.0, -150.0, -125.0), # left mouth corner
+         (150.0, -150.0, -125.0)   # right mouth corner
+     ])
+     '''
+
+     model_points: NDArray = np.array([
+         (0.0, 0.0, 0.0),         # nose tip
+         (0.0, -344.0, -40.0),    # chin
+         #(0.0, -160.0, -50.0),   # center of eye
+         (-110.0, 215.0, -60.0),  # inner left eye corner
+         (110.0, 215.0, -60.0),   # inner right eye corner
+         (-300.0, 250.0, -90.0),  # left eye left corner
+         (300.0, 250.0, -90.0),   # right eye right corner
+         (-185.0, -180.0, -70.0), # left mouth corner
+         (185.0, -180.0, -70.0)   # right mouth corner
+     ])
+
+     """
+     model_points: NDArray = np.array([
+         (0.0, 0.0, 0.0),         # nose tip
+         (0.0, -450.0, 0.0),      # chin
+         (-110.0, 175.0, -20.0),  # inner left eye corner
+         (110.0, 175.0, -20.0),   # inner right eye corner
+         (-300.0, 200.0, -40.0),  # left eye left corner
+         (300.0, 200.0, -40.0),   # right eye right corner
+         (-176.0, -200.0, -20.0), # left mouth corner
+         (175.0, -200.0, -20.0)   # right mouth corner
+     ])
+     """
+
+     square_model_points: NDArray = np.array([
+         (-100.0, -100.0, 0), # left eye left corner
+         (100.0, -100.0, 0),  # right eye right corner
+         (-100.0, 100.0, 0),  # left mouth corner
+         (100.0, 100.0, 0)    # right mouth corner
+     ])
+
+     # Example image and camera parameters (replace with actual values)
+     image_path = sys.argv[1]
+     mp_image,face_landmarker_result = extract_landmark(image_path)
+     im = mp_image.numpy_view()
+     h,w = im.shape[:2]
+     cordinates = get_pixel_cordinate_list(face_landmarker_result.face_landmarks,[4,199, #6, # center of eye
+                                           33,263,133,362,61,291],w,h)
+     print(cordinates)
+     image_points: NDArray = np.array(cordinates, dtype="double")
+
+     import math
+     def calculate_distance(xy, xy2):
+         return math.sqrt((xy2[0] - xy[0])**2 + (xy2[1] - xy[1])**2)
+
+     if im is None:
+         raise FileNotFoundError(f"Could not open or find the image file: {image_path}")
+     size = im.shape
+     focal_length: float = calculate_distance(cordinates[0],cordinates[1])
+     focal_length = focal_length*1.5
+     print("focal length",focal_length)
+     center: tuple[float, float] = (size[1] / 2, size[0] / 2)
+     center = cordinates[0]
+     camera_matrix: NDArray = np.array([
+         [focal_length, 0, center[0]],
+         [0, focal_length, center[1]],
+         [0, 0, 1]
+     ], dtype="double")
+     dist_coeffs: NDArray = np.zeros((4, 1))  # assuming no lens distortion
+
+     # 2D image points. If you change the image, you need to change these values.
+     '''
+     image_points: NDArray = np.array([
+         (321, 571), # nose tip
+         (423, 852), # chin
+         (201, 406), # left eye left corner
+         (529, 363), # right eye right corner
+         (336, 705), # left mouth corner
+         (483, 693)  # right mouth corner
+     ], dtype="double")
+     '''
+     """
+     image_points: NDArray = np.array([
+         #(663, 325), # nose tip
+         (655, 388),
+         (705, 555), # chin
+         (549, 296), # inner left eye corner
+         (651, 291), # inner right eye corner
+         (453, 303), # left eye left corner
+         (718, 294), # right eye right corner
+         (591, 474), # left mouth corner
+         (715, 472)  # right mouth corner
+     ], dtype="double")
+     """
+
+     square_image_points: NDArray = np.array([
+         (549, 296),
+         (651, 291),
+         (573, 386),
+         (691, 370),
+     ], dtype="double")
+
+     flags_list = [
+         cv2.SOLVEPNP_EPNP #cv2.SOLVEPNP_ITERATIVE#,cv2.SOLVEPNP_SQPNP,cv2.SOLVEPNP_EPNP
+     ]
+     im_with_pose = im.copy()
+     for flags in flags_list:
+         rotation_vector, translation_vector = estimate_head_pose(im, model_points,image_points, camera_matrix, dist_coeffs,flags)
+         #print(f"Rotation Vector:\n {rotation_vector}")
+         #print(f"Translation Vector:\n {translation_vector}")
+         # initial pose
+         #im_with_pose = draw_head_pose(im_with_pose, image_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs)
+
+         from scipy.spatial.transform import Rotation as R
+         def print_euler(rotation_vector):
+             order = "yxz"
+             rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
+             r = R.from_matrix(rotation_matrix)
+             euler_angles = r.as_euler(order, degrees=True)
+             print(f"Euler Angles {order} (degrees): {euler_angles}")
+
+         print_euler(rotation_vector)
+         criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 1000, 1e-8)  # iteration termination criteria
+
+         rotation_vector, translation_vector = cv2.solvePnPRefineLM(model_points, image_points, camera_matrix, dist_coeffs, rotation_vector, translation_vector, criteria=criteria)
+         im_with_pose = draw_head_pose(im_with_pose, image_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs,128)
+         print_euler(rotation_vector)
+
+         #rotation_vector[0]=0
+         #rotation_vector[1]=0
+         #rotation_vector[2]=0
+
+         #(success, rotation_vector, translation_vector) = cv2.solvePnP(
+         #    model_points, image_points, camera_matrix, dist_coeffs,rotation_vector,translation_vector,flags=cv2.SOLVEPNP_ITERATIVE)
+
+         im_with_pose = draw_head_pose(im_with_pose, image_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs)
+
+         #print_euler(rotation_vector)
+
+         (rotation_matrix, jacobian) = cv2.Rodrigues(rotation_vector)
+         mat = np.hstack((rotation_matrix, translation_vector))
+
+         # extract yaw, pitch, roll
+         (_, _, _, _, _, _, eulerAngles) = cv2.decomposeProjectionMatrix(mat)
+         print(eulerAngles)
+         #rvec, tvec = cv2.solvePnPRefineVVS(model_points, image_points, camera_matrix, dist_coeffs, rotation_vector, translation_vector, criteria=criteria)
+         #im_with_pose = draw_head_pose(im_with_pose, image_points, rvec, tvec, camera_matrix, dist_coeffs)
+
+         # square variant
+         #rvec, tvec = estimate_head_pose(im, square_model_points,square_image_points, camera_matrix, dist_coeffs,cv2.SOLVEPNP_IPPE_SQUARE)
+         # not so good
+         #im_with_pose = draw_head_pose(im_with_pose, square_image_points, rvec, tvec, camera_matrix, dist_coeffs)
+
+     # convert the rotation matrix to Euler angles
+     #euler_angles = cv2.decomposeProjectionMatrix(rotation_matrix)[-1]
+
+     # Display image
+     cv2.imshow("Output", cv2.cvtColor(im_with_pose, cv2.COLOR_BGR2RGB))
+     cv2.waitKey(0)
+     cv2.destroyAllWindows()
+     cv2.imwrite("result.jpg",cv2.cvtColor(im_with_pose, cv2.COLOR_BGR2RGB))
+
+ if __name__ == "__main__":
+     main()
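A quick smoke test for draw_head_pose that needs no face image: project the axes onto a blank canvas with an identity rotation. The camera parameters here are arbitrary assumptions, not values from the app.

import cv2
import numpy as np
from cv2_pose_estimate import draw_head_pose

canvas = np.zeros((1024, 1024, 3), dtype=np.uint8)
camera_matrix = np.array([[800.0, 0, 512], [0, 800.0, 512], [0, 0, 1]])
rvec = np.zeros((3, 1))                    # no rotation
tvec = np.array([[0.0], [0.0], [1000.0]])  # push the axes in front of the camera
out = draw_head_pose(canvas, np.array([[512.0, 512.0]]), rvec, tvec,
                     camera_matrix, np.zeros((4, 1)))
cv2.imwrite("axes.jpg", out)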
demo_footer.html ADDED
@@ -0,0 +1,3 @@
+ <div>
+ <p> Images are generated with <a href="https://huggingface.co/black-forest-labs/FLUX.1-schnell">FLUX.1-schnell</a> and licensed under <a href="http://www.apache.org/licenses/LICENSE-2.0">the Apache 2.0 License</a></p>
+ </div>
demo_header.html ADDED
@@ -0,0 +1,17 @@
+ <div style="text-align: center;">
+ <h1>
+ Mediapipe Face-Pose Estimation plus OpenCV
+ </h1>
+ <div class="grid-container">
+ <img src="https://akjava.github.io/AIDiagramChatWithVoice-FaceCharacter/webp/128/00544245.webp" alt="Mediapipe Face Detection" class="image">
+
+ <p class="text">
+ This Space uses the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache 2.0</a> licensed <a href="https://ai.google.dev/edge/mediapipe/solutions/vision/face_landmarker">Mediapipe FaceLandmarker</a>.<br>
+ Current MediaPipe face-landmark detection struggles with faces rotated more than 45 degrees (due to limitations in the training data),<br>
+ so a new tool or method is needed to achieve better accuracy.<br>
+ OpenCV: I've tried it out, but there's still room for improvement in how the features work together.<br>
+ TODO: change the base model
+ </p>
+ </div>
+
+ </div>
demo_tools.html ADDED
@@ -0,0 +1,11 @@
+ <div style="text-align: center;">
+ <p>
+ <a href="https://huggingface.co/spaces/Akjava/flux1-schnell-img2img">Flux1-Img2Img(GPU)</a> |
+ <a href="https://huggingface.co/spaces/Akjava/flux1-schnell-mask-inpaint">Flux1-Inpaint(GPU)</a> |
+ <a href="https://huggingface.co/spaces/Akjava/mediapipe-68-points-facial-mask">Create 68 points Parts Mask</a> |
+ <a href="https://huggingface.co/spaces/Akjava/histgram-color-matching">Histogram Color Matching</a> |
+ <a href="https://huggingface.co/spaces/Akjava/WebPTalkHead">WebP anime with 3 images</a> |
+ <a href="https://huggingface.co/spaces/Akjava/WebP-Resize-Convert">WebP Resize Animation</a>
+ </p>
+ <p></p>
+ </div>
examples/00002062.jpg ADDED
examples/00002200.jpg ADDED
examples/00003245_00.jpg ADDED
examples/00005259.jpg ADDED
examples/00018022.jpg ADDED
examples/00039259.jpg ADDED
examples/00100265.jpg ADDED
examples/00824006.jpg ADDED
examples/00824008.jpg ADDED
examples/00825000.jpg ADDED
examples/00826007.jpg ADDED
examples/00827009.jpg ADDED
examples/00828003.jpg ADDED
examples/02316230.jpg ADDED
examples/img-above.jpg ADDED
face_landmarker.task ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:64184e229b263107bc2b804c6625db1341ff2bb731874b0bcc2fe6544e0bc9ff
+ size 3758596
face_landmarker.task.txt ADDED
@@ -0,0 +1,8 @@
+ Face landmark detection
+ https://ai.google.dev/edge/mediapipe/solutions/vision/face_landmarker
+
+ The model card page is
+ https://storage.googleapis.com/mediapipe-assets/MediaPipe%20BlazeFace%20Model%20Card%20(Short%20Range).pdf
+
+ The license is Apache 2.0
+ https://www.apache.org/licenses/LICENSE-2.0.html
glibvision/common_utils.py ADDED
@@ -0,0 +1,112 @@
+ import os
+
+ def check_exists_files(files,dirs,exit_on_error=True):
+     if files is not None:
+         if isinstance(files, str):
+             files = [files]
+         for file in files:
+             if not os.path.isfile(file):
+                 print(f"File {file} not found")
+                 if exit_on_error:
+                     exit(1)
+                 else:
+                     return 1
+     if dirs is not None:
+         if isinstance(dirs, str):
+             dirs = [dirs]
+         for dir in dirs:
+             if not os.path.isdir(dir):
+                 print(f"Dir {dir} not found")
+                 if exit_on_error:
+                     exit(1)
+                 else:
+                     return 1
+     return 0
+
+ image_extensions = [".jpg"]
+
+ def add_name_suffix(file_name,suffix,replace_suffix=False):
+     if not suffix.startswith("_"): # force the underscore
+         suffix = "_"+suffix
+
+     name,ext = os.path.splitext(file_name)
+     if replace_suffix:
+         index = name.rfind("_")
+         if index!=-1:
+             return f"{name[0:index]}{suffix}{ext}"
+
+     return f"{name}{suffix}{ext}"
+
+ def replace_extension(file_name,new_extension,suffix=None,replace_suffix=False):
+     if not new_extension.startswith("."):
+         new_extension = "."+new_extension
+
+     name,ext = os.path.splitext(file_name)
+     new_file = f"{name}{new_extension}"
+     if suffix:
+         return add_name_suffix(name+new_extension,suffix,replace_suffix)
+     return new_file
+
+ def list_digit_images(input_dir,sort=True):
+     digit_images = []
+     global image_extensions
+     files = os.listdir(input_dir)
+     for file in files:
+         if file.endswith(".jpg"): #TODO check other image extensions
+             base,ext = os.path.splitext(file)
+             if not base.isdigit():
+                 continue
+             digit_images.append(file)
+
+     if sort:
+         digit_images.sort()
+
+     return digit_images
+
+ def list_suffix_images(input_dir,suffix,is_digit=True,sort=True):
+     digit_images = []
+     global image_extensions
+     files = os.listdir(input_dir)
+     for file in files:
+         if file.endswith(".jpg"): #TODO check other image extensions
+             base,ext = os.path.splitext(file)
+             if base.endswith(suffix):
+                 if is_digit:
+                     if not base.replace(suffix,"").isdigit():
+                         continue
+                 digit_images.append(file)
+
+     if sort:
+         digit_images.sort()
+
+     return digit_images
+
+ import time
+
+ class ProgressTracker:
+     """
+     Tracks the progress of a task and prints the elapsed and remaining time.
+     """
+
+     def __init__(self,key, total_target):
+         """
+         Constructor
+
+         Args:
+             key: label used in progress messages
+             total_target (int): total number of items to process
+         """
+         self.key = key
+         self.total_target = total_target
+         self.complete_target = 0
+         self.start_time = time.time()
+
+     def update(self):
+         """
+         Advance the progress by one and print the elapsed and remaining time.
+         """
+         self.complete_target += 1
+         current_time = time.time()
+         consumed_time = current_time - self.start_time
+         remain_time = (consumed_time / self.complete_target) * (self.total_target - self.complete_target) if self.complete_target > 0 else 0
+         print(f"stepped {self.key} {self.total_target} of {self.complete_target}, consumed {(consumed_time / 60):.1f} min, remain {(remain_time / 60):.1f} min")
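Assumed usage of the naming helpers and ProgressTracker above (the file names are made up):

from glibvision.common_utils import add_name_suffix, replace_extension, ProgressTracker

print(add_name_suffix("0001.jpg", "mask"))        # 0001_mask.jpg
print(replace_extension("0001_mask.jpg", "png"))  # 0001_mask.png

tracker = ProgressTracker("resize", total_target=3)
for _ in range(3):
    tracker.update()  # prints elapsed and estimated remaining time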
glibvision/cv2_utils.py ADDED
@@ -0,0 +1,175 @@
+ import cv2
+ import numpy as np
+
+ # 2024-11-27 add copy_image
+
+ def draw_bbox(image,box,color=(255,0,0),thickness=1):
+     if thickness==0:
+         return
+
+     left = int(box[0])
+     top = int(box[1])
+     right = int(box[0]+box[2])
+     bottom = int(box[1]+box[3])
+     box_points = [(left,top),(right,top),(right,bottom),(left,bottom)]
+
+     cv2.polylines(image, [np.array(box_points)], isClosed=True, color=color, thickness=thickness)
+
+
+ def to_int_points(points):
+     int_points = []
+     for point in points:
+         int_points.append([int(point[0]),int(point[1])])
+     return int_points
+
+ def draw_text(img, text, point, font_scale=0.5, color=(200, 200, 200), thickness=1):
+     font = cv2.FONT_HERSHEY_SIMPLEX
+     cv2.putText(img, str(text), point, font, font_scale, color, thickness, cv2.LINE_AA)
+
+ plot_text_color = (200, 200, 200)
+ plot_text_font_scale = 0.5
+ plot_index = 1
+ plot_text = True
+
+ def set_plot_text(is_plot,text_font_scale,text_color):
+     global plot_index,plot_text,plot_text_font_scale,plot_text_color
+     plot_text = is_plot
+     plot_index = 1
+     plot_text_font_scale = text_font_scale
+     plot_text_color = text_color
+
+ def plot_points(image,points,isClosed=False,circle_size=3,circle_color=(255,0,0),line_size=1,line_color=(0,0,255)):
+     global plot_index,plot_text
+     int_points = to_int_points(points)
+     if circle_size>0:
+         for point in int_points:
+             cv2.circle(image,point,circle_size,circle_color,-1)
+             if plot_text:
+                 draw_text(image,plot_index,point,plot_text_font_scale,plot_text_color)
+                 plot_index+=1
+     if line_size>0:
+         cv2.polylines(image, [np.array(int_points)], isClosed=isClosed, color=line_color, thickness=line_size)
+
+ def fill_points(image,points,thickness=1,line_color=(255,255,255),fill_color=(255,255,255)):
+     np_points = np.array(points,dtype=np.int32)
+     cv2.fillPoly(image, [np_points], fill_color)
+     cv2.polylines(image, [np_points], isClosed=True, color=line_color, thickness=thickness)
+
+ def get_image_size(cv2_image):
+     return cv2_image.shape[:2]
+
+ def get_channel(np_array):
+     return np_array.shape[2] if np_array.ndim == 3 else 1
+
+ def get_numpy_text(np_array,key=""):
+     channel = get_channel(np_array)
+     return f"{key} shape = {np_array.shape} channel = {channel} ndim = {np_array.ndim} size = {np_array.size}"
+
+
+ def gray3d_to_2d(grayscale: np.ndarray) -> np.ndarray:
+     """
+     Convert a 3-dimensional grayscale image (1 channel) to 2 dimensions.
+
+     Args:
+         grayscale (np.ndarray): 3-dimensional grayscale image (1 channel).
+
+     Returns:
+         np.ndarray: 2-dimensional grayscale image.
+     """
+     channel = get_channel(grayscale)
+     if channel!=1:
+         raise ValueError(f"color maybe rgb or rgba {get_numpy_text(grayscale)}")
+
+     if grayscale.ndim == 2:
+         return grayscale
+     return np.squeeze(grayscale)
+
+ def blend_rgb_images(image1: np.ndarray, image2: np.ndarray, mask: np.ndarray) -> np.ndarray:
+     """
+     Blend two RGB images using a mask image.
+
+     Args:
+         image1 (np.ndarray): first image (RGB).
+         image2 (np.ndarray): second image (RGB).
+         mask (np.ndarray): mask image (grayscale).
+
+     Returns:
+         np.ndarray: blended image (RGB).
+
+     Raises:
+         ValueError: if the input image shapes do not match.
+     """
+
+     if image1.shape != image2.shape or image1.shape[:2] != mask.shape:
+         raise ValueError("Input image shapes do not match.")
+
+     # Convert the images to float
+     image1 = image1.astype(float)
+     image2 = image2.astype(float)
+
+     # Convert the mask to 3 channels and scale it to the 0-1 range
+     alpha = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR).astype(float) / 255.0
+
+     # Blend
+     blended = (1 - alpha) * image1 + alpha * image2
+
+     return blended.astype(np.uint8)
+
+ def create_color_image(img,color=(255,255,255)):
+     mask = np.zeros_like(img)
+
+     h, w = img.shape[:2]
+     cv2.rectangle(mask, (0, 0), (w, h), color, -1)
+     return mask
+
+ def pil_to_bgr_image(image):
+     np_image = np.array(image, dtype=np.uint8)
+     if np_image.shape[2] == 4:
+         bgr_img = cv2.cvtColor(np_image, cv2.COLOR_RGBA2BGRA)
+     else:
+         bgr_img = cv2.cvtColor(np_image, cv2.COLOR_RGB2BGR)
+     return bgr_img
+
+ def bgr_to_rgb(np_image):
+     if np_image.shape[2] == 4:
+         rgb_img = cv2.cvtColor(np_image, cv2.COLOR_BGRA2RGBA)
+     else:
+         rgb_img = cv2.cvtColor(np_image, cv2.COLOR_BGR2RGB)
+     return rgb_img
+
+ def copy_image(img1: np.ndarray, img2: np.ndarray, x: int, y: int) -> None:
+     # Type check
+     if not isinstance(img1, np.ndarray) or not isinstance(img2, np.ndarray):
+         raise TypeError("img1 and img2 must be NumPy arrays.")
+
+     # Check the number of dimensions and channels
+     if img1.ndim != 3 or img2.ndim != 3:
+         raise ValueError("Both img1 and img2 must be 3-dimensional arrays.")
+     elif img1.shape[2] != img2.shape[2]:
+         raise ValueError(f"img1 and img2 must have the same number of channels. img1 has {img1.shape[2]} channels, but img2 has {img2.shape[2]} channels.")
+
+     if x>=0:
+         offset_x = 0
+         w = min(img1.shape[1]-x,img2.shape[1])
+     else:
+         w = min(img1.shape[1],img2.shape[1]+x)
+         offset_x = int(-x)
+         x = 0
+
+     if y>=0:
+         h = min(img1.shape[0]-y,img2.shape[0])
+         offset_y = 0
+     else:
+         h = min(img1.shape[0],img2.shape[0]+y)
+         offset_y = int(-y)
+         y = 0
+     x = int(x)
+     y = int(y)
+     h = int(h)
+     w = int(w)
+
+     print(f"img1 {img1.shape} img2 {img2.shape} x={x} y={y} w={w} h={h}")
+     # Paste the overlapping part
+     img1[y:y+h, x:x+w] = img2[offset_y:h+offset_y, offset_x:w+offset_x]
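A small sketch of copy_image, which app.py uses for the offset-center pass; it clips the source patch when the offset is negative. Sizes here are arbitrary.

import numpy as np
from glibvision.cv2_utils import copy_image, plot_points

base = np.zeros((512, 512, 3), dtype=np.uint8)
patch = np.full((200, 200, 3), 255, dtype=np.uint8)
copy_image(base, patch, -50, 100)  # only the overlapping 150x200 region is pasted
plot_points(base, [(60, 150), (160, 150)], circle_size=4, line_size=1)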
glibvision/draw_utils.py ADDED
@@ -0,0 +1,42 @@
+ # DrawUtils
+ # drawing helpers that do not depend on PIL, cv2, or NumPy
+ import math
+ # 2024-11-29 add calculate_distance
+ def points_to_box(points):
+     x1 = float('inf')
+     x2 = 0
+     y1 = float('inf')
+     y2 = 0
+     for point in points:
+         if point[0]<x1:
+             x1 = point[0]
+         if point[0]>x2:
+             x2 = point[0]
+         if point[1]<y1:
+             y1 = point[1]
+         if point[1]>y2:
+             y2 = point[1]
+     return [x1,y1,x2-x1,y2-y1]
+
+ def box_to_point(box):
+     return [
+         [box[0],box[1]],
+         [box[0]+box[2],box[1]],
+         [box[0]+box[2],box[1]+box[3]],
+         [box[0],box[1]+box[3]]
+     ]
+
+ def plus_point(base_pt,add_pt):
+     return [base_pt[0]+add_pt[0],base_pt[1]+add_pt[1]]
+
+ def box_to_xy(box):
+     return [box[0],box[1],box[2]+box[0],box[3]+box[1]]
+
+ def to_int_points(points):
+     int_points = []
+     for point in points:
+         int_points.append([int(point[0]),int(point[1])])
+     return int_points
+
+ def calculate_distance(xy, xy2):
+     return math.sqrt((xy2[0] - xy[0])**2 + (xy2[1] - xy[1])**2)
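A quick check of the box helpers (the coordinates are arbitrary):

from glibvision.draw_utils import points_to_box, box_to_xy, plus_point

box = points_to_box([(10, 20), (110, 20), (60, 140)])
print(box)                           # [10, 20, 100, 120]  (x, y, w, h)
print(box_to_xy(box))                # [10, 20, 110, 140]  (x1, y1, x2, y2)
print(plus_point([10, 20], [5, 5]))  # [15, 25]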
glibvision/glandmark_utils.py ADDED
@@ -0,0 +1,48 @@
+ import os
+
+ # simple single-face version
+ def bbox_to_glandmarks(file_name,bbox,points=None):
+     base,ext = os.path.splitext(file_name)
+     glandmark = {"image":{
+         "boxes":[{
+             "left":int(bbox[0]),"top":int(bbox[1]),"width":int(bbox[2]),"height":int(bbox[3])
+         }],
+         "file":file_name,
+         "id":int(base)
+         # width,height ignored here
+     }}
+     if points is not None:
+         parts = []
+         for point in points:
+             parts.append({"x":int(point[0]),"y":int(point[1])})
+         glandmark["image"]["boxes"][0]["parts"] = parts
+     return glandmark
+
+ # technically this is not g-landmark/dlib
+ def convert_to_landmark_group_json(points):
+     if len(points)!=68:
+         print(f"points must be 68 but {len(points)}")
+         return None
+     new_points = list(points)
+
+     result = [ # possibly multiple people; any function could support multiple people
+         { # indices start at 0 but the point numbering starts at 1
+             "chin":new_points[0:17],
+             "left_eyebrow":new_points[17:22],
+             "right_eyebrow":new_points[22:27],
+             "nose_bridge":new_points[27:31],
+             "nose_tip":new_points[31:36],
+             "left_eye":new_points[36:42],
+             "right_eye":new_points[42:48],
+
+             # lip points use a customized structure from
+             # the MIT licensed face_recognition
+             # https://github.com/ageitgey/face_recognition
+             "top_lip":new_points[48:55]+[new_points[64]]+[new_points[63]]+[new_points[62]]+[new_points[61]]+[new_points[60]],
+             "bottom_lip":new_points[54:60]+[new_points[48]]+[new_points[60]]+[new_points[67]]+[new_points[66]]+[new_points[65]]+[new_points[64]],
+         }
+     ]
+     return result
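Assumed usage of bbox_to_glandmarks; note the file name stem must be numeric because the id is built with int(base):

import json
from glibvision.glandmark_utils import bbox_to_glandmarks

glandmark = bbox_to_glandmarks("0042.jpg", [10, 20, 100, 120], points=[(15, 25), (30, 40)])
print(json.dumps(glandmark, indent=1))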
glibvision/numpy_utils.py ADDED
@@ -0,0 +1,110 @@
+ import numpy as np
+
+
+ def apply_binary_mask_to_color(base_image,color,mask):
+     """
+     Set the masked region of an image to a solid color.
+
+     Args:
+         base_image (np.ndarray): image to modify.
+         color: color to write into the masked region.
+         mask (np.ndarray): binary mask image.
+
+     Returns:
+         np.ndarray: the image with the mask applied.
+     """
+     # TODO check all shapes
+     if mask.ndim == 2:
+         condition = mask == 255
+     else:
+         condition = mask[:,:,0] == 255
+
+     base_image[condition] = color
+     return base_image
+
+ def apply_binary_mask_to_image(base_image,paste_image,mask):
+     """
+     Copy part of one image into another using a binary mask.
+
+     Args:
+         base_image (np.ndarray): destination image.
+         paste_image (np.ndarray): source image.
+         mask (np.ndarray): binary mask image.
+
+     Returns:
+         np.ndarray: the image with the mask applied.
+     """
+     # TODO check all shapes
+     if mask.ndim == 2:
+         condition = mask == 255
+     else:
+         condition = mask[:,:,0] == 255
+
+     base_image[condition] = paste_image[condition]
+     return base_image
+
+ def pil_to_numpy(image):
+     return np.array(image, dtype=np.uint8)
+
+ def extruce_points(points,index,ratio=1.5):
+     """
+     Push the point at `index` outward from the centroid of the point set by `ratio`.
+     """
+     center_point = np.mean(points, axis=0)
+     if index < 0 or index >= len(points):
+         raise ValueError(f"index must be in range(0,{len(points)}) but value = {index}")
+     point1 = points[index]
+     print(f"center = {center_point}")
+     vec_to_center = point1 - center_point
+     return vec_to_center*ratio + center_point
+
+
+ def bulge_polygon(points, bulge_factor=0.1, isClosed=True):
+     """
+     Add a point at the middle of each polygon edge and bulge it outward.
+     Note: returns an ndarray.
+     """
+     # Convert the input points to a NumPy array
+     points = np.array(points)
+
+     # Find the centroid of the whole polygon
+     center_point = np.mean(points, axis=0)
+     #print(f"center = {center_point}")
+     new_points = []
+     num_points = len(points)
+     for i in range(num_points):
+         if i == num_points-1 and not isClosed:
+             break
+         p1 = points[i]
+         #print(f"p{i} = {p1}")
+         # vector from the centroid to the vertex
+         #vec_to_center = p1 - center_point
+
+         # edge vector and its midpoint
+         mid_diff = points[(i + 1) % num_points] - p1
+         mid = p1+(mid_diff/2)
+
+         #print(f"mid = {mid}")
+         out_vec = mid - center_point
+
+         # push the midpoint outward from the centroid
+         new_point = mid + out_vec * bulge_factor
+
+         new_points.append(p1)
+         new_points.append(new_point.astype(np.int32))
+
+     return np.array(new_points)
+
+
+ # image.shape for RGB is e.g. (1024,1024,3); use (1024,1024) as the 2-dimensional shape
+ def create_2d_image(shape):
+     grayscale_image = np.zeros(shape[:2], dtype=np.uint8)
+     return grayscale_image
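Sketch: bulge a square outward by 10 percent; the result interleaves the original vertices with one bulged midpoint per edge.

import numpy as np
from glibvision.numpy_utils import bulge_polygon

square = [(100, 100), (300, 100), (300, 300), (100, 300)]
bulged = bulge_polygon(square, bulge_factor=0.1)
print(bulged.shape)  # (8, 2)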
glibvision/pil_utils.py ADDED
@@ -0,0 +1,35 @@
+ from PIL import Image,ImageDraw
+ from .draw_utils import box_to_xy,to_int_points,box_to_point
+ # ver-2024-11-18
+ def create_color_image(width, height, color=(255,255,255)):
+     if color is None:
+         color = (0,0,0)
+
+     if len(color) == 3:
+         mode = "RGB"
+     elif len(color) == 4:
+         mode = "RGBA"
+     else:
+         raise ValueError("color must have 3 (RGB) or 4 (RGBA) values")
+
+     img = Image.new(mode, (width, height), color)
+     return img
+
+ # deprecated
+ def fill_points(image,points,color=(255,255,255)):
+     return draw_points(image,points,fill=color)
+
+ def draw_points(image,points,outline=None,fill=None,width=1):
+     draw = ImageDraw.Draw(image)
+     int_points = [(int(x), int(y)) for x, y in points]
+
+     if outline is not None or fill is not None:
+         draw.polygon(int_points, outline=outline, fill=fill, width=width)
+
+     return image
+
+ def draw_box(image,box,outline=None,fill=None):
+     points = to_int_points(box_to_point(box))
+     return draw_points(image,points,outline,fill)
+
+ def from_numpy(numpy_array):
+     return Image.fromarray(numpy_array)
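Sketch of the PIL helpers: draw an outlined box on a gray canvas (the sizes and colors are arbitrary).

from glibvision.pil_utils import create_color_image, draw_box

img = create_color_image(256, 256, (128, 128, 128))
draw_box(img, [32, 32, 192, 128], outline=(255, 0, 0))  # box is [x, y, w, h]
img.save("box.png")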
gradio_utils.py ADDED
@@ -0,0 +1,60 @@
+ import os
+ import time
+ import io
+ import hashlib
+
+ def clear_old_files(dir="files",passed_time=60*60):
+     try:
+         files = os.listdir(dir)
+         current_time = time.time()
+         for file in files:
+             file_path = os.path.join(dir,file)
+
+             ctime = os.stat(file_path).st_ctime
+             diff = current_time - ctime
+             #print(f"ctime={ctime},current_time={current_time},passed_time={passed_time},diff={diff}")
+             if diff > passed_time:
+                 os.remove(file_path)
+     except Exception:
+         print("clear_old_files failed; a gallery may still be using a file")
+
+ def get_buffer_id(buffer):
+     hash_object = hashlib.sha256(buffer.getvalue())
+     hex_dig = hash_object.hexdigest()
+     unique_id = hex_dig[:32]
+     return unique_id
+
+ def get_image_id(image):
+     buffer = io.BytesIO()
+     image.save(buffer, format='PNG')
+     return get_buffer_id(buffer)
+
+ def save_image(image,extension="jpg",dir_name="files"):
+     id = get_image_id(image)
+     os.makedirs(dir_name,exist_ok=True)
+     file_path = f"{dir_name}/{id}.{extension}"
+
+     image.save(file_path)
+     return file_path
+
+ def save_buffer(buffer,extension="webp",dir_name="files"):
+     id = get_buffer_id(buffer)
+     os.makedirs(dir_name,exist_ok=True)
+     file_path = f"{dir_name}/{id}.{extension}"
+
+     with open(file_path,"wb") as f:
+         f.write(buffer.getvalue())
+     return file_path
+
+ def write_file(file_path,text):
+     with open(file_path, 'w', encoding='utf-8') as f:
+         f.write(text)
+
+ def read_file(file_path):
+     """Read the text of the target file."""
+     with open(file_path, 'r', encoding='utf-8') as f:
+         content = f.read()
+     return content
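Assumed usage: images land under files/ named by a sha256 prefix of their content, and clear_old_files drops anything older than an hour.

from PIL import Image
from gradio_utils import save_image, clear_old_files

clear_old_files("files", passed_time=60 * 60)
img = Image.new("RGB", (64, 64), (200, 100, 50))
path = save_image(img, extension="webp")
print(path)  # e.g. files/<32-hex-chars>.webp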
mp_box.py ADDED
@@ -0,0 +1,138 @@
+ import mediapipe as mp
+ from mediapipe.tasks import python
+ from mediapipe.tasks.python import vision
+ from mediapipe.framework.formats import landmark_pb2
+ from mediapipe import solutions
+ import numpy as np
+
+ # heavily changed in the gradio app
+
+ # convert X,Y,W,H to x1,y1,x2,y2 (left-top, right-bottom style)
+ def xywh_to_xyxy(box):
+     return [box[0],box[1],box[0]+box[2],box[1]+box[3]]
+
+ def to_int_box(box):
+     return [int(box[0]),int(box[1]),int(box[2]),int(box[3])]
+
+ def convert_to_box(face_landmarks_list,indices,w=1024,h=1024):
+     x1 = w
+     y1 = h
+     x2 = 0
+     y2 = 0
+     for index in indices:
+         x = min(w,max(0,(face_landmarks_list[0][index].x*w)))
+         y = min(h,max(0,(face_landmarks_list[0][index].y*h)))
+         if x<x1:
+             x1 = x
+         if y<y1:
+             y1 = y
+         if x>x2:
+             x2 = x
+         if y>y2:
+             y2 = y
+
+     return [int(x1),int(y1),int(x2-x1),int(y2-y1)]
+
+
+ def box_to_square(bbox):
+     box = list(bbox)
+     if box[2]>box[3]:
+         diff = box[2]-box[3]
+         box[3] += diff
+         box[1] -= diff/2
+     elif box[3]>box[2]:
+         diff = box[3]-box[2]
+         box[2] += diff
+         box[0] -= diff/2
+     return box
+
+
+ def face_landmark_result_to_box(face_landmarker_result,width=1024,height=1024):
+     face_landmarks_list = face_landmarker_result.face_landmarks
+
+     full_indices = list(range(456))
+
+     MIDDLE_FOREHEAD = 151
+     BOTTOM_CHIN_EX = 152
+     BOTTOM_CHIN = 175
+     CHIN_TO_MIDDLE_FOREHEAD = [200,14,1,6,18,9]
+     MOUTH_BOTTOM = [202,200,422]
+     EYEBROW_CHEEK_LEFT_RIGHT = [46,226,50,1,280,446,276]
+
+     LEFT_HEAD_OUTER_EX = 251 # on a side face this is almost the same as full
+     LEFT_HEAD_OUTER = 301
+     LEFT_EYE_OUTER_EX = 356
+     LEFT_EYE_OUTER = 264
+     LEFT_MOUTH_OUTER_EX = 288
+     LEFT_MOUTH_OUTER = 288
+     LEFT_CHIN_OUTER = 435
+     RIGHT_HEAD_OUTER_EX = 21
+     RIGHT_HEAD_OUTER = 71
+     RIGHT_EYE_OUTER_EX = 127
+     RIGHT_EYE_OUTER = 34
+     RIGHT_MOUTH_OUTER_EX = 58
+     RIGHT_MOUTH_OUTER = 215
+     RIGHT_CHIN_OUTER = 150
+
+     # TODO name this line
+     min_indices = CHIN_TO_MIDDLE_FOREHEAD+EYEBROW_CHEEK_LEFT_RIGHT+MOUTH_BOTTOM
+
+     chin_to_brow_indices = [LEFT_CHIN_OUTER,LEFT_MOUTH_OUTER,LEFT_EYE_OUTER,LEFT_HEAD_OUTER,MIDDLE_FOREHEAD,RIGHT_HEAD_OUTER,RIGHT_EYE_OUTER,RIGHT_MOUTH_OUTER,RIGHT_CHIN_OUTER,BOTTOM_CHIN]+min_indices
+
+     box1 = convert_to_box(face_landmarks_list,min_indices,width,height)
+     box2 = convert_to_box(face_landmarks_list,chin_to_brow_indices,width,height)
+     box3 = convert_to_box(face_landmarks_list,full_indices,width,height)
+     #print(box)
+
+     return [box1,box2,box3,box_to_square(box1),box_to_square(box2),box_to_square(box3)]
+
+
+ def draw_landmarks_on_image(detection_result,rgb_image):
+     face_landmarks_list = detection_result.face_landmarks
+     annotated_image = np.copy(rgb_image)
+
+     # Loop through the detected faces to visualize.
+     for idx in range(len(face_landmarks_list)):
+         face_landmarks = face_landmarks_list[idx]
+
+         # Draw the face landmarks.
+         face_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
+         face_landmarks_proto.landmark.extend([
+             landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in face_landmarks
+         ])
+
+         solutions.drawing_utils.draw_landmarks(
+             image=annotated_image,
+             landmark_list=face_landmarks_proto,
+             connections=mp.solutions.face_mesh.FACEMESH_TESSELATION,
+             landmark_drawing_spec=None,
+             connection_drawing_spec=mp.solutions.drawing_styles
+             .get_default_face_mesh_tesselation_style())
+
+     return annotated_image
+
+ def mediapipe_to_box(image_data,model_path="face_landmarker.task"):
+     BaseOptions = mp.tasks.BaseOptions
+     FaceLandmarker = mp.tasks.vision.FaceLandmarker
+     FaceLandmarkerOptions = mp.tasks.vision.FaceLandmarkerOptions
+     VisionRunningMode = mp.tasks.vision.RunningMode
+
+     options = FaceLandmarkerOptions(
+         base_options=BaseOptions(model_asset_path=model_path),
+         running_mode=VisionRunningMode.IMAGE,
+         min_face_detection_confidence=0, min_face_presence_confidence=0
+     )
+
+     with FaceLandmarker.create_from_options(options) as landmarker:
+         if isinstance(image_data,str):
+             mp_image = mp.Image.create_from_file(image_data)
+         else:
+             mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=np.asarray(image_data))
+         face_landmarker_result = landmarker.detect(mp_image)
+         boxes = face_landmark_result_to_box(face_landmarker_result,mp_image.width,mp_image.height)
+         return boxes,mp_image,face_landmarker_result
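Sketch of mediapipe_to_box, assuming face_landmarker.task is present and portrait.jpg is a hypothetical face photo:

from PIL import Image
from mp_box import mediapipe_to_box

boxes, mp_image, result = mediapipe_to_box("portrait.jpg")
# boxes = [min, chin-to-brow, full, and their squared variants], each [x, y, w, h]
x, y, w, h = boxes[3]  # squared tight box
Image.open("portrait.jpg").crop((x, y, x + w, y + h)).save("face.jpg")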
mp_utils.py ADDED
@@ -0,0 +1,140 @@
+ import math
+
+ import mediapipe as mp
+ from mediapipe.tasks import python
+ from mediapipe.tasks.python import vision
+ from mediapipe.framework.formats import landmark_pb2
+ from mediapipe import solutions
+ import numpy as np
+
+ # 2024-11-27 extract_landmark: add args
+ #            add get_pixel_xyz
+ # 2024-11-28 add get_normalized_xyz
+
+ def calculate_distance(p1, p2):
+     """
+     Euclidean distance between two 2D points.
+     """
+     return math.sqrt((p2[0] - p1[0])**2 + (p2[1] - p1[1])**2)
+
+ def to_int_points(points):
+     ints = []
+     for pt in points:
+         value = [int(pt[0]),int(pt[1])]
+         ints.append(value)
+     return ints
+
+ debug = False
+
+ def divide_line_to_points(points,divided): # returns divided + 1 points
+     total_length = 0
+     line_length_list = []
+     for i in range(len(points)-1):
+         pt_length = calculate_distance(points[i],points[i+1])
+         total_length += pt_length
+         line_length_list.append(pt_length)
+
+     splited_length = total_length/divided
+
+     def get_new_point(index,lerp):
+         pt1 = points[index]
+         pt2 = points[index+1]
+         diff = [pt2[0] - pt1[0], pt2[1]-pt1[1]]
+         new_point = [pt1[0]+diff[0]*lerp,pt1[1]+diff[1]*lerp]
+         if debug:
+             print(f"pt1 ={pt1} pt2 ={pt2} diff={diff} new_point={new_point}")
+         return new_point
+
+     if debug:
+         print(f"{total_length} splitted = {splited_length} line-length-list = {len(line_length_list)}")
+     splited_points = [points[0]]
+     for i in range(1,divided):
+         need_length = splited_length*i
+         if debug:
+             print(f"{i} need length = {need_length}")
+         current_length = 0
+         for j in range(len(line_length_list)):
+             line_length = line_length_list[j]
+             current_length += line_length
+             if current_length>need_length:
+                 if debug:
+                     print(f"over need length index = {j} current={current_length}")
+                 diff = current_length - need_length
+
+                 lerp_point = 1.0 - (diff/line_length)
+                 if debug:
+                     print(f"over = {diff} lerp ={lerp_point}")
+                 new_point = get_new_point(j,lerp_point)
+
+                 splited_points.append(new_point)
+                 break
+
+     splited_points.append(points[-1]) # last one
+     splited_points = to_int_points(splited_points)
+
+     if debug:
+         print(f"sp={len(splited_points)}")
+     return splited_points
+
+
+ def expand_bbox(bbox,left=5,top=5,right=5,bottom=5):
+     left_pixel = bbox[2]*(float(left)/100)
+     top_pixel = bbox[3]*(float(top)/100)
+     right_pixel = bbox[2]*(float(right)/100)
+     bottom_pixel = bbox[3]*(float(bottom)/100)
+     new_box = list(bbox)
+     new_box[0] -= left_pixel
+     new_box[1] -= top_pixel
+     new_box[2] += left_pixel+right_pixel
+     new_box[3] += top_pixel+bottom_pixel
+     return new_box
+
+ # for normalized-value indices see mp_constants
+ def get_normalized_cordinate(face_landmarks_list,index):
+     x = face_landmarks_list[0][index].x
+     y = face_landmarks_list[0][index].y
+     return x,y
+
+ def get_normalized_xyz(face_landmarks_list,index):
+     x = face_landmarks_list[0][index].x
+     y = face_landmarks_list[0][index].y
+     z = face_landmarks_list[0][index].z
+     return x,y,z
+
+ # z stays normalized
+ def get_pixel_xyz(face_landmarks_list,landmark,width,height):
+     point = get_normalized_cordinate(face_landmarks_list,landmark)
+     z = face_landmarks_list[0][landmark].z
+     return int(point[0]*width),int(point[1]*height),z
+
+ def get_pixel_cordinate(face_landmarks_list,landmark,width,height):
+     point = get_normalized_cordinate(face_landmarks_list,landmark)
+     return int(point[0]*width),int(point[1]*height)
+
+ def get_pixel_cordinate_list(face_landmarks_list,indices,width,height):
+     cordinates = []
+     for index in indices:
+         cordinates.append(get_pixel_cordinate(face_landmarks_list,index,width,height))
+     return cordinates
+
+ def extract_landmark(image_data,model_path="face_landmarker.task",min_face_detection_confidence=0,min_face_presence_confidence=0,output_facial_transformation_matrixes=False):
+     BaseOptions = mp.tasks.BaseOptions
+     FaceLandmarker = mp.tasks.vision.FaceLandmarker
+     FaceLandmarkerOptions = mp.tasks.vision.FaceLandmarkerOptions
+     VisionRunningMode = mp.tasks.vision.RunningMode
+
+     options = FaceLandmarkerOptions(
+         base_options=BaseOptions(model_asset_path=model_path),
+         running_mode=VisionRunningMode.IMAGE,
+         min_face_detection_confidence=min_face_detection_confidence,
+         min_face_presence_confidence=min_face_presence_confidence,
+         output_facial_transformation_matrixes=output_facial_transformation_matrixes
+     )
+
+     with FaceLandmarker.create_from_options(options) as landmarker:
+         if isinstance(image_data,str):
+             mp_image = mp.Image.create_from_file(image_data)
+         else:
+             mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=np.asarray(image_data))
+         face_landmarker_result = landmarker.detect(mp_image)
+         return mp_image,face_landmarker_result
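Sketch of extract_landmark plus get_pixel_cordinate_list, assuming face_landmarker.task and a hypothetical portrait.jpg sit next to the script. The indices are the same ones app.py feeds to solvePnP.

from mp_utils import extract_landmark, get_pixel_cordinate_list

mp_image, result = extract_landmark("portrait.jpg", "face_landmarker.task")
h, w = mp_image.numpy_view().shape[:2]
# nose tip, chin, inner eyes, outer eyes, mouth corners
points = get_pixel_cordinate_list(result.face_landmarks,
                                  [4, 199, 133, 362, 33, 263, 61, 291], w, h)
print(points)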