Akjava committed on
Commit 661ec13 · 1 Parent(s): e20aa9a

broken opencv-version
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.task filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
+ __pycache__
+ files
app.py ADDED
@@ -0,0 +1,436 @@
+ import spaces
+ import gradio as gr
+ import subprocess
+ from PIL import Image,ImageOps,ImageDraw,ImageFilter
+ import json
+ import os
+ import time
+ import mp_box
+ from mp_utils import get_pixel_cordinate_list,extract_landmark,get_pixel_cordinate,get_pixel_xyz
+ from glibvision.draw_utils import points_to_box,box_to_xy,plus_point
+
+ from glibvision.cv2_utils import plot_points,create_color_image,pil_to_bgr_image,set_plot_text,copy_image
+
+ from gradio_utils import save_image,save_buffer,clear_old_files,read_file
+
+ import cv2
+ from cv2_pose_estimate import estimate_head_pose,draw_head_pose
+
+ import numpy as np
+ from numpy.typing import NDArray
+
+ '''
+ inner_eyes_blur - inner eyes blur
+ iris_mask_blur - final iris edge blur
+ '''
+
+ set_plot_text(False,0.5,(200,200,200))
+
+ depath_ratio = 1.0
+
+ model_cordinates = [ (0.0, 0.0, 0.0),                        # nose tip
+                      (0.0, 344.0, -40.0 * depath_ratio),     # chin
+                      #(0.0, -160.0, -50.0),                  # center of eye
+                      # INNER
+                      (-110.0, -215.0, -60.0 * depath_ratio), # inner left eye corner
+                      (110.0, -215.0, -60.0 * depath_ratio),  # inner right eye corner
+
+                      (-300.0, -250.0, -90.0 * depath_ratio), # left eye left corner
+                      (300.0, -250.0, -90.0 * depath_ratio),  # right eye right corner
+
+                      (-125.0, 180.0, -70.0 * depath_ratio),  # left mouth corner
+                      (125.0, 180.0, -70.0 * depath_ratio) ]  # right mouth corner
+
+ def fit_cordinates(cordinates,center_x=512,center_y=512,base_distance=344):
+     ratio = base_distance/(cordinates[1][1])
+     fitted_cordinates = []
+     for cordinate in cordinates:
+         fitted_cordinate = [
+             cordinate[0]*ratio+center_x,
+             cordinate[1]*ratio+center_y,
+             cordinate[2]*ratio
+         ]
+         fitted_cordinates.append(fitted_cordinate)
+     return fitted_cordinates
+
+
+ def plot_model(cv2_image=None,center_x=512,center_y=512,base_distance=344):
+     if cv2_image is None:
+         #TODO add arg
+         cv2_image = create_color_image(np.zeros((1024, 1024,3),dtype=np.uint8))
+     fitted_cordinates = fit_cordinates(model_cordinates,center_x,center_y,base_distance)
+     ratio = base_distance/model_cordinates[1][1]
+
+     def adjust_cordinate(point):
+         return point
+
+     plot_points(cv2_image,[adjust_cordinate(fitted_cordinates[0])],False,6,(0,0,255),3,(255,0,0))
+     plot_points(cv2_image,[adjust_cordinate(fitted_cordinates[1])],False,6,(0,0,255),3,(255,0,0))
+
+     plot_points(cv2_image,[adjust_cordinate(fitted_cordinates[2]),adjust_cordinate(fitted_cordinates[4])],False,6,(0,0,255),3,(255,0,0))
+     plot_points(cv2_image,[adjust_cordinate(fitted_cordinates[3]),adjust_cordinate(fitted_cordinates[5])],False,6,(0,0,255),3,(255,0,0))
+     plot_points(cv2_image,[adjust_cordinate(fitted_cordinates[6]),adjust_cordinate(fitted_cordinates[7])],False,6,(0,0,255),3,(255,0,0))
+
+     return cv2_image
+
+
+ def set_model_cordinates(cordinates):
+     global model_cordinates
+     model_cordinates = cordinates
+
+ def process_images(image,base_image,
+                    camera_fov,double_check_offset_center,
+                    draw_base_model,fit_base_model,
+                    first_pnp,second_refine,final_iterative,debug_process,draw_mediapipe_mesh,draw_mediapipe_result,z_multiply=0.8,
+                    progress=gr.Progress(track_tqdm=True)):
+     clear_old_files()
+
+     image_indices = [4,199, #6, # center of eye
+                      133,362, # inner eye
+                      33,263,  # outer eye
+                      61,291]  # mouth
+
+     chin = 344
+     global model_cordinates
+
+     """ normalize ?
+     model_cordinates = [
+         [pt[0]/chin,pt[1]/chin,pt[2]/chin] for pt in model_cordinates
+     ]
+     """
+
+     def landmarks_to_model_corsinates(face_landmarks,indices,w,h):
+         cordinates = []
+         z_depth = w if w<h else h
+         z_depth *= z_multiply
+         for index in indices:
+             xyz = get_pixel_xyz(face_landmarker_result.face_landmarks,index,w,h)
+             #print(xyz,xyz[2]*z_multiply) #TODO choose?
+             cordinates.append([
+                 xyz[0],xyz[1],xyz[2]*z_depth
+             ])
+         return cordinates
+
+     if image is None:
+         raise gr.Error("Need Image")
+     cv2_image = pil_to_bgr_image(image)
+     size = cv2_image.shape
+     center: tuple[float, float] = (size[1] / 2, size[0] / 2)
+
+     if base_image is not None: # additional base image
+         base_image_indices = [
+             6,197,195,5,4, # nose center
+             122,196, 3, 51, 45,
+             351,419,248,281,275,
+
+             122,245,244,243,133, # eyes
+             351,465,464,463,362  # eyes
+         ]
+         # TODO check same?
+         cv2_base_image = pil_to_bgr_image(base_image)
+         mp_image,face_landmarker_result = extract_landmark(cv2_base_image,"face_landmarker.task",0,0,True)
+         h,w = cv2_base_image.shape[:2]
+
+         image_indices = base_image_indices
+         set_model_cordinates(landmarks_to_model_corsinates(face_landmarker_result.face_landmarks,image_indices,w,h))
+         print(image_indices)
+
+     import math
+     def calculate_distance(xy, xy2):
+         return math.sqrt((xy2[0] - xy[0])**2 + (xy2[1] - xy[1])**2)
+
+     mp_image,face_landmarker_result = extract_landmark(cv2_image,"face_landmarker.task",0,0,True)
+     im = mp_image.numpy_view()
+     h,w = im.shape[:2]
+
+     first_landmarker_result = None
+     if double_check_offset_center:
+         root_cordinate = get_pixel_cordinate(face_landmarker_result.face_landmarks,image_indices[0],w,h) # nose tip
+         diff_center_x = center[0] - root_cordinate[0]
+         diff_center_y = center[1] - root_cordinate[1]
+         base = np.zeros_like(cv2_image)
+         copy_image(base,cv2_image,diff_center_x,diff_center_y)
+         first_landmarker_result = face_landmarker_result
+         mp_image,face_landmarker_result = extract_landmark(base,"face_landmarker.task",0,0,True)
+         im = mp_image.numpy_view()
+     else:
+         diff_center_x = 0
+         diff_center_y = 0
+     #return base,"",""
+
+     cordinates = get_pixel_cordinate_list(face_landmarker_result.face_landmarks,image_indices,w,h)
+
+     if draw_mediapipe_mesh:
+         image = mp_box.draw_landmarks_on_image(face_landmarker_result,image)
+         cv2_image = pil_to_bgr_image(image)
+
+     chin_distance = calculate_distance(cordinates[0],cordinates[1])
+     # tried to run PnP from the fitted pose, but it does not seem to work
+     #fitted_cordinates = fit_cordinates(model_cordinates,cordinates[0][0],cordinates[0][1],chin_distance)
+     if fit_base_model:
+         # did not get a good result
+         #model_points: NDArray = np.array(fitted_cordinates, dtype="double")
+         model_points: NDArray = np.array(model_cordinates, dtype="double")
+     else:
+         model_points: NDArray = np.array(model_cordinates, dtype="double")
+
+     focal_length: float = calculate_distance(cordinates[0],cordinates[1])
+     focal_length = focal_length*camera_fov
+
+     #image_size = size[0] #TODO
+     #f = (image_size / 2) / np.tan(np.deg2rad(camera_fov / 2))
+     #focal_length = f
+     #print(f"fov ={camera_fov} size = {image_size} focal_length = {focal_length}")
+
+     camera_matrix: NDArray = np.array([
+         [focal_length, 0, center[0]],
+         [0, focal_length, center[1]],
+         [0, 0, 1]
+     ], dtype="double")
+     dist_coeffs: NDArray = np.zeros((4, 1))
+
+     # offsetting the center usually improves the result
+
+     image_points: NDArray = np.array(cordinates, dtype="double")
+
+     from scipy.spatial.transform import Rotation as R
+     def print_euler(rotation_vector,label=""):
+         order = "yxz"
+         rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
+         r = R.from_matrix(rotation_matrix)
+         euler_angles = r.as_euler(order, degrees=True)
+         label = f"{label} Euler Angles {order} (degrees): {euler_angles}"
+         return label
+
+     rotation_vector = None
+     translation_vector = None
+     im_with_pose = cv2_image
+     result_label = None
+     mediapipe_text = None
+     # keep the MediaPipe matrices so the "Mediapipe" PnP option can reuse them
+     mp_rotation_matrix = None
+     mp_scaled_translation_vector = None
+
+     def face_landmarker_result_to_angle_label(face_landmarker_result,order="yxz"):
+         nonlocal im_with_pose,mp_rotation_matrix,mp_scaled_translation_vector
+         if len(face_landmarker_result.facial_transformation_matrixes)>0:
+             transformation_matrix = face_landmarker_result.facial_transformation_matrixes[0]
+             rotation_matrix, translation_vector = transformation_matrix[:3, :3],transformation_matrix[:3, 3]
+             #TODO change base-size
+             scaled_translation_vector = (translation_vector[0]*1024,translation_vector[1]*1024,translation_vector[2]*1024)
+             #scaled_translation_vector = (-512,-512,-1024)
+             mp_rotation_matrix = rotation_matrix
+             mp_scaled_translation_vector = scaled_translation_vector
+             if draw_mediapipe_result:
+                 im_with_pose = draw_head_pose(im_with_pose, image_points, rotation_matrix, scaled_translation_vector, camera_matrix, dist_coeffs,32,-diff_center_x,-diff_center_y)
+             #print("mediapipe",scaled_translation_vector)
+             r = R.from_matrix(rotation_matrix)
+             euler_angles = r.as_euler(order, degrees=True)
+             label = f"MediaPipe Euler Angles {order} (degrees): {euler_angles}"
+             return label
+
+     if first_landmarker_result is not None:
+         mediapipe_first_text = face_landmarker_result_to_angle_label(first_landmarker_result)
+     else:
+         mediapipe_first_text = ""
+
+     mediapipe_second_text = face_landmarker_result_to_angle_label(face_landmarker_result)
+
+     if first_pnp!="None":
+         if first_pnp == "EPNP":
+             flags = cv2.SOLVEPNP_EPNP
+         elif first_pnp == "ITERATIVE":
+             flags = cv2.SOLVEPNP_ITERATIVE
+         elif first_pnp == "IPPE":
+             flags = cv2.SOLVEPNP_IPPE
+         else:
+             flags = cv2.SOLVEPNP_SQPNP
+         if first_pnp == "Mediapipe":
+             # reuse the pose MediaPipe already estimated
+             rotation_vector, _ = cv2.Rodrigues(mp_rotation_matrix)
+             translation_vector = mp_scaled_translation_vector
+         else:
+             translation_vector = None
+             #translation_vector = np.array([cordinates[0][0],cordinates[0][1],focal_length],dtype="double")
+             #print("initial",translation_vector)
+             rotation_vector, translation_vector = estimate_head_pose(cv2_image, model_points,image_points, camera_matrix, dist_coeffs,flags,None,translation_vector)
+         #print(translation_vector)
+         im_with_pose = cv2_image
+         result_label = print_euler(rotation_vector,first_pnp)
+         print("firstpnp",translation_vector)
+         if debug_process:
+             im_with_pose = draw_head_pose(cv2_image, image_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs,128,-diff_center_x,-diff_center_y)
+
+     if first_pnp!="None" and second_refine!="None":
+         criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 1000, 1e-8) # iteration termination criteria
+         if second_refine == "LM":
+             rotation_vector, translation_vector = cv2.solvePnPRefineLM(model_points, image_points, camera_matrix, dist_coeffs, rotation_vector, translation_vector, criteria=criteria)
+         else:
+             rotation_vector, translation_vector = cv2.solvePnPRefineVVS(model_points, image_points, camera_matrix, dist_coeffs, rotation_vector, translation_vector, criteria=criteria)
+         if debug_process:
+             im_with_pose = draw_head_pose(im_with_pose, image_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs,128+64,-diff_center_x,-diff_center_y)
+         result_label = print_euler(rotation_vector,second_refine)
+         #print("refine",translation_vector)
+
+     if final_iterative:
+         (success, rotation_vector, translation_vector) = cv2.solvePnP(
+             model_points, image_points, camera_matrix, dist_coeffs,rotation_vector,translation_vector,flags=cv2.SOLVEPNP_ITERATIVE)
+         if success:
+             result_label = print_euler(rotation_vector,"SOLVEPNP_ITERATIVE")
+         else:
+             gr.Warning("final_iterative failed")
+
+     # draw the final one
+     if rotation_vector is not None:
+         im_with_pose = draw_head_pose(im_with_pose, image_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs,255,-diff_center_x,-diff_center_y)
+
+     #print("opencv",translation_vector)
+
+     if draw_base_model:
+         if fit_base_model:
+             im_with_pose = plot_model(im_with_pose,cordinates[0][0],cordinates[0][1],chin_distance)
+         else:
+             im_with_pose = plot_model(im_with_pose)
+
+     return cv2.cvtColor(im_with_pose,cv2.COLOR_BGR2RGB),result_label,mediapipe_first_text,mediapipe_second_text
+
+
+ css="""
+ #col-left {
+     margin: 0 auto;
+     max-width: 640px;
+ }
+ #col-right {
+     margin: 0 auto;
+     max-width: 640px;
+ }
+ .grid-container {
+     display: flex;
+     align-items: center;
+     justify-content: center;
+     gap:10px
+ }
+
+ .image {
+     width: 128px;
+     height: 128px;
+     object-fit: cover;
+ }
+
+ .text {
+     font-size: 16px;
+ }
+ """
+
+ with gr.Blocks(css=css, elem_id="demo-container") as demo:
+     with gr.Column():
+         gr.HTML(read_file("demo_header.html"))
+         gr.HTML(read_file("demo_tools.html"))
+         with gr.Row():
+             with gr.Column():
+                 image = gr.Image(height=800,sources=['upload','clipboard'],image_mode='RGB',elem_id="image_upload", type="pil", label="Image")
+
+                 with gr.Row(elem_id="prompt-container", equal_height=False):
+                     with gr.Row():
+                         btn = gr.Button("Pose Estimate", elem_id="run_button",variant="primary")
+
+                 with gr.Accordion(label="Advanced Settings", open=True):
+                     # needs a better landmarker
+                     base_image = gr.Image(sources=['upload','clipboard'],image_mode='RGB',elem_id="image_upload", type="pil", label="Image",visible=False)
+
+                     with gr.Row(equal_height=True):
+                         camera_fov = gr.Slider(info="does not affect MediaPipe; multiplies the nose-chin distance",
+                                                label="Multiply value",
+                                                minimum=0.1,
+                                                maximum=2.0,
+                                                step=0.01,
+                                                value=1.2)
+                         double_check_offset_center = gr.Checkbox(label="offset center point",value=True,info="move the center and detect again (usually more accurate)")
+                         z_multiply = gr.Slider(info="nose depth",
+                                                label="Z-Multiply",
+                                                minimum=0.1,
+                                                maximum=1.5,
+                                                step=0.01,
+                                                value=0.8)
+                     with gr.Row(equal_height=True):
+                         draw_base_model = gr.Checkbox(label="draw base model",value=False,info="draw base model")
+                         fit_base_model = gr.Checkbox(label="fit base model",value=False,info="visual only, not used as the model")
+
+                     first_pnp = gr.Radio(label="PnP",choices=["None","EPNP","SQPNP","IPPE","ITERATIVE","Mediapipe"],value="EPNP")
+                     second_refine = gr.Radio(label="PnP refine",choices=["None","LM","VVS"],value="LM")
+                     with gr.Row(equal_height=True):
+                         final_iterative = gr.Checkbox(label="PnP final iterative",value=False,info="sometimes good")
+                         debug_process = gr.Checkbox(label="Debug Process",value=False)
+                         draw_mediapipe_mesh = gr.Checkbox(label="Draw mediapipe mesh",value=False)
+                         draw_mediapipe_result = gr.Checkbox(label="Draw mediapipe result",value=False)
+                     plot_button = gr.Button("Plot Model", elem_id="run_button")
+
+             with gr.Column():
+                 result_image = gr.Image(height=760,label="Result", elem_id="output-animation",image_mode='RGB')
+                 result_text = gr.Textbox(label="cv2 result")
+                 mediapipe_first_text = gr.Textbox(label="first mediapipe result")
+                 mediapipe_last_text = gr.Textbox(label="2nd or last mediapipe result")
+
+         btn.click(fn=process_images, inputs=[image,base_image,
+                                              camera_fov,double_check_offset_center,
+                                              draw_base_model,fit_base_model,
+                                              first_pnp,second_refine,final_iterative,debug_process,draw_mediapipe_mesh,draw_mediapipe_result,
+                                              z_multiply # Z-Multiply slider
+                                              ],outputs=[result_image,result_text,mediapipe_first_text,mediapipe_last_text],api_name='infer')
+         plot_button.click(fn=plot_model,inputs=[],outputs=[result_image])
+
+         example_images = [
+             ["examples/02316230.jpg"],
+             ["examples/00003245_00.jpg"],
+             ["examples/00827009.jpg"],
+             ["examples/00002062.jpg"],
+             ["examples/00824008.jpg"],
+             ["examples/00825000.jpg"],
+             ["examples/00826007.jpg"],
+             ["examples/00824006.jpg"],
+             ["examples/00828003.jpg"],
+             ["examples/00002200.jpg"],
+             ["examples/00005259.jpg"],
+             ["examples/00018022.jpg"],
+             ["examples/img-above.jpg"],
+             ["examples/00100265.jpg"],
+             ["examples/00039259.jpg"],
+         ]
+         example1 = gr.Examples(
+             examples=example_images,label="Image",
+             inputs=[image],examples_per_page=8
+         )
+         gr.HTML(read_file("demo_footer.html"))
+
+ if __name__ == "__main__":
+     demo.launch()
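For reference, a minimal sketch of the PnP step that process_images performs: build a camera matrix from the nose-chin pixel distance, solve with EPNP, refine with LM, then convert to Euler angles. The 2D points below are made up for illustration, not taken from a real detection.

import cv2
import numpy as np
from scipy.spatial.transform import Rotation as R

# 3D model points (same layout app.py uses) and fabricated 2D detections
model_points = np.array([(0.0, 0.0, 0.0), (0.0, 344.0, -40.0),
                         (-110.0, -215.0, -60.0), (110.0, -215.0, -60.0),
                         (-300.0, -250.0, -90.0), (300.0, -250.0, -90.0),
                         (-125.0, 180.0, -70.0), (125.0, 180.0, -70.0)], dtype="double")
image_points = np.array([(512, 512), (512, 700), (450, 400), (574, 400),
                         (350, 390), (674, 390), (450, 610), (574, 610)], dtype="double")

# focal length approximated from the nose-chin distance, camera_fov slider = 1.2
focal_length = np.linalg.norm(image_points[0] - image_points[1]) * 1.2
camera_matrix = np.array([[focal_length, 0, 512], [0, focal_length, 512], [0, 0, 1]], dtype="double")
dist_coeffs = np.zeros((4, 1))

ok, rvec, tvec = cv2.solvePnP(model_points, image_points, camera_matrix,
                              dist_coeffs, flags=cv2.SOLVEPNP_EPNP)
rvec, tvec = cv2.solvePnPRefineLM(model_points, image_points, camera_matrix,
                                  dist_coeffs, rvec, tvec)
rot, _ = cv2.Rodrigues(rvec)
print(R.from_matrix(rot).as_euler("yxz", degrees=True))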
cv2_pose_estimate.py ADDED
@@ -0,0 +1,273 @@
+ import cv2
+ import numpy as np
+ from numpy.typing import NDArray
+ import sys
+ from mp_utils import get_pixel_cordinate_list,extract_landmark
+
+ def estimate_head_pose(im: NDArray, model_points: NDArray, image_points, camera_matrix: NDArray, dist_coeffs: NDArray, flags=cv2.SOLVEPNP_ITERATIVE, rotation_vector=None, translation_vector=None) -> tuple[NDArray, NDArray]:
+     """
+     Estimates the head pose from an image.
+
+     Args:
+         im: Input image (only its shape is used).
+         model_points: 3D model points.
+         image_points: 2D image points.
+         camera_matrix: Camera intrinsic matrix.
+         dist_coeffs: Lens distortion coefficients.
+
+     Returns:
+         rotation_vector: Estimated rotation vector.
+         translation_vector: Estimated translation vector.
+     """
+     size = im.shape
+
+     '''
+     image_points: NDArray = np.array([
+         (359, 391), # nose tip
+         (399, 561), # chin
+         (337, 297), # left eye left corner
+         (513, 301), # right eye right corner
+         (345, 465), # left mouth corner
+         (453, 469)  # right mouth corner
+     ], dtype="double")
+     '''
+
+     model_points = model_points + 500
+     (success, rotation_vector, translation_vector) = cv2.solvePnP(
+         model_points, image_points, camera_matrix, dist_coeffs, flags=flags,
+     )
+     print(model_points)
+     print(image_points)
+     print(camera_matrix)
+
+     if not success:
+         raise RuntimeError("solvePnP failed.")
+
+     return rotation_vector, translation_vector
+
+
+ def draw_head_pose(image: NDArray, image_points: NDArray, rotation_vector: NDArray, translation_vector: NDArray, camera_matrix: NDArray, dist_coeffs: NDArray, color_max=255, offset_x=0, offset_y=0) -> NDArray:
+     """
+     Draws the head pose (XYZ axes) on the image.
+
+     Args:
+         image: Input image.
+         image_points: 2D image points.
+         rotation_vector: Estimated rotation vector.
+         translation_vector: Estimated translation vector.
+         camera_matrix: Camera intrinsic matrix.
+         dist_coeffs: Lens distortion coefficients.
+
+     Returns:
+         Image with head pose drawn.
+     """
+
+     # Define the 3D points for the XYZ axes
+     axis_length = 500.0  # length of the axes
+     axis_points_3D: NDArray = np.array([
+         [0, 0, 0],            # origin
+         [axis_length, 0, 0],  # X axis
+         [0, axis_length, 0],  # Y axis
+         [0, 0, axis_length]   # Z axis
+     ], dtype='float32')
+
+     # Project the 3D points to the 2D image plane
+     (axis_points_2D, _) = cv2.projectPoints(
+         axis_points_3D, rotation_vector, translation_vector, camera_matrix, dist_coeffs
+     )
+     axis_points_2D = axis_points_2D.astype(int)
+
+     # Draw the axes on the image
+     origin = tuple(axis_points_2D[0].ravel())
+     cv2.line(image, origin, tuple(axis_points_2D[1].ravel()), (0, 0, color_max), 3)  # X axis (red)
+     cv2.line(image, origin, tuple(axis_points_2D[2].ravel()), (0, color_max, 0), 3)  # Y axis (green)
+     cv2.line(image, origin, tuple(axis_points_2D[3].ravel()), (color_max, 0, 0), 3)  # Z axis (blue)
+
+     for p in image_points:
+         cv2.circle(image, (int(p[0]+offset_x), int(p[1]+offset_y)), 3, (0, 0, 255), -1)
+
+     return image
+
+
+ def main():
+     # 3D model points.
+     '''
+     model_points: NDArray = np.array([
+         (0.0, 0.0, 0.0),          # nose tip
+         (0.0, 300.0, -65.0),      # chin
+         (-225.0, -170.0, -135.0), # left eye left corner
+         (225.0, -170.0, -135.0),  # right eye right corner
+         (-150.0, -150.0, -125.0), # left mouth corner
+         (150.0, -150.0, -125.0)   # right mouth corner
+     ])
+     '''
+
+     model_points: NDArray = np.array([
+         (0.0, 0.0, 0.0),         # nose tip
+         (0.0, -344.0, -40.0),    # chin
+         #(0.0, -160.0, -50.0),   # center of eye
+         (-110.0, 215.0, -60.0),  # inner left eye corner
+         (110.0, 215.0, -60.0),   # inner right eye corner
+         (-300.0, 250.0, -90.0),  # left eye left corner
+         (300.0, 250.0, -90.0),   # right eye right corner
+         (-185.0, -180.0, -70.0), # left mouth corner
+         (185.0, -180.0, -70.0)   # right mouth corner
+     ])
+
+     """
+     model_points: NDArray = np.array([
+         (0.0, 0.0, 0.0),         # nose tip
+         (0.0, -450.0, 0.0),      # chin
+         (-110.0, 175.0, -20.0),  # inner left eye corner
+         (110.0, 175.0, -20.0),   # inner right eye corner
+         (-300.0, 200.0, -40.0),  # left eye left corner
+         (300.0, 200.0, -40.0),   # right eye right corner
+         (-176.0, -200.0, -20.0), # left mouth corner
+         (175.0, -200.0, -20.0)   # right mouth corner
+     ])
+     """
+
+     square_model_points: NDArray = np.array([
+         (-100.0, -100.0, 0), # left eye left corner
+         (100.0, -100.0, 0),  # right eye right corner
+         (-100.0, 100.0, 0),  # left mouth corner
+         (100.0, 100.0, 0)    # right mouth corner
+     ])
+
+     # Example image and camera parameters (replace with actual values)
+     image_path = sys.argv[1]
+     mp_image,face_landmarker_result = extract_landmark(image_path)
+     im = mp_image.numpy_view()
+     h,w = im.shape[:2]
+     cordinates = get_pixel_cordinate_list(face_landmarker_result.face_landmarks,[4,199, #6, # center of eye
+                                           33,263,133,362,61,291],w,h)
+     print(cordinates)
+     image_points: NDArray = np.array(cordinates, dtype="double")
+
+     import math
+     def calculate_distance(xy, xy2):
+         return math.sqrt((xy2[0] - xy[0])**2 + (xy2[1] - xy[1])**2)
+
+     if im is None:
+         raise FileNotFoundError(f"Could not open or find the image file: {image_path}")
+     size = im.shape
+     focal_length: float = calculate_distance(cordinates[0],cordinates[1])
+     focal_length = focal_length*1.5
+     print("focal length",focal_length)
+     center: tuple[float, float] = (size[1] / 2, size[0] / 2)
+     center = cordinates[0]
+     camera_matrix: NDArray = np.array([
+         [focal_length, 0, center[0]],
+         [0, focal_length, center[1]],
+         [0, 0, 1]
+     ], dtype="double")
+     dist_coeffs: NDArray = np.zeros((4, 1))  # assuming no lens distortion
+
+     # 2D image points. If you change the image, you need to change these values.
+     '''
+     image_points: NDArray = np.array([
+         (321, 571), # nose tip
+         (423, 852), # chin
+         (201, 406), # left eye left corner
+         (529, 363), # right eye right corner
+         (336, 705), # left mouth corner
+         (483, 693)  # right mouth corner
+     ], dtype="double")
+     '''
+     """
+     image_points: NDArray = np.array([
+         #(663, 325), # nose tip
+         (655, 388),
+         (705, 555), # chin
+         (549, 296), # inner left eye corner
+         (651, 291), # inner right eye corner
+         (453, 303), # left eye left corner
+         (718, 294), # right eye right corner
+         (591, 474), # left mouth corner
+         (715, 472)  # right mouth corner
+     ], dtype="double")
+     """
+
+     square_image_points: NDArray = np.array([
+         (549, 296),
+         (651, 291),
+         (573, 386),
+         (691, 370),
+     ], dtype="double")
+
+     flags_list = [
+         cv2.SOLVEPNP_EPNP #cv2.SOLVEPNP_ITERATIVE#,cv2.SOLVEPNP_SQPNP,cv2.SOLVEPNP_EPNP
+     ]
+     im_with_pose = im.copy()
+     for flags in flags_list:
+         rotation_vector, translation_vector = estimate_head_pose(im, model_points,image_points, camera_matrix, dist_coeffs,flags)
+         #print(f"Rotation Vector:\n {rotation_vector}")
+         #print(f"Translation Vector:\n {translation_vector}")
+         # initial pose
+         #im_with_pose = draw_head_pose(im_with_pose, image_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs)
+
+         from scipy.spatial.transform import Rotation as R
+         def print_euler(rotation_vector):
+             order = "yxz"
+             rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
+             r = R.from_matrix(rotation_matrix)
+             euler_angles = r.as_euler(order, degrees=True)
+             print(f"Euler Angles {order} (degrees): {euler_angles}")
+
+         print_euler(rotation_vector)
+         criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 1000, 1e-8)  # iteration termination criteria
+
+         rotation_vector, translation_vector = cv2.solvePnPRefineLM(model_points, image_points, camera_matrix, dist_coeffs, rotation_vector, translation_vector, criteria=criteria)
+         im_with_pose = draw_head_pose(im_with_pose, image_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs,128)
+         print_euler(rotation_vector)
+
+         #rotation_vector[0]=0
+         #rotation_vector[1]=0
+         #rotation_vector[2]=0
+
+         #(success, rotation_vector, translation_vector) = cv2.solvePnP(
+         #    model_points, image_points, camera_matrix, dist_coeffs,rotation_vector,translation_vector,flags=cv2.SOLVEPNP_ITERATIVE)
+
+         im_with_pose = draw_head_pose(im_with_pose, image_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs)
+
+         #print_euler(rotation_vector)
+
+         (rotation_matrix, jacobian) = cv2.Rodrigues(rotation_vector)
+         mat = np.hstack((rotation_matrix, translation_vector))
+
+         # extract yaw, pitch, roll
+         (_, _, _, _, _, _, eulerAngles) = cv2.decomposeProjectionMatrix(mat)
+         print(eulerAngles)
+         #rvec, tvec = cv2.solvePnPRefineVVS(model_points, image_points, camera_matrix, dist_coeffs, rotation_vector, translation_vector, criteria=criteria)
+         #im_with_pose = draw_head_pose(im_with_pose, image_points, rvec, tvec, camera_matrix, dist_coeffs)
+
+         # square variant
+         #rvec, tvec = estimate_head_pose(im, square_model_points,square_image_points, camera_matrix, dist_coeffs,cv2.SOLVEPNP_IPPE_SQUARE)
+         # not so good
+         #im_with_pose = draw_head_pose(im_with_pose, square_image_points, rvec, tvec, camera_matrix, dist_coeffs)
+
+     # convert the rotation matrix to Euler angles
+     #euler_angles = cv2.decomposeProjectionMatrix(rotation_matrix)[-1]
+
+     # Display image
+     cv2.imshow("Output", cv2.cvtColor(im_with_pose, cv2.COLOR_BGR2RGB))
+     cv2.waitKey(0)
+     cv2.destroyAllWindows()
+     cv2.imwrite("result.jpg",cv2.cvtColor(im_with_pose, cv2.COLOR_BGR2RGB))
+
+ if __name__ == "__main__":
+     main()
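A quick smoke test for draw_head_pose that needs no face image: project the axes onto a blank canvas with an identity rotation. The camera parameters here are arbitrary assumptions, not values from the app.

import cv2
import numpy as np
from cv2_pose_estimate import draw_head_pose

canvas = np.zeros((1024, 1024, 3), dtype=np.uint8)
camera_matrix = np.array([[800.0, 0, 512], [0, 800.0, 512], [0, 0, 1]])
rvec = np.zeros((3, 1))                    # no rotation
tvec = np.array([[0.0], [0.0], [1000.0]])  # push the axes in front of the camera
out = draw_head_pose(canvas, np.array([[512.0, 512.0]]), rvec, tvec,
                     camera_matrix, np.zeros((4, 1)))
cv2.imwrite("axes.jpg", out)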
demo_footer.html ADDED
@@ -0,0 +1,3 @@
+ <div>
+ <p> Images are generated with <a href="https://huggingface.co/black-forest-labs/FLUX.1-schnell">FLUX.1-schnell</a> and licensed under <a href="http://www.apache.org/licenses/LICENSE-2.0">the Apache 2.0 License</a></p>
+ </div>
demo_header.html ADDED
@@ -0,0 +1,17 @@
+ <div style="text-align: center;">
+ <h1>
+ Mediapipe Face-Pose Estimation plus OpenCV
+ </h1>
+ <div class="grid-container">
+ <img src="https://akjava.github.io/AIDiagramChatWithVoice-FaceCharacter/webp/128/00544245.webp" alt="Mediapipe Face Detection" class="image">
+
+ <p class="text">
+ This Space uses the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache 2.0</a> licensed <a href="https://ai.google.dev/edge/mediapipe/solutions/vision/face_landmarker">Mediapipe FaceLandmarker</a>.<br>
+ Current MediaPipe face-landmark detection struggles with faces rotated more than 45 degrees (due to limitations in the training data),<br>
+ so a new tool or method is needed to achieve better accuracy.<br>
+ OpenCV: I've tried it out, but there's still room for improvement in how the features work together.<br>
+ TODO: change the base model
+ </p>
+ </div>
+
+ </div>
demo_tools.html ADDED
@@ -0,0 +1,11 @@
+ <div style="text-align: center;">
+ <p>
+ <a href="https://huggingface.co/spaces/Akjava/flux1-schnell-img2img">Flux1-Img2Img(GPU)</a> |
+ <a href="https://huggingface.co/spaces/Akjava/flux1-schnell-mask-inpaint">Flux1-Inpaint(GPU)</a> |
+ <a href="https://huggingface.co/spaces/Akjava/mediapipe-68-points-facial-mask">Create 68 points Parts Mask</a> |
+ <a href="https://huggingface.co/spaces/Akjava/histgram-color-matching">Histogram Color Matching</a> |
+ <a href="https://huggingface.co/spaces/Akjava/WebPTalkHead">WebP anime with 3 images</a> |
+ <a href="https://huggingface.co/spaces/Akjava/WebP-Resize-Convert">WebP Resize Animation</a>
+ </p>
+ <p></p>
+ </div>
examples/00002062.jpg ADDED
examples/00002200.jpg ADDED
examples/00003245_00.jpg ADDED
examples/00005259.jpg ADDED
examples/00018022.jpg ADDED
examples/00039259.jpg ADDED
examples/00100265.jpg ADDED
examples/00824006.jpg ADDED
examples/00824008.jpg ADDED
examples/00825000.jpg ADDED
examples/00826007.jpg ADDED
examples/00827009.jpg ADDED
examples/00828003.jpg ADDED
examples/02316230.jpg ADDED
examples/img-above.jpg ADDED
face_landmarker.task ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:64184e229b263107bc2b804c6625db1341ff2bb731874b0bcc2fe6544e0bc9ff
+ size 3758596
face_landmarker.task.txt ADDED
@@ -0,0 +1,8 @@
+ Face landmark detection
+ https://ai.google.dev/edge/mediapipe/solutions/vision/face_landmarker
+
+ The model card page is
+ https://storage.googleapis.com/mediapipe-assets/MediaPipe%20BlazeFace%20Model%20Card%20(Short%20Range).pdf
+
+ The license is Apache 2.0
+ https://www.apache.org/licenses/LICENSE-2.0.html
glibvision/common_utils.py ADDED
@@ -0,0 +1,112 @@
+ import os
+
+ def check_exists_files(files,dirs,exit_on_error=True):
+     if files is not None:
+         if isinstance(files, str):
+             files = [files]
+         for file in files:
+             if not os.path.isfile(file):
+                 print(f"File {file} not found")
+                 if exit_on_error:
+                     exit(1)
+                 else:
+                     return 1
+     if dirs is not None:
+         if isinstance(dirs, str):
+             dirs = [dirs]
+         for dir in dirs:
+             if not os.path.isdir(dir):
+                 print(f"Dir {dir} not found")
+                 if exit_on_error:
+                     exit(1)
+                 else:
+                     return 1
+     return 0
+
+ image_extensions = [".jpg"]
+
+ def add_name_suffix(file_name,suffix,replace_suffix=False):
+     if not suffix.startswith("_"): # force the underscore
+         suffix = "_"+suffix
+
+     name,ext = os.path.splitext(file_name)
+     if replace_suffix:
+         index = name.rfind("_")
+         if index!=-1:
+             return f"{name[0:index]}{suffix}{ext}"
+
+     return f"{name}{suffix}{ext}"
+
+ def replace_extension(file_name,new_extension,suffix=None,replace_suffix=False):
+     if not new_extension.startswith("."):
+         new_extension = "."+new_extension
+
+     name,ext = os.path.splitext(file_name)
+     new_file = f"{name}{new_extension}"
+     if suffix:
+         return add_name_suffix(name+new_extension,suffix,replace_suffix)
+     return new_file
+
+ def list_digit_images(input_dir,sort=True):
+     digit_images = []
+     global image_extensions
+     files = os.listdir(input_dir)
+     for file in files:
+         if file.endswith(".jpg"): #TODO check other image extensions
+             base,ext = os.path.splitext(file)
+             if not base.isdigit():
+                 continue
+             digit_images.append(file)
+
+     if sort:
+         digit_images.sort()
+
+     return digit_images
+
+ def list_suffix_images(input_dir,suffix,is_digit=True,sort=True):
+     digit_images = []
+     global image_extensions
+     files = os.listdir(input_dir)
+     for file in files:
+         if file.endswith(".jpg"): #TODO check other image extensions
+             base,ext = os.path.splitext(file)
+             if base.endswith(suffix):
+                 if is_digit:
+                     if not base.replace(suffix,"").isdigit():
+                         continue
+                 digit_images.append(file)
+
+     if sort:
+         digit_images.sort()
+
+     return digit_images
+
+ import time
+
+ class ProgressTracker:
+     """
+     Tracks the progress of a task and prints the elapsed and remaining time.
+     """
+
+     def __init__(self,key, total_target):
+         """
+         Constructor
+
+         Args:
+             key: label used in progress messages
+             total_target (int): total number of items to process
+         """
+         self.key = key
+         self.total_target = total_target
+         self.complete_target = 0
+         self.start_time = time.time()
+
+     def update(self):
+         """
+         Advance the progress by one and print the elapsed and remaining time.
+         """
+         self.complete_target += 1
+         current_time = time.time()
+         consumed_time = current_time - self.start_time
+         remain_time = (consumed_time / self.complete_target) * (self.total_target - self.complete_target) if self.complete_target > 0 else 0
+         print(f"stepped {self.key} {self.total_target} of {self.complete_target}, consumed {(consumed_time / 60):.1f} min, remain {(remain_time / 60):.1f} min")
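Assumed usage of the naming helpers and ProgressTracker above (the file names are made up):

from glibvision.common_utils import add_name_suffix, replace_extension, ProgressTracker

print(add_name_suffix("0001.jpg", "mask"))        # 0001_mask.jpg
print(replace_extension("0001_mask.jpg", "png"))  # 0001_mask.png

tracker = ProgressTracker("resize", total_target=3)
for _ in range(3):
    tracker.update()  # prints elapsed and estimated remaining time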
glibvision/cv2_utils.py ADDED
@@ -0,0 +1,175 @@
+ import cv2
+ import numpy as np
+
+ # 2024-11-27 add copy_image
+
+ def draw_bbox(image,box,color=(255,0,0),thickness=1):
+     if thickness==0:
+         return
+
+     left = int(box[0])
+     top = int(box[1])
+     right = int(box[0]+box[2])
+     bottom = int(box[1]+box[3])
+     box_points = [(left,top),(right,top),(right,bottom),(left,bottom)]
+
+     cv2.polylines(image, [np.array(box_points)], isClosed=True, color=color, thickness=thickness)
+
+
+ def to_int_points(points):
+     int_points = []
+     for point in points:
+         int_points.append([int(point[0]),int(point[1])])
+     return int_points
+
+ def draw_text(img, text, point, font_scale=0.5, color=(200, 200, 200), thickness=1):
+     font = cv2.FONT_HERSHEY_SIMPLEX
+     cv2.putText(img, str(text), point, font, font_scale, color, thickness, cv2.LINE_AA)
+
+ plot_text_color = (200, 200, 200)
+ plot_text_font_scale = 0.5
+ plot_index = 1
+ plot_text = True
+
+ def set_plot_text(is_plot,text_font_scale,text_color):
+     global plot_index,plot_text,plot_text_font_scale,plot_text_color
+     plot_text = is_plot
+     plot_index = 1
+     plot_text_font_scale = text_font_scale
+     plot_text_color = text_color
+
+ def plot_points(image,points,isClosed=False,circle_size=3,circle_color=(255,0,0),line_size=1,line_color=(0,0,255)):
+     global plot_index,plot_text
+     int_points = to_int_points(points)
+     if circle_size>0:
+         for point in int_points:
+             cv2.circle(image,point,circle_size,circle_color,-1)
+             if plot_text:
+                 draw_text(image,plot_index,point,plot_text_font_scale,plot_text_color)
+                 plot_index+=1
+     if line_size>0:
+         cv2.polylines(image, [np.array(int_points)], isClosed=isClosed, color=line_color, thickness=line_size)
+
+ def fill_points(image,points,thickness=1,line_color=(255,255,255),fill_color=(255,255,255)):
+     np_points = np.array(points,dtype=np.int32)
+     cv2.fillPoly(image, [np_points], fill_color)
+     cv2.polylines(image, [np_points], isClosed=True, color=line_color, thickness=thickness)
+
+ def get_image_size(cv2_image):
+     return cv2_image.shape[:2]
+
+ def get_channel(np_array):
+     return np_array.shape[2] if np_array.ndim == 3 else 1
+
+ def get_numpy_text(np_array,key=""):
+     channel = get_channel(np_array)
+     return f"{key} shape = {np_array.shape} channel = {channel} ndim = {np_array.ndim} size = {np_array.size}"
+
+
+ def gray3d_to_2d(grayscale: np.ndarray) -> np.ndarray:
+     """
+     Convert a 3-dimensional grayscale image (1 channel) to 2 dimensions.
+
+     Args:
+         grayscale (np.ndarray): 3-dimensional grayscale image (1 channel).
+
+     Returns:
+         np.ndarray: 2-dimensional grayscale image.
+     """
+     channel = get_channel(grayscale)
+     if channel!=1:
+         raise ValueError(f"color maybe rgb or rgba {get_numpy_text(grayscale)}")
+
+     if grayscale.ndim == 2:
+         return grayscale
+     return np.squeeze(grayscale)
+
+ def blend_rgb_images(image1: np.ndarray, image2: np.ndarray, mask: np.ndarray) -> np.ndarray:
+     """
+     Blend two RGB images using a mask image.
+
+     Args:
+         image1 (np.ndarray): first image (RGB).
+         image2 (np.ndarray): second image (RGB).
+         mask (np.ndarray): mask image (grayscale).
+
+     Returns:
+         np.ndarray: blended image (RGB).
+
+     Raises:
+         ValueError: if the input image shapes do not match.
+     """
+
+     if image1.shape != image2.shape or image1.shape[:2] != mask.shape:
+         raise ValueError("Input image shapes do not match.")
+
+     # Convert the images to float
+     image1 = image1.astype(float)
+     image2 = image2.astype(float)
+
+     # Convert the mask to 3 channels and scale it to the 0-1 range
+     alpha = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR).astype(float) / 255.0
+
+     # Blend
+     blended = (1 - alpha) * image1 + alpha * image2
+
+     return blended.astype(np.uint8)
+
+ def create_color_image(img,color=(255,255,255)):
+     mask = np.zeros_like(img)
+
+     h, w = img.shape[:2]
+     cv2.rectangle(mask, (0, 0), (w, h), color, -1)
+     return mask
+
+ def pil_to_bgr_image(image):
+     np_image = np.array(image, dtype=np.uint8)
+     if np_image.shape[2] == 4:
+         bgr_img = cv2.cvtColor(np_image, cv2.COLOR_RGBA2BGRA)
+     else:
+         bgr_img = cv2.cvtColor(np_image, cv2.COLOR_RGB2BGR)
+     return bgr_img
+
+ def bgr_to_rgb(np_image):
+     if np_image.shape[2] == 4:
+         rgb_img = cv2.cvtColor(np_image, cv2.COLOR_BGRA2RGBA)
+     else:
+         rgb_img = cv2.cvtColor(np_image, cv2.COLOR_BGR2RGB)
+     return rgb_img
+
+ def copy_image(img1: np.ndarray, img2: np.ndarray, x: int, y: int) -> None:
+     # Type check
+     if not isinstance(img1, np.ndarray) or not isinstance(img2, np.ndarray):
+         raise TypeError("img1 and img2 must be NumPy arrays.")
+
+     # Check the number of dimensions and channels
+     if img1.ndim != 3 or img2.ndim != 3:
+         raise ValueError("Both img1 and img2 must be 3-dimensional arrays.")
+     elif img1.shape[2] != img2.shape[2]:
+         raise ValueError(f"img1 and img2 must have the same number of channels. img1 has {img1.shape[2]} channels, but img2 has {img2.shape[2]} channels.")
+
+     if x>=0:
+         offset_x = 0
+         w = min(img1.shape[1]-x,img2.shape[1])
+     else:
+         w = min(img1.shape[1],img2.shape[1]+x)
+         offset_x = int(-x)
+         x = 0
+
+     if y>=0:
+         h = min(img1.shape[0]-y,img2.shape[0])
+         offset_y = 0
+     else:
+         h = min(img1.shape[0],img2.shape[0]+y)
+         offset_y = int(-y)
+         y = 0
+     x = int(x)
+     y = int(y)
+     h = int(h)
+     w = int(w)
+
+     print(f"img1 {img1.shape} img2 {img2.shape} x={x} y={y} w={w} h={h}")
+     # Paste the overlapping part
+     img1[y:y+h, x:x+w] = img2[offset_y:h+offset_y, offset_x:w+offset_x]
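A small sketch of copy_image, which app.py uses for the offset-center pass; it clips the source patch when the offset is negative. Sizes here are arbitrary.

import numpy as np
from glibvision.cv2_utils import copy_image, plot_points

base = np.zeros((512, 512, 3), dtype=np.uint8)
patch = np.full((200, 200, 3), 255, dtype=np.uint8)
copy_image(base, patch, -50, 100)  # only the overlapping 150x200 region is pasted
plot_points(base, [(60, 150), (160, 150)], circle_size=4, line_size=1)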
glibvision/draw_utils.py ADDED
@@ -0,0 +1,42 @@
+ # DrawUtils
+ # drawing helpers that do not depend on PIL, cv2, or NumPy
+ import math
+ # 2024-11-29 add calculate_distance
+ def points_to_box(points):
+     x1 = float('inf')
+     x2 = 0
+     y1 = float('inf')
+     y2 = 0
+     for point in points:
+         if point[0]<x1:
+             x1 = point[0]
+         if point[0]>x2:
+             x2 = point[0]
+         if point[1]<y1:
+             y1 = point[1]
+         if point[1]>y2:
+             y2 = point[1]
+     return [x1,y1,x2-x1,y2-y1]
+
+ def box_to_point(box):
+     return [
+         [box[0],box[1]],
+         [box[0]+box[2],box[1]],
+         [box[0]+box[2],box[1]+box[3]],
+         [box[0],box[1]+box[3]]
+     ]
+
+ def plus_point(base_pt,add_pt):
+     return [base_pt[0]+add_pt[0],base_pt[1]+add_pt[1]]
+
+ def box_to_xy(box):
+     return [box[0],box[1],box[2]+box[0],box[3]+box[1]]
+
+ def to_int_points(points):
+     int_points = []
+     for point in points:
+         int_points.append([int(point[0]),int(point[1])])
+     return int_points
+
+ def calculate_distance(xy, xy2):
+     return math.sqrt((xy2[0] - xy[0])**2 + (xy2[1] - xy[1])**2)
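A quick check of the box helpers (the coordinates are arbitrary):

from glibvision.draw_utils import points_to_box, box_to_xy, plus_point

box = points_to_box([(10, 20), (110, 20), (60, 140)])
print(box)                           # [10, 20, 100, 120]  (x, y, w, h)
print(box_to_xy(box))                # [10, 20, 110, 140]  (x1, y1, x2, y2)
print(plus_point([10, 20], [5, 5]))  # [15, 25]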
glibvision/glandmark_utils.py ADDED
@@ -0,0 +1,48 @@
+ import os
+
+ # simple single-face version
+ def bbox_to_glandmarks(file_name,bbox,points=None):
+     base,ext = os.path.splitext(file_name)
+     glandmark = {"image":{
+         "boxes":[{
+             "left":int(bbox[0]),"top":int(bbox[1]),"width":int(bbox[2]),"height":int(bbox[3])
+         }],
+         "file":file_name,
+         "id":int(base)
+         # width,height ignored here
+     }}
+     if points is not None:
+         parts = []
+         for point in points:
+             parts.append({"x":int(point[0]),"y":int(point[1])})
+         glandmark["image"]["boxes"][0]["parts"] = parts
+     return glandmark
+
+ # technically this is not g-landmark/dlib
+ def convert_to_landmark_group_json(points):
+     if len(points)!=68:
+         print(f"points must be 68 but {len(points)}")
+         return None
+     new_points = list(points)
+
+     result = [ # possibly multiple people; any function could support multiple people
+         { # indices start at 0 but the point numbering starts at 1
+             "chin":new_points[0:17],
+             "left_eyebrow":new_points[17:22],
+             "right_eyebrow":new_points[22:27],
+             "nose_bridge":new_points[27:31],
+             "nose_tip":new_points[31:36],
+             "left_eye":new_points[36:42],
+             "right_eye":new_points[42:48],
+
+             # lip points use a customized structure from
+             # the MIT licensed face_recognition
+             # https://github.com/ageitgey/face_recognition
+             "top_lip":new_points[48:55]+[new_points[64]]+[new_points[63]]+[new_points[62]]+[new_points[61]]+[new_points[60]],
+             "bottom_lip":new_points[54:60]+[new_points[48]]+[new_points[60]]+[new_points[67]]+[new_points[66]]+[new_points[65]]+[new_points[64]],
+         }
+     ]
+     return result
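Assumed usage of bbox_to_glandmarks; note the file name stem must be numeric because the id is built with int(base):

import json
from glibvision.glandmark_utils import bbox_to_glandmarks

glandmark = bbox_to_glandmarks("0042.jpg", [10, 20, 100, 120], points=[(15, 25), (30, 40)])
print(json.dumps(glandmark, indent=1))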
glibvision/numpy_utils.py ADDED
@@ -0,0 +1,110 @@
+ import numpy as np
+
+
+ def apply_binary_mask_to_color(base_image,color,mask):
+     """
+     Set the masked region of an image to a solid color.
+
+     Args:
+         base_image (np.ndarray): image to modify.
+         color: color to write into the masked region.
+         mask (np.ndarray): binary mask image.
+
+     Returns:
+         np.ndarray: the image with the mask applied.
+     """
+     # TODO check all shapes
+     if mask.ndim == 2:
+         condition = mask == 255
+     else:
+         condition = mask[:,:,0] == 255
+
+     base_image[condition] = color
+     return base_image
+
+ def apply_binary_mask_to_image(base_image,paste_image,mask):
+     """
+     Copy part of one image into another using a binary mask.
+
+     Args:
+         base_image (np.ndarray): destination image.
+         paste_image (np.ndarray): source image.
+         mask (np.ndarray): binary mask image.
+
+     Returns:
+         np.ndarray: the image with the mask applied.
+     """
+     # TODO check all shapes
+     if mask.ndim == 2:
+         condition = mask == 255
+     else:
+         condition = mask[:,:,0] == 255
+
+     base_image[condition] = paste_image[condition]
+     return base_image
+
+ def pil_to_numpy(image):
+     return np.array(image, dtype=np.uint8)
+
+ def extruce_points(points,index,ratio=1.5):
+     """
+     Push the point at `index` outward from the centroid of the point set by `ratio`.
+     """
+     center_point = np.mean(points, axis=0)
+     if index < 0 or index >= len(points):
+         raise ValueError(f"index must be in range(0,{len(points)}) but value = {index}")
+     point1 = points[index]
+     print(f"center = {center_point}")
+     vec_to_center = point1 - center_point
+     return vec_to_center*ratio + center_point
+
+
+ def bulge_polygon(points, bulge_factor=0.1, isClosed=True):
+     """
+     Add a point at the middle of each polygon edge and bulge it outward.
+     Note: returns an ndarray.
+     """
+     # Convert the input points to a NumPy array
+     points = np.array(points)
+
+     # Find the centroid of the whole polygon
+     center_point = np.mean(points, axis=0)
+     #print(f"center = {center_point}")
+     new_points = []
+     num_points = len(points)
+     for i in range(num_points):
+         if i == num_points-1 and not isClosed:
+             break
+         p1 = points[i]
+         #print(f"p{i} = {p1}")
+         # vector from the centroid to the vertex
+         #vec_to_center = p1 - center_point
+
+         # edge vector and its midpoint
+         mid_diff = points[(i + 1) % num_points] - p1
+         mid = p1+(mid_diff/2)
+
+         #print(f"mid = {mid}")
+         out_vec = mid - center_point
+
+         # push the midpoint outward from the centroid
+         new_point = mid + out_vec * bulge_factor
+
+         new_points.append(p1)
+         new_points.append(new_point.astype(np.int32))
+
+     return np.array(new_points)
+
+
+ # image.shape for RGB is e.g. (1024,1024,3); use (1024,1024) as the 2-dimensional shape
+ def create_2d_image(shape):
+     grayscale_image = np.zeros(shape[:2], dtype=np.uint8)
+     return grayscale_image
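Sketch: bulge a square outward by 10 percent; the result interleaves the original vertices with one bulged midpoint per edge.

import numpy as np
from glibvision.numpy_utils import bulge_polygon

square = [(100, 100), (300, 100), (300, 300), (100, 300)]
bulged = bulge_polygon(square, bulge_factor=0.1)
print(bulged.shape)  # (8, 2)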
glibvision/pil_utils.py ADDED
@@ -0,0 +1,35 @@
+ from PIL import Image,ImageDraw
+ from .draw_utils import box_to_xy,to_int_points,box_to_point
+ # ver-2024-11-18
+ def create_color_image(width, height, color=(255,255,255)):
+     if color is None:
+         color = (0,0,0)
+
+     if len(color) == 3:
+         mode = "RGB"
+     elif len(color) == 4:
+         mode = "RGBA"
+     else:
+         raise ValueError("color must have 3 (RGB) or 4 (RGBA) values")
+
+     img = Image.new(mode, (width, height), color)
+     return img
+
+ # deprecated
+ def fill_points(image,points,color=(255,255,255)):
+     return draw_points(image,points,fill=color)
+
+ def draw_points(image,points,outline=None,fill=None,width=1):
+     draw = ImageDraw.Draw(image)
+     int_points = [(int(x), int(y)) for x, y in points]
+
+     if outline is not None or fill is not None:
+         draw.polygon(int_points, outline=outline, fill=fill, width=width)
+
+     return image
+
+ def draw_box(image,box,outline=None,fill=None):
+     points = to_int_points(box_to_point(box))
+     return draw_points(image,points,outline,fill)
+
+ def from_numpy(numpy_array):
+     return Image.fromarray(numpy_array)
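Sketch of the PIL helpers: draw an outlined box on a gray canvas (the sizes and colors are arbitrary).

from glibvision.pil_utils import create_color_image, draw_box

img = create_color_image(256, 256, (128, 128, 128))
draw_box(img, [32, 32, 192, 128], outline=(255, 0, 0))  # box is [x, y, w, h]
img.save("box.png")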
gradio_utils.py ADDED
@@ -0,0 +1,60 @@
+ import os
+ import time
+ import io
+ import hashlib
+
+ def clear_old_files(dir="files",passed_time=60*60):
+     try:
+         files = os.listdir(dir)
+         current_time = time.time()
+         for file in files:
+             file_path = os.path.join(dir,file)
+
+             ctime = os.stat(file_path).st_ctime
+             diff = current_time - ctime
+             #print(f"ctime={ctime},current_time={current_time},passed_time={passed_time},diff={diff}")
+             if diff > passed_time:
+                 os.remove(file_path)
+     except Exception:
+         print("clear_old_files failed; a gallery may still be using a file")
+
+ def get_buffer_id(buffer):
+     hash_object = hashlib.sha256(buffer.getvalue())
+     hex_dig = hash_object.hexdigest()
+     unique_id = hex_dig[:32]
+     return unique_id
+
+ def get_image_id(image):
+     buffer = io.BytesIO()
+     image.save(buffer, format='PNG')
+     return get_buffer_id(buffer)
+
+ def save_image(image,extension="jpg",dir_name="files"):
+     id = get_image_id(image)
+     os.makedirs(dir_name,exist_ok=True)
+     file_path = f"{dir_name}/{id}.{extension}"
+
+     image.save(file_path)
+     return file_path
+
+ def save_buffer(buffer,extension="webp",dir_name="files"):
+     id = get_buffer_id(buffer)
+     os.makedirs(dir_name,exist_ok=True)
+     file_path = f"{dir_name}/{id}.{extension}"
+
+     with open(file_path,"wb") as f:
+         f.write(buffer.getvalue())
+     return file_path
+
+ def write_file(file_path,text):
+     with open(file_path, 'w', encoding='utf-8') as f:
+         f.write(text)
+
+ def read_file(file_path):
+     """Read the text of the target file."""
+     with open(file_path, 'r', encoding='utf-8') as f:
+         content = f.read()
+     return content
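Assumed usage: images land under files/ named by a sha256 prefix of their content, and clear_old_files drops anything older than an hour.

from PIL import Image
from gradio_utils import save_image, clear_old_files

clear_old_files("files", passed_time=60 * 60)
img = Image.new("RGB", (64, 64), (200, 100, 50))
path = save_image(img, extension="webp")
print(path)  # e.g. files/<32-hex-chars>.webp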
mp_box.py ADDED
@@ -0,0 +1,138 @@
+ import mediapipe as mp
+ from mediapipe.tasks import python
+ from mediapipe.tasks.python import vision
+ from mediapipe.framework.formats import landmark_pb2
+ from mediapipe import solutions
+ import numpy as np
+
+ # heavily changed in the gradio app
+
+ # convert X,Y,W,H to x1,y1,x2,y2 (left-top, right-bottom style)
+ def xywh_to_xyxy(box):
+     return [box[0],box[1],box[0]+box[2],box[1]+box[3]]
+
+ def to_int_box(box):
+     return [int(box[0]),int(box[1]),int(box[2]),int(box[3])]
+
+ def convert_to_box(face_landmarks_list,indices,w=1024,h=1024):
+     x1 = w
+     y1 = h
+     x2 = 0
+     y2 = 0
+     for index in indices:
+         x = min(w,max(0,(face_landmarks_list[0][index].x*w)))
+         y = min(h,max(0,(face_landmarks_list[0][index].y*h)))
+         if x<x1:
+             x1 = x
+         if y<y1:
+             y1 = y
+         if x>x2:
+             x2 = x
+         if y>y2:
+             y2 = y
+
+     return [int(x1),int(y1),int(x2-x1),int(y2-y1)]
+
+
+ def box_to_square(bbox):
+     box = list(bbox)
+     if box[2]>box[3]:
+         diff = box[2]-box[3]
+         box[3] += diff
+         box[1] -= diff/2
+     elif box[3]>box[2]:
+         diff = box[3]-box[2]
+         box[2] += diff
+         box[0] -= diff/2
+     return box
+
+
+ def face_landmark_result_to_box(face_landmarker_result,width=1024,height=1024):
+     face_landmarks_list = face_landmarker_result.face_landmarks
+
+     full_indices = list(range(456))
+
+     MIDDLE_FOREHEAD = 151
+     BOTTOM_CHIN_EX = 152
+     BOTTOM_CHIN = 175
+     CHIN_TO_MIDDLE_FOREHEAD = [200,14,1,6,18,9]
+     MOUTH_BOTTOM = [202,200,422]
+     EYEBROW_CHEEK_LEFT_RIGHT = [46,226,50,1,280,446,276]
+
+     LEFT_HEAD_OUTER_EX = 251 # on a side face this is almost the same as full
+     LEFT_HEAD_OUTER = 301
+     LEFT_EYE_OUTER_EX = 356
+     LEFT_EYE_OUTER = 264
+     LEFT_MOUTH_OUTER_EX = 288
+     LEFT_MOUTH_OUTER = 288
+     LEFT_CHIN_OUTER = 435
+     RIGHT_HEAD_OUTER_EX = 21
+     RIGHT_HEAD_OUTER = 71
+     RIGHT_EYE_OUTER_EX = 127
+     RIGHT_EYE_OUTER = 34
+     RIGHT_MOUTH_OUTER_EX = 58
+     RIGHT_MOUTH_OUTER = 215
+     RIGHT_CHIN_OUTER = 150
+
+     # TODO name this line
+     min_indices = CHIN_TO_MIDDLE_FOREHEAD+EYEBROW_CHEEK_LEFT_RIGHT+MOUTH_BOTTOM
+
+     chin_to_brow_indices = [LEFT_CHIN_OUTER,LEFT_MOUTH_OUTER,LEFT_EYE_OUTER,LEFT_HEAD_OUTER,MIDDLE_FOREHEAD,RIGHT_HEAD_OUTER,RIGHT_EYE_OUTER,RIGHT_MOUTH_OUTER,RIGHT_CHIN_OUTER,BOTTOM_CHIN]+min_indices
+
+     box1 = convert_to_box(face_landmarks_list,min_indices,width,height)
+     box2 = convert_to_box(face_landmarks_list,chin_to_brow_indices,width,height)
+     box3 = convert_to_box(face_landmarks_list,full_indices,width,height)
+     #print(box)
+
+     return [box1,box2,box3,box_to_square(box1),box_to_square(box2),box_to_square(box3)]
+
+
+ def draw_landmarks_on_image(detection_result,rgb_image):
+     face_landmarks_list = detection_result.face_landmarks
+     annotated_image = np.copy(rgb_image)
+
+     # Loop through the detected faces to visualize.
+     for idx in range(len(face_landmarks_list)):
+         face_landmarks = face_landmarks_list[idx]
+
+         # Draw the face landmarks.
+         face_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
+         face_landmarks_proto.landmark.extend([
+             landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in face_landmarks
+         ])
+
+         solutions.drawing_utils.draw_landmarks(
+             image=annotated_image,
+             landmark_list=face_landmarks_proto,
+             connections=mp.solutions.face_mesh.FACEMESH_TESSELATION,
+             landmark_drawing_spec=None,
+             connection_drawing_spec=mp.solutions.drawing_styles
+             .get_default_face_mesh_tesselation_style())
+
+     return annotated_image
+
+ def mediapipe_to_box(image_data,model_path="face_landmarker.task"):
+     BaseOptions = mp.tasks.BaseOptions
+     FaceLandmarker = mp.tasks.vision.FaceLandmarker
+     FaceLandmarkerOptions = mp.tasks.vision.FaceLandmarkerOptions
+     VisionRunningMode = mp.tasks.vision.RunningMode
+
+     options = FaceLandmarkerOptions(
+         base_options=BaseOptions(model_asset_path=model_path),
+         running_mode=VisionRunningMode.IMAGE,
+         min_face_detection_confidence=0, min_face_presence_confidence=0
+     )
+
+     with FaceLandmarker.create_from_options(options) as landmarker:
+         if isinstance(image_data,str):
+             mp_image = mp.Image.create_from_file(image_data)
+         else:
+             mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=np.asarray(image_data))
+         face_landmarker_result = landmarker.detect(mp_image)
+         boxes = face_landmark_result_to_box(face_landmarker_result,mp_image.width,mp_image.height)
+         return boxes,mp_image,face_landmarker_result
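Sketch of mediapipe_to_box, assuming face_landmarker.task is present and portrait.jpg is a hypothetical face photo:

from PIL import Image
from mp_box import mediapipe_to_box

boxes, mp_image, result = mediapipe_to_box("portrait.jpg")
# boxes = [min, chin-to-brow, full, and their squared variants], each [x, y, w, h]
x, y, w, h = boxes[3]  # squared tight box
Image.open("portrait.jpg").crop((x, y, x + w, y + h)).save("face.jpg")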
mp_utils.py ADDED
@@ -0,0 +1,140 @@
+ import math
+
+ import mediapipe as mp
+ from mediapipe.tasks import python
+ from mediapipe.tasks.python import vision
+ from mediapipe.framework.formats import landmark_pb2
+ from mediapipe import solutions
+ import numpy as np
+
+ # 2024-11-27 extract_landmark: add args
+ #            add get_pixel_xyz
+ # 2024-11-28 add get_normalized_xyz
+
+ def calculate_distance(p1, p2):
+     """
+     Euclidean distance between two 2D points.
+     """
+     return math.sqrt((p2[0] - p1[0])**2 + (p2[1] - p1[1])**2)
+
+ def to_int_points(points):
+     ints = []
+     for pt in points:
+         value = [int(pt[0]),int(pt[1])]
+         ints.append(value)
+     return ints
+
+ debug = False
+
+ def divide_line_to_points(points,divided): # returns divided + 1 points
+     total_length = 0
+     line_length_list = []
+     for i in range(len(points)-1):
+         pt_length = calculate_distance(points[i],points[i+1])
+         total_length += pt_length
+         line_length_list.append(pt_length)
+
+     splited_length = total_length/divided
+
+     def get_new_point(index,lerp):
+         pt1 = points[index]
+         pt2 = points[index+1]
+         diff = [pt2[0] - pt1[0], pt2[1]-pt1[1]]
+         new_point = [pt1[0]+diff[0]*lerp,pt1[1]+diff[1]*lerp]
+         if debug:
+             print(f"pt1 ={pt1} pt2 ={pt2} diff={diff} new_point={new_point}")
+         return new_point
+
+     if debug:
+         print(f"{total_length} splitted = {splited_length} line-length-list = {len(line_length_list)}")
+     splited_points = [points[0]]
+     for i in range(1,divided):
+         need_length = splited_length*i
+         if debug:
+             print(f"{i} need length = {need_length}")
+         current_length = 0
+         for j in range(len(line_length_list)):
+             line_length = line_length_list[j]
+             current_length += line_length
+             if current_length>need_length:
+                 if debug:
+                     print(f"over need length index = {j} current={current_length}")
+                 diff = current_length - need_length
+
+                 lerp_point = 1.0 - (diff/line_length)
+                 if debug:
+                     print(f"over = {diff} lerp ={lerp_point}")
+                 new_point = get_new_point(j,lerp_point)
+
+                 splited_points.append(new_point)
+                 break
+
+     splited_points.append(points[-1]) # last one
+     splited_points = to_int_points(splited_points)
+
+     if debug:
+         print(f"sp={len(splited_points)}")
+     return splited_points
+
+
+ def expand_bbox(bbox,left=5,top=5,right=5,bottom=5):
+     left_pixel = bbox[2]*(float(left)/100)
+     top_pixel = bbox[3]*(float(top)/100)
+     right_pixel = bbox[2]*(float(right)/100)
+     bottom_pixel = bbox[3]*(float(bottom)/100)
+     new_box = list(bbox)
+     new_box[0] -= left_pixel
+     new_box[1] -= top_pixel
+     new_box[2] += left_pixel+right_pixel
+     new_box[3] += top_pixel+bottom_pixel
+     return new_box
+
+ # for normalized-value indices see mp_constants
+ def get_normalized_cordinate(face_landmarks_list,index):
+     x = face_landmarks_list[0][index].x
+     y = face_landmarks_list[0][index].y
+     return x,y
+
+ def get_normalized_xyz(face_landmarks_list,index):
+     x = face_landmarks_list[0][index].x
+     y = face_landmarks_list[0][index].y
+     z = face_landmarks_list[0][index].z
+     return x,y,z
+
+ # z stays normalized
+ def get_pixel_xyz(face_landmarks_list,landmark,width,height):
+     point = get_normalized_cordinate(face_landmarks_list,landmark)
+     z = face_landmarks_list[0][landmark].z
+     return int(point[0]*width),int(point[1]*height),z
+
+ def get_pixel_cordinate(face_landmarks_list,landmark,width,height):
+     point = get_normalized_cordinate(face_landmarks_list,landmark)
+     return int(point[0]*width),int(point[1]*height)
+
+ def get_pixel_cordinate_list(face_landmarks_list,indices,width,height):
+     cordinates = []
+     for index in indices:
+         cordinates.append(get_pixel_cordinate(face_landmarks_list,index,width,height))
+     return cordinates
+
+ def extract_landmark(image_data,model_path="face_landmarker.task",min_face_detection_confidence=0,min_face_presence_confidence=0,output_facial_transformation_matrixes=False):
+     BaseOptions = mp.tasks.BaseOptions
+     FaceLandmarker = mp.tasks.vision.FaceLandmarker
+     FaceLandmarkerOptions = mp.tasks.vision.FaceLandmarkerOptions
+     VisionRunningMode = mp.tasks.vision.RunningMode
+
+     options = FaceLandmarkerOptions(
+         base_options=BaseOptions(model_asset_path=model_path),
+         running_mode=VisionRunningMode.IMAGE,
+         min_face_detection_confidence=min_face_detection_confidence,
+         min_face_presence_confidence=min_face_presence_confidence,
+         output_facial_transformation_matrixes=output_facial_transformation_matrixes
+     )
+
+     with FaceLandmarker.create_from_options(options) as landmarker:
+         if isinstance(image_data,str):
+             mp_image = mp.Image.create_from_file(image_data)
+         else:
+             mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=np.asarray(image_data))
+         face_landmarker_result = landmarker.detect(mp_image)
+         return mp_image,face_landmarker_result
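Sketch of extract_landmark plus get_pixel_cordinate_list, assuming face_landmarker.task and a hypothetical portrait.jpg sit next to the script. The indices are the same ones app.py feeds to solvePnP.

from mp_utils import extract_landmark, get_pixel_cordinate_list

mp_image, result = extract_landmark("portrait.jpg", "face_landmarker.task")
h, w = mp_image.numpy_view().shape[:2]
# nose tip, chin, inner eyes, outer eyes, mouth corners
points = get_pixel_cordinate_list(result.face_landmarks,
                                  [4, 199, 133, 362, 33, 263, 61, 291], w, h)
print(points)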