Spaces:

Afnan214
/

pose-detection

Sleeping

App Files Files Community

pose-detection / app.py

Afnan214

crediting

13420d7 unverified 4 months ago

raw

history blame contribute delete

4.17 kB

	import os
	import tempfile

	import cv2
	import numpy as np
	import streamlit as st
	from PIL import Image

	from face_detection import FaceDetector
	from mark_detection import MarkDetector
	from pose_estimation import PoseEstimator
	from utils import refine
	# Page header and attribution for the borrowed pipeline.
	st.title("Head Pose Estimation")
	st.text("Just a heads up (pun intended)... The code used for this space is largely borrowed from https://github.com/yinguobing/head-pose-estimation. Slightly altered to fit image needs and make it work on huggingface.")

	# Extensions offered by the uploader for each kind of input.
	IMAGE_EXTENSIONS = ["jpg", "jpeg", "png"]
	VIDEO_EXTENSIONS = ["mp4", "mov", "avi", "mkv"]

	# Choose between Image or Video file upload
	file_type = st.selectbox("Choose the type of file you want to upload", ("Image", "Video"))

	# Only offer the extensions matching the selected type, so a video is never
	# handed to the image decoder (and an image never to the video reader).
	uploaded_file = st.file_uploader(
	    "Upload an image or video file of your face",
	    type=IMAGE_EXTENSIONS if file_type == "Image" else VIDEO_EXTENSIONS,
	)

	# Display placeholder for real-time video output
	FRAME_WINDOW = st.image([])

	def _load_models(frame_width: int, frame_height: int):
	    """Create the face detector, landmark detector and pose estimator.

	    Returns them as a ``(face_detector, mark_detector, pose_estimator)``
	    tuple; the pose estimator is sized for the given frame dimensions.
	    """
	    face_detector = FaceDetector("assets/face_detector.onnx")
	    mark_detector = MarkDetector("assets/face_landmarks.onnx")
	    pose_estimator = PoseEstimator(frame_width, frame_height)
	    return face_detector, mark_detector, pose_estimator


	def _draw_head_pose(frame: np.ndarray, face_detector, mark_detector,
	                    pose_estimator, frame_width: int, frame_height: int) -> None:
	    """Estimate the head pose of the first detected face and draw it on ``frame``.

	    ``frame`` is modified in place and must use the same BGR channel order as
	    the frames read by ``cv2.VideoCapture``. Nothing is drawn when no face is
	    detected.
	    """
	    # Step 1: detect faces (0.7 is the value passed to the detector as its
	    # second argument - presumably a confidence threshold; confirm upstream).
	    faces, _ = face_detector.detect(frame, 0.7)

	    # `faces` may be an array, so test its length instead of its truthiness.
	    if len(faces) == 0:
	        return

	    # Step 2: crop the first face after refining its bounding box.
	    face = refine(faces, frame_width, frame_height, 0.15)[0]
	    x1, y1, x2, y2 = face[:4].astype(int)
	    patch = frame[y1:y2, x1:x2]

	    # Step 3: detect the 68 landmarks on the patch and map them from local
	    # patch coordinates to global image coordinates. Both axes are scaled by
	    # the box width (assumes the refined box is square - TODO confirm).
	    marks = mark_detector.detect([patch])[0].reshape([68, 2])
	    marks *= (x2 - x1)
	    marks[:, 0] += x1
	    marks[:, 1] += y1

	    # Step 4: solve the pose from the landmarks and draw it on the frame.
	    pose = pose_estimator.solve(marks)
	    pose_estimator.visualize(frame, pose, color=(0, 255, 0))


	if uploaded_file is not None:
	    # Video processing
	    if file_type == "Video":
	        # OpenCV reads videos from a path, so spill the upload to disk. Leaving
	        # the `with` block closes the file, which guarantees every byte is
	        # flushed before VideoCapture opens it.
	        suffix = os.path.splitext(uploaded_file.name)[1]
	        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tfile:
	            tfile.write(uploaded_file.read())

	        cap = cv2.VideoCapture(tfile.name)
	        try:
	            st.write(f"Video source: {tfile.name}")

	            # Getting frame sizes
	            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
	            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

	            # Initialize face detection, landmark detection, and pose estimation models
	            face_detector, mark_detector, pose_estimator = _load_models(
	                frame_width, frame_height
	            )

	            # Process each frame until the stream ends or a read fails.
	            while cap.isOpened():
	                ret, frame = cap.read()
	                if not ret:
	                    break

	                _draw_head_pose(
	                    frame, face_detector, mark_detector, pose_estimator,
	                    frame_width, frame_height,
	                )

	                # Convert frame to RGB for Streamlit display
	                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	                FRAME_WINDOW.image(frame_rgb)
	        finally:
	            # Always free the capture handle and remove the temporary copy,
	            # even if a model fails to load or a frame fails to process.
	            cap.release()
	            os.remove(tfile.name)

	    # Image processing
	    elif file_type == "Image":
	        # PIL decodes to RGB (or RGBA / greyscale / palette). Normalise to
	        # three channels, then switch to BGR so the image goes through the
	        # pipeline in the same channel order as the video frames.
	        rgb_input = np.array(Image.open(uploaded_file).convert("RGB"))
	        image = cv2.cvtColor(rgb_input, cv2.COLOR_RGB2BGR)
	        frame_height, frame_width = image.shape[:2]

	        # Initialize models for detection and pose estimation
	        face_detector, mark_detector, pose_estimator = _load_models(
	            frame_width, frame_height
	        )

	        # Detect the face, estimate the pose and draw it on the image
	        _draw_head_pose(
	            image, face_detector, mark_detector, pose_estimator,
	            frame_width, frame_height,
	        )

	        # Convert image back to RGB and display in Streamlit
	        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
	        st.image(image_rgb, caption="Pose Estimated Image", use_column_width=True)