feat: detecting human features

2025-04-23 13:09:41 +02:00
parent f13878d8bc
commit bcfc90acdf
2 changed files with 44 additions and 38 deletions


@@ -3,10 +3,9 @@ import urllib.request
from typing import Dict, List
import json
from ultralytics import YOLO
import cv2
import numpy as np
import mediapipe as mp
MODEL_PATH = Path("assets", "models")
@@ -39,52 +38,39 @@ def require_net(name: str):
)
def detect_humans_mediapipe(to_detect: str):
def detect_human_parts(human: dict):
parts = human["parts"]
to_detect = human["crop"]["file"]
_p = Path(to_detect)
detected = str(_p.with_name(_p.stem + "_detected" + _p.suffix))
boxes_file = str(_p.with_name(_p.stem + "_boxes.json"))
print(f"detecting humans: {to_detect} => {detected}")
print(f"detecting human parts: {to_detect} => {detected}")
# Initialize MediaPipe Pose
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils
model = YOLO('yolov8n-pose.pt') # You can also try 'yolov8s-pose.pt' for better accuracy
results = model(to_detect)[0]
# Load your image
image = cv2.imread(to_detect)
# Convert the BGR image to RGB (MediaPipe uses RGB format)
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
did_detect = False
for person in results.keypoints.data:
keypoints = person.cpu().numpy()
# Perform pose detection
results = pose.process(image_rgb)
# COCO keypoint indices: 0=nose, 5=left_shoulder, 11=left_hip, 15=left_ankle
head = tuple(map(int, keypoints[0][:2]))
foot = tuple(map(int, keypoints[15][:2]))
# Draw landmarks on the image (optional)
if results.pose_landmarks:
mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
cv2.circle(image, head, 5, (255, 0, 0), -1) # Head in blue
cv2.circle(image, foot, 5, (0, 0, 255), -1) # Foot in red
did_detect = True
# You can extract specific keypoints (head, feet, etc.)
# Example: 0 = Nose, 15 = Left Foot, 16 = Right Foot
head = results.pose_landmarks.landmark[0] # Nose landmark
left_foot = results.pose_landmarks.landmark[15] # Left Foot
right_foot = results.pose_landmarks.landmark[16] # Right Foot
# Convert to pixel coordinates
h, w, _ = image.shape
head_coords = int(head.x * w), int(head.y * h)
left_foot_coords = int(left_foot.x * w), int(left_foot.y * h)
right_foot_coords = int(right_foot.x * w), int(right_foot.y * h)
# Draw head and feet positions on the image
cv2.circle(image, head_coords, 5, (255, 0, 0), -1) # Head in blue
cv2.circle(image, left_foot_coords, 5, (0, 0, 255), -1) # Left foot in red
cv2.circle(image, right_foot_coords, 5, (0, 0, 255), -1) # Right foot in red
# Save the result
cv2.imwrite(detected, image)
if did_detect:
cv2.imwrite(detected, image)
def detect_humans(to_detect: str):
def detect_humans(to_detect: str, crop_padding: int = 20):
_p = Path(to_detect)
detected = str(_p.with_name(_p.stem + "_detected" + _p.suffix))
boxes_file = str(_p.with_name(_p.stem + "_boxes.json"))
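The hunk above swaps the MediaPipe pose backend for Ultralytics YOLOv8 pose and threads a crop_padding parameter into detect_humans. A minimal, self-contained sketch of the keypoint flow the new code relies on; the input path and the drawn markers are illustrative, not part of the commit:

import cv2
from ultralytics import YOLO

model = YOLO("yolov8n-pose.pt")          # weights are downloaded on first use
image = cv2.imread("person.jpg")         # placeholder input path
results = model("person.jpg")[0]         # first result: a single input image

for person in results.keypoints.data:    # one (17, 3) tensor per detected person
    keypoints = person.cpu().numpy()     # rows are (x, y, confidence), COCO order
    head = tuple(map(int, keypoints[0][:2]))     # 0 = nose
    foot = tuple(map(int, keypoints[15][:2]))    # 15 = left ankle
    cv2.circle(image, head, 5, (255, 0, 0), -1)  # blue dot (BGR) on the head
    cv2.circle(image, foot, 5, (0, 0, 255), -1)  # red dot on the foot

cv2.imwrite("person_detected.jpg", image)

Unlike the removed MediaPipe path, YOLO returns keypoints in pixel coordinates, so no landmark-to-pixel conversion against the image shape is needed.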
@@ -143,7 +129,16 @@ def detect_humans(to_detect: str):
x, y, w, h = boxes[i]
human_part_image_path = human_part_folder / (_p.stem + "_" + str(i) + _p.suffix)
human_crop = original_image[y:y+h, x:x+w]
image_height, image_width = image.shape[:2]
# Compute safe crop coordinates with padding
x1 = max(x - crop_padding, 0)
y1 = max(y - crop_padding, 0)
x2 = min(x + w + crop_padding, image_width)
y2 = min(y + h + crop_padding, image_height)
human_crop = original_image[y1:y2, x1:x2]
cv2.imwrite(str(human_part_image_path), human_crop)
print(f"\tfound human at {x}/{y} with the size of {w} x {h}")
@@ -152,7 +147,14 @@ def detect_humans(to_detect: str):
"y": y,
"w": w,
"h": h,
"cropped": str(human_part_image_path)
"crop": {
"file": str(human_part_image_path),
"x": x1,
"y": y,
"w": x2 - x1,
"h": y2 - y1,
},
"parts": {},
})
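Continuing the worked example above, the entry appended for that detection would record both the raw box and the padded crop geometry (file name and values illustrative):

{
    "x": 10, "y": 30, "w": 100, "h": 200,
    "crop": {
        "file": "humans/frame_0.jpg",
        "x": 0, "y": 10, "w": 130, "h": 240,
    },
    "parts": {},
}

Storing the clamped crop origin and size means part coordinates found inside a crop can later be mapped back into full-frame coordinates.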
@@ -163,3 +165,6 @@ def detect_humans(to_detect: str):
with open(boxes_file, "w") as f:
json.dump(boxes_structures, f)
cv2.imwrite(detected, image)
for human in human_boxes:
detect_human_parts(human)
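Taken together, detect_humans now finds people, writes the padded per-person crops plus a *_boxes.json sidecar, and hands each human dict to detect_human_parts for pose keypoints. A minimal driver, assuming the functions live in a module named detect (module and image names are placeholders):

from detect import detect_humans

detect_humans("frame.jpg", crop_padding=20)
# expected outputs, per the code above:
#   frame_detected.jpg  - full frame with human boxes drawn
#   frame_boxes.json    - box and crop metadata for each person
#   one crop per person, each run through YOLO pose by detect_human_parts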