feat: detecting human features

2025-04-23 13:09:41 +02:00
parent f13878d8bc
commit bcfc90acdf
2 changed files with 44 additions and 38 deletions


@@ -3,10 +3,9 @@ import urllib.request
from typing import Dict, List
import json
from ultralytics import YOLO
import cv2
import numpy as np
import mediapipe as mp
MODEL_PATH = Path("assets", "models")
@@ -39,52 +38,39 @@ def require_net(name: str):
)
def detect_humans_mediapipe(to_detect: str):
def detect_human_parts(human: dict):
parts = human["parts"]
to_detect = human["crop"]["file"]
_p = Path(to_detect)
detected = str(_p.with_name(_p.stem + "_detected" + _p.suffix))
boxes_file = str(_p.with_name(_p.stem + "_boxes.json"))
print(f"detecting humans: {to_detect} => {detected}")
print(f"detecting human parts: {to_detect} => {detected}")
# Initialize MediaPipe Pose
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils
model = YOLO('yolov8n-pose.pt') # You can also try 'yolov8s-pose.pt' for better accuracy
results = model(to_detect)[0]
# Load your image
image = cv2.imread(to_detect)
# Convert the BGR image to RGB (MediaPipe uses RGB format)
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
did_detect = False
for person in results.keypoints.data:
keypoints = person.cpu().numpy()
# Perform pose detection
results = pose.process(image_rgb)
# COCO keypoint indices: 0=nose, 5=left_shoulder, 11=left_hip, 15=left_ankle
head = tuple(map(int, keypoints[0][:2]))
foot = tuple(map(int, keypoints[15][:2]))
# Draw landmarks on the image (optional)
if results.pose_landmarks:
mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
cv2.circle(image, head, 5, (255, 0, 0), -1) # Head in blue
cv2.circle(image, foot, 5, (0, 0, 255), -1) # Foot in red
did_detect = True
# You can extract specific keypoints (head, feet, etc.)
# Example: 0 = Nose, 15 = Left Foot, 16 = Right Foot
head = results.pose_landmarks.landmark[0] # Nose landmark
left_foot = results.pose_landmarks.landmark[15] # Left Foot
right_foot = results.pose_landmarks.landmark[16] # Right Foot
# Convert to pixel coordinates
h, w, _ = image.shape
head_coords = int(head.x * w), int(head.y * h)
left_foot_coords = int(left_foot.x * w), int(left_foot.y * h)
right_foot_coords = int(right_foot.x * w), int(right_foot.y * h)
# Draw head and feet positions on the image
cv2.circle(image, head_coords, 5, (255, 0, 0), -1) # Head in blue
cv2.circle(image, left_foot_coords, 5, (0, 0, 255), -1) # Left foot in red
cv2.circle(image, right_foot_coords, 5, (0, 0, 255), -1) # Right foot in red
# Save the result
cv2.imwrite(detected, image)
if did_detect:
cv2.imwrite(detected, image)
def detect_humans(to_detect: str):
def detect_humans(to_detect: str, crop_padding: int = 20):
_p = Path(to_detect)
detected = str(_p.with_name(_p.stem + "_detected" + _p.suffix))
boxes_file = str(_p.with_name(_p.stem + "_boxes.json"))
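The hunk above swaps the MediaPipe pose backend for Ultralytics YOLOv8 pose and threads a crop_padding parameter into detect_humans. A minimal, self-contained sketch of the keypoint flow the new code relies on; the input path and the drawn markers are illustrative, not part of the commit:

import cv2
from ultralytics import YOLO

model = YOLO("yolov8n-pose.pt")          # weights are downloaded on first use
image = cv2.imread("person.jpg")         # placeholder input path
results = model("person.jpg")[0]         # first result: a single input image

for person in results.keypoints.data:    # one (17, 3) tensor per detected person
    keypoints = person.cpu().numpy()     # rows are (x, y, confidence), COCO order
    head = tuple(map(int, keypoints[0][:2]))     # 0 = nose
    foot = tuple(map(int, keypoints[15][:2]))    # 15 = left ankle
    cv2.circle(image, head, 5, (255, 0, 0), -1)  # blue dot (BGR) on the head
    cv2.circle(image, foot, 5, (0, 0, 255), -1)  # red dot on the foot

cv2.imwrite("person_detected.jpg", image)

Unlike the removed MediaPipe path, YOLO returns keypoints in pixel coordinates, so no landmark-to-pixel conversion against the image shape is needed.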
@@ -143,7 +129,16 @@ def detect_humans(to_detect: str):
x, y, w, h = boxes[i]
human_part_image_path = human_part_folder / (_p.stem + "_" + str(i) + _p.suffix)
human_crop = original_image[y:y+h, x:x+w]
image_height, image_width = image.shape[:2]
# Compute safe crop coordinates with padding
x1 = max(x - crop_padding, 0)
y1 = max(y - crop_padding, 0)
x2 = min(x + w + crop_padding, image_width)
y2 = min(y + h + crop_padding, image_height)
human_crop = original_image[y1:y2, x1:x2]
cv2.imwrite(str(human_part_image_path), human_crop)
print(f"\tfound human at {x}/{y} with the size of {w} x {h}")
@@ -152,7 +147,14 @@ def detect_humans(to_detect: str):
"y": y,
"w": w,
"h": h,
"cropped": str(human_part_image_path)
"crop": {
"file": str(human_part_image_path),
"x": x1,
"y": y,
"w": x2 - x1,
"h": y2 - y1,
},
"parts": {},
})
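Continuing the worked example above, the entry appended for that detection would record both the raw box and the padded crop geometry (file name and values illustrative):

{
    "x": 10, "y": 30, "w": 100, "h": 200,
    "crop": {
        "file": "humans/frame_0.jpg",
        "x": 0, "y": 10, "w": 130, "h": 240,
    },
    "parts": {},
}

Storing the clamped crop origin and size means part coordinates found inside a crop can later be mapped back into full-frame coordinates.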
@@ -163,3 +165,6 @@ def detect_humans(to_detect: str):
with open(boxes_file, "w") as f:
json.dump(boxes_structures, f)
cv2.imwrite(detected, image)
for human in human_boxes:
detect_human_parts(human)
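Taken together, detect_humans now finds people, writes the padded per-person crops plus a *_boxes.json sidecar, and hands each human dict to detect_human_parts for pose keypoints. A minimal driver, assuming the functions live in a module named detect (module and image names are placeholders):

from detect import detect_humans

detect_humans("frame.jpg", crop_padding=20)
# expected outputs, per the code above:
#   frame_detected.jpg  - full frame with human boxes drawn
#   frame_boxes.json    - box and crop metadata for each person
#   one crop per person, each run through YOLO pose by detect_human_parts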