feat: detecting human features

Hazel 2025-04-23 13:09:41 +02:00
parent f13878d8bc
commit bcfc90acdf
2 changed files with 44 additions and 38 deletions

View File

@@ -3,6 +3,7 @@ name = "secure_pixelation"
 version = "0.0.0"
 dependencies = [
     "opencv_python~=4.11.0.86",
+    "ultralytics~=8.3.114",
 ]
 authors = []
 description = "Hiding faces with a mosaic has proven incredibly unsafe, especially with videos, because the algorithm isn't destructive. However, if you black out the selected area, repopulate it with generative AI, and then pixelate it, it should look authentic but be 100% destructive, and thus safe."
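The description outlines the pipeline: black out the detected region, repopulate it with generative AI, then pixelate the synthetic fill, so even a reversed mosaic can only ever reveal generated content. As a minimal sketch of the destructive pixelation step, assuming OpenCV and a box in (x, y, w, h) form; the name pixelate_region and the block_size parameter are illustrative, not part of this commit:

import cv2
import numpy as np

def pixelate_region(image: np.ndarray, region: tuple, block_size: int = 16) -> np.ndarray:
    """Destructively pixelate a rectangular region of a BGR image in place."""
    x, y, w, h = region
    roi = image[y:y+h, x:x+w]
    # Downscale, then upscale with nearest-neighbour interpolation: every
    # block collapses to a single colour, so the original pixels are gone.
    small = cv2.resize(roi, (max(1, w // block_size), max(1, h // block_size)),
                       interpolation=cv2.INTER_LINEAR)
    image[y:y+h, x:x+w] = cv2.resize(small, (w, h), interpolation=cv2.INTER_NEAREST)
    return image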

View File

@@ -3,10 +3,9 @@ import urllib.request
 from typing import Dict, List
 import json
+from ultralytics import YOLO
 import cv2
 import numpy as np
-import mediapipe as mp

 MODEL_PATH = Path("assets", "models")
@@ -39,52 +38,39 @@ def require_net(name: str):
     )


-def detect_humans_mediapipe(to_detect: str):
+def detect_human_parts(human: dict):
+    parts = human["parts"]
+    to_detect = human["crop"]["file"]
     _p = Path(to_detect)
     detected = str(_p.with_name(_p.stem + "_detected" + _p.suffix))
     boxes_file = str(_p.with_name(_p.stem + "_boxes.json"))

-    print(f"detecting humans: {to_detect} => {detected}")
+    print(f"detecting human parts: {to_detect} => {detected}")

-    # Initialize MediaPipe Pose
-    mp_pose = mp.solutions.pose
-    pose = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)
-    mp_drawing = mp.solutions.drawing_utils
+    model = YOLO('yolov8n-pose.pt')  # You can also try 'yolov8s-pose.pt' for better accuracy
+    results = model(to_detect)[0]

     # Load your image
     image = cv2.imread(to_detect)

-    # Convert the BGR image to RGB (MediaPipe uses RGB format)
-    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    did_detect = False
+    for person in results.keypoints.data:
+        keypoints = person.cpu().numpy()

-    # Perform pose detection
-    results = pose.process(image_rgb)
+        # Common COCO keypoints: 0=nose, 5=left_shoulder, 11=left_hip, 15=left_ankle
+        head = tuple(map(int, keypoints[0][:2]))
+        foot = tuple(map(int, keypoints[15][:2]))

-    # Draw landmarks on the image (optional)
-    if results.pose_landmarks:
-        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
+        cv2.circle(image, head, 5, (255, 0, 0), -1)  # Head in blue
+        cv2.circle(image, foot, 5, (0, 0, 255), -1)  # Foot in red
+        did_detect = True

-        # You can extract specific keypoints (head, feet, etc.)
-        # Example: 0 = Nose, 15 = Left Foot, 16 = Right Foot
-        head = results.pose_landmarks.landmark[0]  # Nose landmark
-        left_foot = results.pose_landmarks.landmark[15]  # Left foot
-        right_foot = results.pose_landmarks.landmark[16]  # Right foot
-
-        # Convert to pixel coordinates
-        h, w, _ = image.shape
-        head_coords = int(head.x * w), int(head.y * h)
-        left_foot_coords = int(left_foot.x * w), int(left_foot.y * h)
-        right_foot_coords = int(right_foot.x * w), int(right_foot.y * h)
-
-        # Draw head and feet positions on the image
-        cv2.circle(image, head_coords, 5, (255, 0, 0), -1)  # Head in blue
-        cv2.circle(image, left_foot_coords, 5, (0, 0, 255), -1)  # Left foot in red
-        cv2.circle(image, right_foot_coords, 5, (0, 0, 255), -1)  # Right foot in red

     # Save the result
-    cv2.imwrite(detected, image)
+    if did_detect:
+        cv2.imwrite(detected, image)


-def detect_humans(to_detect: str):
+def detect_humans(to_detect: str, crop_padding: int = 20):
     _p = Path(to_detect)
     detected = str(_p.with_name(_p.stem + "_detected" + _p.suffix))
     boxes_file = str(_p.with_name(_p.stem + "_boxes.json"))
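Because detect_human_parts() runs the pose model on the padded crop written by detect_humans(), its keypoints are in crop-local coordinates. A hypothetical helper, not part of this commit, that maps them back into the original frame using the offsets stored under the "crop" key (added in the hunk further down):

def crop_to_image_coords(kp_x: float, kp_y: float, crop: dict) -> tuple:
    # crop["x"] / crop["y"] are the top-left corner of the padded crop
    # inside the original image, as written by detect_humans().
    return int(kp_x + crop["x"]), int(kp_y + crop["y"])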
@@ -143,7 +129,16 @@ def detect_humans(to_detect: str):
         x, y, w, h = boxes[i]
         human_part_image_path = human_part_folder / (_p.stem + "_" + str(i) + _p.suffix)
-        human_crop = original_image[y:y+h, x:x+w]
+        image_height, image_width = image.shape[:2]
+
+        # Compute safe crop coordinates with padding, clamped to the image borders
+        x1 = max(x - crop_padding, 0)
+        y1 = max(y - crop_padding, 0)
+        x2 = min(x + w + crop_padding, image_width)
+        y2 = min(y + h + crop_padding, image_height)
+
+        human_crop = original_image[y1:y2, x1:x2]
         cv2.imwrite(str(human_part_image_path), human_crop)
         print(f"\tfound human at {x}/{y} with the size of {w} x {h}")
@@ -152,7 +147,14 @@ def detect_humans(to_detect: str):
             "y": y,
             "w": w,
             "h": h,
-            "cropped": str(human_part_image_path)
+            "crop": {
+                "file": str(human_part_image_path),
+                "x": x1,
+                "y": y1,
+                "w": x2 - x1,
+                "h": y2 - y1,
+            },
+            "parts": {},
         })
@@ -163,3 +165,6 @@ def detect_humans(to_detect: str):
     with open(boxes_file, "w") as f:
         json.dump(boxes_structures, f)
     cv2.imwrite(detected, image)
+
+    for human in human_boxes:
+        detect_human_parts(human)
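Taken together: detect_humans() finds person boxes, writes the padded crops plus a *_boxes.json sidecar, then hands each box record to detect_human_parts() for pose keypoints. A minimal driver under those assumptions; the input path is a hypothetical example, and ultralytics fetches 'yolov8n-pose.pt' automatically on first use:

if __name__ == "__main__":
    # Hypothetical example image; any photo with visible people works.
    detect_humans("assets/input/example.jpg", crop_padding=20)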