feat: improved human detection

2025-04-23 12:42:19 +02:00
parent 2951becc00
commit f13878d8bc
3 changed files with 69 additions and 6 deletions

View File

@@ -4,4 +4,4 @@ from .detect_humans import detect_humans
 def cli():
     print(f"Running secure_pixelation")
-    detect_humans("assets/humans.png")
+    detect_humans("assets/human_detection/humans.png")

View File

@@ -1,10 +1,11 @@
 from pathlib import Path
 import urllib.request
 from typing import Dict, List
+import json
 import cv2
 import imutils
 import numpy as np
+import mediapipe as mp
@@ -38,13 +39,55 @@ def require_net(name: str):
         )
 
 
+def detect_humans_mediapipe(to_detect: str):
+    _p = Path(to_detect)
+    detected = str(_p.with_name(_p.stem + "_detected" + _p.suffix))
+    boxes_file = str(_p.with_name(_p.stem + "_boxes.json"))
+    print(f"detecting humans: {to_detect} => {detected}")
+
+    # Initialize MediaPipe Pose
+    mp_pose = mp.solutions.pose
+    # static_image_mode=True treats the input as a single image, not a video stream
+    pose = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.5)
+    mp_drawing = mp.solutions.drawing_utils
+
+    # Load the image and convert BGR to RGB (MediaPipe expects RGB input)
+    image = cv2.imread(to_detect)
+    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+    # Perform pose detection
+    results = pose.process(image_rgb)
+
+    # Draw landmarks and extract keypoints only if a pose was found
+    if results.pose_landmarks:
+        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
+
+        # Extract specific keypoints: 0 = nose, 31/32 = left/right foot index
+        # (15/16 are the wrists in MediaPipe Pose, not the feet)
+        head = results.pose_landmarks.landmark[0]         # nose landmark
+        left_foot = results.pose_landmarks.landmark[31]   # left foot
+        right_foot = results.pose_landmarks.landmark[32]  # right foot
+
+        # Convert normalized landmark coordinates to pixel coordinates
+        h, w, _ = image.shape
+        head_coords = int(head.x * w), int(head.y * h)
+        left_foot_coords = int(left_foot.x * w), int(left_foot.y * h)
+        right_foot_coords = int(right_foot.x * w), int(right_foot.y * h)
+
+        # Draw head and feet positions on the image
+        cv2.circle(image, head_coords, 5, (255, 0, 0), -1)        # head in blue
+        cv2.circle(image, left_foot_coords, 5, (0, 0, 255), -1)   # left foot in red
+        cv2.circle(image, right_foot_coords, 5, (0, 0, 255), -1)  # right foot in red
+
+    # Save the result
+    cv2.imwrite(detected, image)
+
+
 def detect_humans(to_detect: str):
     _p = Path(to_detect)
-    detected = str(_p.with_name(_p.stem + ".detected" + _p.suffix))
+    detected = str(_p.with_name(_p.stem + "_detected" + _p.suffix))
+    boxes_file = str(_p.with_name(_p.stem + "_boxes.json"))
     print(f"detecting humans: {to_detect} => {detected}")
 
     require_net("yolov3")
@@ -58,6 +101,7 @@ def detect_humans(to_detect: str):
     # Load image
     image = cv2.imread(to_detect)
+    original_image = cv2.imread(to_detect)
     height, width, channels = image.shape
 
     # Create blob and do forward pass
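
original_image stays untouched so the crops saved later contain no drawn rectangles. Reading the file twice works; an in-memory copy would achieve the same without the second disk read (a sketch, not what the commit does):

    # alternative sketch: copy the decoded array instead of re-reading the file
    image = cv2.imread(to_detect)
    original_image = image.copy()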
@@ -88,14 +132,34 @@ def detect_humans(to_detect: str):
# Apply Non-Maximum Suppression
indices = cv2.dnn.NMSBoxes(boxes, confidences, score_threshold=0.5, nms_threshold=0.4)
boxes_structures = {}
human_boxes = boxes_structures["humans"] = []
human_part_folder = _p.with_name(_p.stem + "_parts")
human_part_folder.mkdir(exist_ok=True)
for i in indices:
i = i[0] if isinstance(i, (list, np.ndarray)) else i # Flatten index if needed
x, y, w, h = boxes[i]
human_part_image_path = human_part_folder / (_p.stem + "_" + str(i) + _p.suffix)
human_crop = original_image[y:y+h, x:x+w]
cv2.imwrite(str(human_part_image_path), human_crop)
print(f"\tfound human at {x}/{y} with the size of {w} x {h}")
human_boxes.append({
"x": x,
"y": y,
"w": w,
"h": h,
"cropped": str(human_part_image_path)
})
cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 255), 2)
# Save the result
with open(boxes_file, "w") as f:
json.dump(boxes_structures, f)
cv2.imwrite(detected, image)
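
For reference, the _boxes.json file written above has the following shape; the values here are made-up placeholders for a single detection, with the cropped path following the _parts folder naming used in the loop:

    {
      "humans": [
        {
          "x": 412,
          "y": 160,
          "w": 220,
          "h": 540,
          "cropped": "assets/human_detection/humans_parts/humans_0.png"
        }
      ]
    }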