feat: improved human detection

Hazel 2025-04-23 12:42:19 +02:00
parent 2951becc00
commit f13878d8bc
3 changed files with 69 additions and 6 deletions

View File

@@ -3,7 +3,6 @@ name = "secure_pixelation"
version = "0.0.0"
dependencies = [
    "opencv_python~=4.11.0.86",
-    "imutils~=0.5.4",
]
authors = []
description = "Hiding faces with Mosaic has proven incredibly unsafe, especially with videos, because the algorithm isn't destructive. However, if you black out the selected area, repopulate it with generative AI, and then pixelate it, it should look authentic, but be 100% destructive, thus safe."
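The approach in the description (black out, regenerate, then pixelate) can be sketched in a few lines of OpenCV. This is a minimal illustration, not code from the repo: destructive_pixelate, its parameters, and the block size of 16 are assumed names, and the generative repaint step is left as a comment.

import cv2
import numpy as np

def destructive_pixelate(image: np.ndarray, x: int, y: int, w: int, h: int, block: int = 16) -> np.ndarray:
    out = image.copy()  # keep the caller's image untouched
    out[y:y+h, x:x+w] = 0  # hard black-out: the original pixels are destroyed, not just transformed
    # (a generative model would repaint the blacked-out region here)
    # pixelate: downscale, then upscale with nearest-neighbour interpolation
    region = out[y:y+h, x:x+w]
    small = cv2.resize(region, (max(1, w // block), max(1, h // block)), interpolation=cv2.INTER_LINEAR)
    out[y:y+h, x:x+w] = cv2.resize(small, (w, h), interpolation=cv2.INTER_NEAREST)
    return out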

View File

@@ -4,4 +4,4 @@ from .detect_humans import detect_humans
def cli():
    print("Running secure_pixelation")
-    detect_humans("assets/humans.png")
+    detect_humans("assets/human_detection/humans.png")

View File

@@ -1,10 +1,11 @@
from pathlib import Path
import urllib.request
from typing import Dict, List
import json
import cv2
import imutils
import numpy as np
import mediapipe as mp
@@ -38,13 +39,55 @@ def require_net(name: str):
)
def detect_humans_mediapipe(to_detect: str):
    _p = Path(to_detect)
    detected = str(_p.with_name(_p.stem + "_detected" + _p.suffix))
    boxes_file = str(_p.with_name(_p.stem + "_boxes.json"))
    print(f"detecting humans: {to_detect} => {detected}")
    # print(f"\tfound human at {x}/{y} with the size of {w} x {h}")

    # Initialize MediaPipe Pose (min_tracking_confidence only matters for video;
    # static_image_mode=True would suit a single still image better)
    mp_pose = mp.solutions.pose
    pose = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)
    mp_drawing = mp.solutions.drawing_utils

    # Load the image and convert BGR to RGB (MediaPipe expects RGB)
    image = cv2.imread(to_detect)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Perform pose detection
    results = pose.process(image_rgb)

    # Only touch the landmarks if a pose was actually found
    if results.pose_landmarks:
        # Draw landmarks on the image (optional)
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

        # Extract specific keypoints: 0 = nose, 31 = left foot index, 32 = right foot index
        # (indices 15/16 are the wrists in MediaPipe Pose, not the feet)
        head = results.pose_landmarks.landmark[0]
        left_foot = results.pose_landmarks.landmark[31]
        right_foot = results.pose_landmarks.landmark[32]

        # Convert normalized landmark coordinates to pixel coordinates
        h, w, _ = image.shape
        head_coords = int(head.x * w), int(head.y * h)
        left_foot_coords = int(left_foot.x * w), int(left_foot.y * h)
        right_foot_coords = int(right_foot.x * w), int(right_foot.y * h)

        # Draw head and feet positions on the image
        cv2.circle(image, head_coords, 5, (255, 0, 0), -1)       # Head in blue
        cv2.circle(image, left_foot_coords, 5, (0, 0, 255), -1)  # Left foot in red
        cv2.circle(image, right_foot_coords, 5, (0, 0, 255), -1) # Right foot in red

    # Save the result
    cv2.imwrite(detected, image)
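A note on the function above: it only draws the nose and foot keypoints. If the pose landmarks were meant to feed the same JSON structure that detect_humans writes, a helper along these lines could derive a full-body box; landmarks_to_box is hypothetical and not part of this commit.

def landmarks_to_box(landmarks, width: int, height: int) -> dict:
    # MediaPipe landmark coordinates are normalized to [0, 1]; scale to pixels
    xs = [lm.x * width for lm in landmarks.landmark]
    ys = [lm.y * height for lm in landmarks.landmark]
    x, y = int(min(xs)), int(min(ys))
    return {"x": x, "y": y, "w": int(max(xs)) - x, "h": int(max(ys)) - y}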
def detect_humans(to_detect: str):
    _p = Path(to_detect)
-    detected = str(_p.with_name(_p.stem + ".detected" + _p.suffix))
+    detected = str(_p.with_name(_p.stem + "_detected" + _p.suffix))
    boxes_file = str(_p.with_name(_p.stem + "_boxes.json"))
    print(f"detecting humans: {to_detect} => {detected}")

    require_net("yolov3")
@@ -58,6 +101,7 @@ def detect_humans(to_detect: str):
    # Load image (keep an unannotated copy for cropping)
    image = cv2.imread(to_detect)
+    original_image = cv2.imread(to_detect)
    height, width, channels = image.shape

    # Create blob and do forward pass
@@ -88,14 +132,34 @@ def detect_humans(to_detect: str):
    # Apply Non-Maximum Suppression
    indices = cv2.dnn.NMSBoxes(boxes, confidences, score_threshold=0.5, nms_threshold=0.4)

    boxes_structures = {}
    human_boxes = boxes_structures["humans"] = []

    human_part_folder = _p.with_name(_p.stem + "_parts")
    human_part_folder.mkdir(exist_ok=True)

    for i in indices:
        i = i[0] if isinstance(i, (list, np.ndarray)) else i  # Flatten index if needed
        x, y, w, h = boxes[i]
        x, y = max(x, 0), max(y, 0)  # clamp so the crop stays inside the image

        human_part_image_path = human_part_folder / (_p.stem + "_" + str(i) + _p.suffix)
        human_crop = original_image[y:y+h, x:x+w]
        cv2.imwrite(str(human_part_image_path), human_crop)

        print(f"\tfound human at {x}/{y} with the size of {w} x {h}")
        human_boxes.append({
            "x": x,
            "y": y,
            "w": w,
            "h": h,
            "cropped": str(human_part_image_path)
        })
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 255), 2)

    # Save the result
    with open(boxes_file, "w") as f:
        json.dump(boxes_structures, f)
    cv2.imwrite(detected, image)
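Downstream, the _boxes.json file written above is enough to drive the actual redaction. A usage sketch, assuming the hypothetical destructive_pixelate helper from earlier; pixelate_from_boxes is not part of this commit.

import json
import cv2

def pixelate_from_boxes(image_path: str, boxes_path: str, out_path: str):
    image = cv2.imread(image_path)
    with open(boxes_path) as f:
        human_boxes = json.load(f)["humans"]
    for box in human_boxes:
        image = destructive_pixelate(image, box["x"], box["y"], box["w"], box["h"])
    cv2.imwrite(out_path, image)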