feat: improved human detection

2025-04-23 12:42:19 +02:00
parent 2951becc00
commit f13878d8bc
3 changed files with 69 additions and 6 deletions

View File

@@ -4,4 +4,4 @@ from .detect_humans import detect_humans
 def cli():
     print(f"Running secure_pixelation")
-    detect_humans("assets/humans.png")
+    detect_humans("assets/human_detection/humans.png")

View File

@@ -1,10 +1,11 @@
 from pathlib import Path
 import urllib.request
 from typing import Dict, List
+import json
 import cv2
 import imutils
 import numpy as np
+import mediapipe as mp
@@ -38,13 +39,55 @@ def require_net(name: str):
         )
 
 
+def detect_humans_mediapipe(to_detect: str):
+    _p = Path(to_detect)
+    detected = str(_p.with_name(_p.stem + "_detected" + _p.suffix))
+    boxes_file = str(_p.with_name(_p.stem + "_boxes.json"))
+    print(f"detecting humans: {to_detect} => {detected}")
+
+    # Initialize MediaPipe Pose
+    mp_pose = mp.solutions.pose
+    # static_image_mode=True treats the input as a single image, not a video stream
+    pose = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.5)
+    mp_drawing = mp.solutions.drawing_utils
+
+    # Load the image and convert BGR to RGB (MediaPipe expects RGB input)
+    image = cv2.imread(to_detect)
+    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+    # Perform pose detection
+    results = pose.process(image_rgb)
+
+    # Draw landmarks and extract keypoints only if a pose was found
+    if results.pose_landmarks:
+        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
+
+        # Extract specific keypoints: 0 = nose, 31/32 = left/right foot index
+        # (15/16 are the wrists in MediaPipe Pose, not the feet)
+        head = results.pose_landmarks.landmark[0]         # nose landmark
+        left_foot = results.pose_landmarks.landmark[31]   # left foot
+        right_foot = results.pose_landmarks.landmark[32]  # right foot
+
+        # Convert normalized landmark coordinates to pixel coordinates
+        h, w, _ = image.shape
+        head_coords = int(head.x * w), int(head.y * h)
+        left_foot_coords = int(left_foot.x * w), int(left_foot.y * h)
+        right_foot_coords = int(right_foot.x * w), int(right_foot.y * h)
+
+        # Draw head and feet positions on the image
+        cv2.circle(image, head_coords, 5, (255, 0, 0), -1)        # head in blue
+        cv2.circle(image, left_foot_coords, 5, (0, 0, 255), -1)   # left foot in red
+        cv2.circle(image, right_foot_coords, 5, (0, 0, 255), -1)  # right foot in red
+
+    # Save the result
+    cv2.imwrite(detected, image)
+
+
 def detect_humans(to_detect: str):
     _p = Path(to_detect)
-    detected = str(_p.with_name(_p.stem + ".detected" + _p.suffix))
+    detected = str(_p.with_name(_p.stem + "_detected" + _p.suffix))
+    boxes_file = str(_p.with_name(_p.stem + "_boxes.json"))
     print(f"detecting humans: {to_detect} => {detected}")
 
     require_net("yolov3")
@@ -58,6 +101,7 @@ def detect_humans(to_detect: str):
     # Load image
     image = cv2.imread(to_detect)
+    original_image = cv2.imread(to_detect)
     height, width, channels = image.shape
 
     # Create blob and do forward pass
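
original_image stays untouched so the crops saved later contain no drawn rectangles. Reading the file twice works; an in-memory copy would achieve the same without the second disk read (a sketch, not what the commit does):

    # alternative sketch: copy the decoded array instead of re-reading the file
    image = cv2.imread(to_detect)
    original_image = image.copy()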
@@ -88,14 +132,34 @@ def detect_humans(to_detect: str):
# Apply Non-Maximum Suppression
indices = cv2.dnn.NMSBoxes(boxes, confidences, score_threshold=0.5, nms_threshold=0.4)
boxes_structures = {}
human_boxes = boxes_structures["humans"] = []
human_part_folder = _p.with_name(_p.stem + "_parts")
human_part_folder.mkdir(exist_ok=True)
for i in indices:
i = i[0] if isinstance(i, (list, np.ndarray)) else i # Flatten index if needed
x, y, w, h = boxes[i]
human_part_image_path = human_part_folder / (_p.stem + "_" + str(i) + _p.suffix)
human_crop = original_image[y:y+h, x:x+w]
cv2.imwrite(str(human_part_image_path), human_crop)
print(f"\tfound human at {x}/{y} with the size of {w} x {h}")
human_boxes.append({
"x": x,
"y": y,
"w": w,
"h": h,
"cropped": str(human_part_image_path)
})
cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 255), 2)
# Save the result
with open(boxes_file, "w") as f:
json.dump(boxes_structures, f)
cv2.imwrite(detected, image)
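
For reference, the _boxes.json file written above has the following shape; the values here are made-up placeholders for a single detection, with the cropped path following the _parts folder naming used in the loop:

    {
      "humans": [
        {
          "x": 412,
          "y": 160,
          "w": 220,
          "h": 540,
          "cropped": "assets/human_detection/humans_parts/humans_0.png"
        }
      ]
    }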