feat: cleaner detection

2025-04-23 16:56:09 +02:00
parent bcfc90acdf
commit ad8f3b8e66
1 changed files with 363 additions and 90 deletions
--- a/secure_pixelation/detect_humans.py
+++ b/secure_pixelation/detect_humans.py
@@ -1,11 +1,15 @@
 from __future__ import annotations
 from pathlib import Path
 import urllib.request
 from typing import Dict, List
 import json
 from dataclasses import dataclass
 from ultralytics import YOLO
 import cv2
 import numpy as np
 from scipy.optimize import minimize
 from scipy.spatial.transform import Rotation as R
 MODEL_PATH = Path("assets", "models")
@@ -38,44 +42,309 @@ def require_net(name: str):
        )
-def detect_human_parts(human: dict):
+# Thresholds for face keypoint distances (these might need adjustment)
-    parts = human["parts"]
+EYE_RATIO_THRESHOLD = 0.25
 NOSE_EYE_RATIO_THRESHOLD = 0.2
 EAR_NOSE_RATIO_THRESHOLD = 1.2
@dataclass
class Keypoint:
    """A named 2D keypoint with an optional detection confidence.

    ``x`` and ``y`` are stored as given; ``point`` exposes them truncated
    to integers.
    """

    x: float
    y: float
    name: str
    confidence: float = 0

    @property
    def point(self):
        """Return ``(x, y)`` with both coordinates converted via ``int()``."""
        return (int(self.x), int(self.y))

    def get_distance(self, other: "Keypoint") -> float:
        """Return the Euclidean distance between this keypoint and *other*."""
        # np.hypot does not form the intermediate squares, so very large
        # coordinate differences no longer overflow the way
        # sqrt(dx ** 2 + dy ** 2) does. The result is still a NumPy float.
        return np.hypot(self.x - other.x, self.y - other.y)
 def detect_human_parts(human: dict, face_padding: int = 20):
    # Analyse the cropped image of one detected human and annotate the face
    # keypoints found in it.
    #
    # human: dict that must provide human["parts"] and human["crop"]["file"];
    #   the latter is the path of the image that is analysed.
    # face_padding: not referenced by the live code of this function; it only
    #   appears inside the disabled (string-quoted) legacy code.
    #
    # NOTE(review): `parts` and `boxes_file` are assigned here but not used
    # by any live statement visible in this function - confirm whether they
    # are still needed.
    parts = human["parts"]
    to_detect = human["crop"]["file"]
    _p = Path(to_detect)
    # Output paths are derived from the input path: "<stem>_detected<suffix>"
    # and "<stem>_boxes.json" in the same directory.
    detected = str(_p.with_name(_p.stem + "_detected" + _p.suffix))
    boxes_file = str(_p.with_name(_p.stem + "_boxes.json"))
    print(f"detecting human parts: {to_detect} => {detected}")
    def apply_rotation(rot_matrix, points):
        """Apply *rot_matrix* to *points* (one point per row) and return rows."""
        # The matrix product expects one point per column, so transpose
        # going in and transpose the product back on the way out.
        as_columns = points.T
        rotated_columns = np.dot(rot_matrix, as_columns)
        return rotated_columns.T
    def linearize_pairwise_distances(points, target_distances):
        """Return the relative error of the normalised pairwise distances.

        The Euclidean distance between every pair of rows of *points* is
        computed, the resulting square matrix is divided by the sum of all
        its entries, and the element-wise relative deviation from
        *target_distances* is returned as ``|normed - target| / target``.

        Entries whose target is 0 (such as a zero diagonal) come out as NaN
        (or inf when the normalised distance itself is non-zero); callers
        have to tolerate those values.
        """
        points = np.asarray(points)
        target_distances = np.asarray(target_distances)
        # Broadcasting (n, 1, d) against (1, n, d) yields every pairwise
        # difference at once instead of looping over index pairs.
        deltas = points[:, np.newaxis, :] - points[np.newaxis, :, :]
        pairwise_distances = np.linalg.norm(deltas, axis=-1).astype(float)
        total_distance = np.sum(pairwise_distances)
        # Dividing by a zero target (or a zero total) is expected here and
        # deliberately produces NaN/inf, so silence the floating point
        # warnings NumPy would otherwise emit on every call.
        with np.errstate(divide="ignore", invalid="ignore"):
            normed_distances = pairwise_distances / total_distance
            return np.abs(normed_distances - target_distances) / target_distances
    def objective(params, original_points, target_distances):
        """Cost function: summed relative distance error after rotating the points.

        *params* is interpreted as a rotation vector (axis-angle).
        """
        # Only the upper-left 2x2 block of the 3x3 rotation matrix is applied
        # to the 2D points.
        matrix_2d = R.from_rotvec(params).as_matrix()[:2, :2]
        candidate_points = apply_rotation(matrix_2d, original_points)
        error_matrix = linearize_pairwise_distances(candidate_points, target_distances)
        # nansum skips the NaN entries of the error matrix.
        return np.nansum(error_matrix)
    def optimize_rotation(original_points, relative_face_matrix):
        """Search for the rotation vector that minimises ``objective``.

        The search starts from the zero rotation vector and uses BFGS.
        Returns the optimised rotation vector (axis-angle, three components).
        """
        # Start from "no rotation". The distance matrix of the unrotated
        # points used to be computed here as well, but its result was never
        # used, so that dead call has been dropped.
        initial_params = np.zeros(3)
        result = minimize(
            objective,
            initial_params,
            args=(original_points, relative_face_matrix),
            method='BFGS',
        )
        return result.x
    def apply_optimized_rotation(rotation_vector, original_points):
        """Rotate *original_points* using the 2x2 block of *rotation_vector*'s matrix."""
        full_matrix = R.from_rotvec(rotation_vector).as_matrix()
        # Same reduction as in the cost function: keep the upper-left 2x2
        # part of the 3x3 rotation matrix.
        return apply_rotation(full_matrix[:2, :2], original_points)
    # Reference table of target pairwise distances between the five face
    # keypoints: symmetric, zero diagonal, and all entries sum to ~1 (the
    # same normalisation linearize_pairwise_distances applies).
    # NOTE(review): presumably ordered nose, left eye, right eye, left ear,
    # right ear, matching keypoints[:5] below - confirm.
    relative_face_matrix = np.array([
        [0.         , 0.02243309, 0.02243309, 0.05016191, 0.05016191],
        [0.02243309, 0.        , 0.04012953, 0.04486618, 0.07234453],
        [0.02243309, 0.04012953, 0.        , 0.07234453, 0.04486618],
        [0.05016191, 0.04486618, 0.07234453, 0.        , 0.08025906],
        [0.05016191, 0.07234453, 0.04486618, 0.08025906, 0.        ]
    ])
    # 
    # The pose model is constructed on every call of this function.
    model = YOLO('yolov8n-pose.pt')  # You can also try 'yolov8s-pose.pt' for better accuracy
    # Only the first result of the model call is used.
    results = model(to_detect)[0]
    image = cv2.imread(to_detect)
    # NOTE(review): no live line below sets did_detect to True (the only
    # assignment sits on a removed "-" diff line) - confirm the intent.
    did_detect = False
    for person in results.keypoints.data:
        keypoints = person.cpu().numpy()
-        # Common keypoints: 0=nose, 5=left_shoulder, 11=left_hip, 15=left_foot
+        print("#" * 50)
        # NOTE(review): head and foot are only consumed by removed "-" lines
        # and by the disabled legacy code further down.
        head = tuple(map(int, keypoints[0][:2]))
        foot = tuple(map(int, keypoints[15][:2]))
-        cv2.circle(image, head, 5, (255, 0, 0), -1)   # Head in blue
+        original_points = np.array([[k[0], k[1]] for k in keypoints[:5]])
-        cv2.circle(image, foot, 5, (0, 0, 255), -1)   # Foot in red
+        is_not_zero = False
-        did_detect = True
+        for x, y in original_points:
            if x != 0 or y != 0:
                is_not_zero = True
                break
        # Skip this person when all five points are exactly (0, 0).
        if not is_not_zero:
            continue
        # Fit a rotation so that the normalised pairwise distances of the
        # five points get as close as possible to the reference table.
        rotation_vector = optimize_rotation(original_points, relative_face_matrix)
        optimized_points = apply_optimized_rotation(rotation_vector, original_points)
        optimized_distances = linearize_pairwise_distances(optimized_points, relative_face_matrix)
        # indices of the points that seem to be likely correct
        # A point qualifies when more than two of its relative errors are
        # below 1; NaN entries never satisfy the comparison.
        success_points = []
        for i in range(5):
            s_count = 0
            for j in range(5):
                d = np.abs(optimized_distances[i][j])
                if d < 1:
                    s_count += 1
            if s_count > 2:
                success_points.append(i)
        # Draw every raw keypoint, whether or not it was accepted.
        for point in original_points:
            cv2.circle(image, (int(point[0]), int(point[1])), 4, (0, 0, 255), -1)
        if len(success_points) < 1:
            continue
        # A face counts as valid when at least three points were accepted.
        valid_face = len(success_points) >= 3
        clean_points = []
        # Reconstruct disregarded points using weighted average of relative positions
        # Every accepted point contributes with weight 1, so this is a plain
        # mean of the per-point estimates.
        # NOTE(review): relative_face_matrix holds normalised ratios (all
        # below 0.1) while original_points are image coordinates, so each
        # estimate lies almost exactly on original_points[j] - confirm this
        # is intended.
        for i in range(5):
            if i not in success_points:
                weighted_sum = np.zeros(2)
                total_weight = 0.0
                for j in success_points:
                    if not np.isnan(relative_face_matrix[i][j]):
                        direction = original_points[j] - original_points[i]
                        norm = np.linalg.norm(direction)
                        if norm > 0:
                            direction = direction / norm
                        estimated_distance = relative_face_matrix[i][j]
                        estimate = original_points[j] - direction * estimated_distance
                        weighted_sum += estimate
                        total_weight += 1
                if total_weight > 0:
                    clean_points.append(weighted_sum / total_weight)
            else:
                clean_points.append(original_points[i])
        clean_points = np.array(clean_points)
        # Calculate bounding box from clean_points
        realistic_aspect_ratio = 2/3 # width / height
        x_coords = clean_points[:, 0]
        y_coords = clean_points[:, 1]
        min_x = np.min(x_coords)
        max_x = np.max(x_coords)
        min_y = np.min(y_coords)
        max_y = np.max(y_coords)
        # Face-like padding: more space top & bottom than sides
        width = max_x - min_x
        height = max_y - min_y
        # Grow the keypoint extent to the target width/height ratio first;
        # the (real_* - *) / 2 terms split that growth evenly between both
        # sides before the proportional padding is added.
        normalized_bounding_size = max(width, height * realistic_aspect_ratio)
        real_width = normalized_bounding_size
        real_height = normalized_bounding_size / realistic_aspect_ratio
        padding_x = width * 0.7 + (real_width - width) / 2
        padding_y_top = height * 2 + (real_height - height) / 2
        padding_y_bottom = height * 1.7  + (real_height - height) / 2
        # NOTE(review): the box corners are not clamped to the image bounds.
        face_box_x1 = int(min_x - padding_x)
        face_box_y1 = int(min_y - padding_y_top)
        face_box_x2 = int(max_x + padding_x)
        face_box_y2 = int(max_y + padding_y_bottom)
        # NOTE(review): face_bounding_box is not read by any live statement
        # after this point.
        face_bounding_box = (face_box_x1, face_box_y1, face_box_x2, face_box_y2)
        # The drawing colour distinguishes valid faces from the rest.
        color = (255, 255, 0)
        if valid_face:
            color = (0, 255, 0)
        cv2.rectangle(image, (face_box_x1, face_box_y1), (face_box_x2, face_box_y2), color, 2)
        for point in clean_points:
            cv2.circle(image, (int(point[0]), int(point[1])), 4, color, -1)
        # Debug output of the intermediate values for this person.
        print("\nOriginal points:")
        print(original_points)
        print("\nOriginal pairwise distances:")
        print(linearize_pairwise_distances(original_points, relative_face_matrix))
        print(f"Optimized rotation vector (axis-angle): {rotation_vector}")
        print("\nOptimized points after rotation:")
        print(optimized_points)
        print("\nOptimized pairwise distances:")
        print(optimized_distances)
        print(success_points)
        print(clean_points)
        # The two triple-quoted strings below are bare string expressions:
        # they hold disabled legacy code and are never executed.
        """
        for idx in face_indices:
            x, y, conf = keypoints[idx]
            name = keypoint_names[idx]
            if conf > 0.3:
                face_points.append((x, y))
                point = (int(x), int(y))
                name = keypoint_names[idx]
                cv2.circle(image, point, 4, (0, 255, 0), -1)
                cv2.putText(image, name, (point[0] + 5, point[1] + 5),
                cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0), 1)
        """
        """
        nose, left_eye, right_eye, left_ear, right_ear = face_points
        print(face_points)
        # Calculate pairwise distances
        nose_to_left_eye = euclidean_distance(nose, left_eye)
        nose_to_right_eye = euclidean_distance(nose, right_eye)
        eyes_distance = euclidean_distance(left_eye, right_eye)
        left_ear_to_nose = euclidean_distance(left_ear, nose)
        right_ear_to_nose = euclidean_distance(right_ear, nose)
        # Relative distances
        eye_to_eye_ratio = eyes_distance / (left_ear_to_nose + right_ear_to_nose)  # Eyes vs. nose-to-ears
        nose_to_eye_ratio = (nose_to_left_eye + nose_to_right_eye) / (left_ear_to_nose + right_ear_to_nose)  # Nose-to-eye vs. ear-to-nose
        ear_to_nose_ratio = (left_ear_to_nose + right_ear_to_nose) / 2  # Ear-to-nose proportionality
        # Validate using relative distances
        if not (EYE_RATIO_THRESHOLD < eye_to_eye_ratio < 0.5):  # Arbitrary ratio threshold
            print("⚠️ Rejected due to unrealistic eye-to-eye ratio:", eye_to_eye_ratio)
            has_valid_face = False
        if not (NOSE_EYE_RATIO_THRESHOLD < nose_to_eye_ratio < 0.4):  # Arbitrary ratio threshold
            print("⚠️ Rejected due to unrealistic nose-to-eye ratio:", nose_to_eye_ratio)
            has_valid_face = False
        if not (0.5 < ear_to_nose_ratio < EAR_NOSE_RATIO_THRESHOLD):
            print("⚠️ Rejected due to unrealistic ear-to-nose ratio:", ear_to_nose_ratio)
            has_valid_face = False
        # If all checks pass, calculate the bounding box
        xs, ys, _ = zip(*face_points)
        x_min, x_max = int(min(xs)), int(max(xs))
        y_min, y_max = int(min(ys)), int(max(ys))
        x_min = max(x_min - face_padding, 0)
        y_min = max(y_min - face_padding, 0)
        x_max = min(x_max + face_padding, image.shape[1])
        y_max = min(y_max + face_padding, image.shape[0])
        # Compute box size
        box_w = x_max - x_min
        box_h = y_max - y_min
        if has_valid_face:
            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
        else:
            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)
        for i, (x, y, conf) in enumerate(keypoints):
            point = (int(x), int(y))
            name = keypoint_names[i]
            # cv2.circle(image, point, 4, (0, 255, 0), -1)
            # cv2.putText(image, name, (point[0] + 5, point[1] - 5),
            # cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0), 1)
        # cv2.circle(image, head, 5, (255, 0, 0), -1)   # Head in blue
        # cv2.circle(image, foot, 5, (0, 0, 255), -1)   # Foot in red
        """ 
    # NOTE(review): as shown here the write is not indented under the `if`,
    # and did_detect is never set to True by live code, so it is unclear
    # whether the annotated image is meant to be written unconditionally -
    # confirm against the actual file.
    if did_detect:
    cv2.imwrite(detected, image)
-def detect_humans(to_detect: str, crop_padding: int = 20):
+def detect_humans(to_detect: str, crop_padding: int = 20, skip_detection_if_present: bool = True):
    _p = Path(to_detect)
    detected = str(_p.with_name(_p.stem + "_detected" + _p.suffix))
    boxes_file = str(_p.with_name(_p.stem + "_boxes.json"))
    print(f"detecting humans: {to_detect} => {detected}")
    boxes_structures = {}
    human_boxes = boxes_structures["humans"] = []
    if not (Path(boxes_file).exists() and skip_detection_if_present):
        require_net("yolov3")
        # Load YOLO
@@ -118,9 +387,6 @@ def detect_humans(to_detect: str, crop_padding: int = 20):
        # Apply Non-Maximum Suppression
        indices = cv2.dnn.NMSBoxes(boxes, confidences, score_threshold=0.5, nms_threshold=0.4)
    boxes_structures = {}
    human_boxes = boxes_structures["humans"] = []
        human_part_folder = _p.with_name(_p.stem + "_parts")
        human_part_folder.mkdir(exist_ok=True)
@@ -166,5 +432,12 @@ def detect_humans(to_detect: str, crop_padding: int = 20):
            json.dump(boxes_structures, f)
        cv2.imwrite(detected, image)
    else:
        with open(boxes_file, "r") as f:
            boxes_structures = json.load(f)
            human_boxes = boxes_structures["humans"]
    for human in human_boxes:
-        detect_human_parts(human["crop"]["file"])
+        detect_human_parts(human)