generated from Hazel/python-project
feat: cleaner detection
@@ -1,11 +1,15 @@
+from __future__ import annotations
 from pathlib import Path
 import urllib.request
 from typing import Dict, List
 import json
+from dataclasses import dataclass
 
 from ultralytics import YOLO
 import cv2
 import numpy as np
+from scipy.optimize import minimize
+from scipy.spatial.transform import Rotation as R
 
 
 MODEL_PATH = Path("assets", "models")
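Note: the new imports pull scipy into the module's third-party dependencies. Assuming the usual PyPI package names for these imports, a minimal environment would be:

    pip install ultralytics opencv-python numpy scipy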
@@ -38,133 +42,402 @@ def require_net(name: str):
         )
 
 
-def detect_human_parts(human: dict):
-    parts = human["parts"]
+# Thresholds for face keypoint distances (these might need adjustment);
+# currently referenced only by the disabled ratio-validation block further down
+EYE_RATIO_THRESHOLD = 0.25
+NOSE_EYE_RATIO_THRESHOLD = 0.2
+EAR_NOSE_RATIO_THRESHOLD = 1.2
+
+
+@dataclass
+class Keypoint:
+    x: float
+    y: float
+    name: str
+    confidence: float = 0
+
+    @property
+    def point(self):
+        return (int(self.x), int(self.y))
+
+    def get_distance(self, other: Keypoint) -> float:
+        return np.sqrt((self.x - other.x) ** 2 + (self.y - other.y) ** 2)
+
+
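A quick sketch of how the new Keypoint helper is meant to be used (the values here are invented):

    nose = Keypoint(x=120.0, y=80.0, name="nose", confidence=0.9)
    left_eye = Keypoint(x=135.0, y=70.0, name="left_eye", confidence=0.8)
    nose.point                   # (120, 80), ready for cv2 drawing calls
    nose.get_distance(left_eye)  # sqrt(15**2 + 10**2), about 18.03 px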
+def detect_human_parts(human: dict, face_padding: int = 20):
+    parts = human["parts"]
+
     to_detect = human["crop"]["file"]
     _p = Path(to_detect)
     detected = str(_p.with_name(_p.stem + "_detected" + _p.suffix))
     boxes_file = str(_p.with_name(_p.stem + "_boxes.json"))
     print(f"detecting human parts: {to_detect} => {detected}")
 
 
+    def apply_rotation(rot_matrix, points):
+        # Apply the 2x2 rotation to an (N, 2) array of points
+        return np.dot(rot_matrix, points.T).T
+
+    def linearize_pairwise_distances(points, target_distances):
+        # Pairwise distances between the points, normalized so they sum to 1,
+        # then compared against the target matrix as a relative error.
+        num_points = len(points)
+        pairwise_distances = np.zeros((num_points, num_points))
+        for i in range(num_points):
+            for j in range(i, num_points):
+                pairwise_distances[i, j] = np.linalg.norm(points[i] - points[j])
+                pairwise_distances[j, i] = pairwise_distances[i, j]  # symmetric matrix
+
+        total_distance = np.sum(pairwise_distances)
+        normed_distances = pairwise_distances / total_distance
+
+        # The diagonal is 0 / 0 = nan; callers ignore it via np.nansum
+        return np.abs(normed_distances - target_distances) / target_distances
+
+    def objective(params, original_points, target_distances):
+        # Interpret params as an axis-angle rotation vector; the top-left 2x2
+        # block of the 3D rotation matrix projects the rotation onto the
+        # image plane (out-of-plane components foreshorten distances)
+        rot = R.from_rotvec(params)
+        rotation_matrix = rot.as_matrix()[:2, :2]
+
+        # Apply the rotation to the original points
+        rotated_points = apply_rotation(rotation_matrix, original_points)
+
+        # Total relative divergence from the expected face geometry
+        divergence = linearize_pairwise_distances(rotated_points, target_distances)
+        return np.nansum(divergence)
+
+    def optimize_rotation(original_points, relative_face_matrix):
+        # Initial guess: zero rotation vector (no rotation)
+        initial_params = np.zeros(3)
+
+        # Minimize the divergence between observed and expected geometry
+        result = minimize(objective, initial_params, args=(original_points, relative_face_matrix), method='BFGS')
+
+        return result.x  # Rotation vector (axis-angle)
+
+    def apply_optimized_rotation(rotation_vector, original_points):
+        # Same image-plane projection as in objective()
+        rot = R.from_rotvec(rotation_vector)
+        rotation_matrix = rot.as_matrix()[:2, :2]
+
+        # Apply the rotation to the points
+        return apply_rotation(rotation_matrix, original_points)
+
+
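Why optimize over a 3D rotation vector when the keypoints are 2D? Taking the top-left 2x2 block of the 3D rotation matrix projects the rotated face onto the image plane: a pure in-plane (z-axis) rotation preserves every pairwise distance, while yaw or pitch components foreshorten them, which is what lets the objective detect a turned head. A self-contained sketch of that effect (not part of the commit):

    import numpy as np
    from scipy.spatial.transform import Rotation as R

    pts = np.array([[0.0, 0.0], [30.0, 0.0], [15.0, 20.0]])

    def total(p):
        # Sum of all pairwise distances in the point set
        return sum(np.linalg.norm(a - b) for a in p for b in p)

    roll = R.from_rotvec([0, 0, 0.5]).as_matrix()[:2, :2]  # in-plane rotation
    yaw = R.from_rotvec([0, 0.5, 0]).as_matrix()[:2, :2]   # out-of-plane rotation
    print(total(pts))           # baseline
    print(total(pts @ roll.T))  # identical: an in-plane rotation is an isometry
    print(total(pts @ yaw.T))   # smaller: x distances shrink by cos(0.5)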
+    # Expected pairwise distances between the five facial keypoints
+    # (nose, left eye, right eye, left ear, right ear), normalized so the
+    # whole matrix sums to 1
+    relative_face_matrix = np.array([
+        [0.        , 0.02243309, 0.02243309, 0.05016191, 0.05016191],
+        [0.02243309, 0.        , 0.04012953, 0.04486618, 0.07234453],
+        [0.02243309, 0.04012953, 0.        , 0.07234453, 0.04486618],
+        [0.05016191, 0.04486618, 0.07234453, 0.        , 0.08025906],
+        [0.05016191, 0.07234453, 0.04486618, 0.08025906, 0.        ]
+    ])
+
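If the matrix ever needs re-deriving, it can be computed from any reference set of the five keypoints with the same normalization linearize_pairwise_distances applies; a sketch with a hypothetical reference layout:

    import numpy as np

    # Hypothetical reference layout: nose, left eye, right eye, left ear, right ear
    ref = np.array([[0.0, 0.0], [-15.0, -10.0], [15.0, -10.0], [-35.0, 0.0], [35.0, 0.0]])
    dist = np.linalg.norm(ref[:, None, :] - ref[None, :, :], axis=-1)
    relative = dist / dist.sum()  # normalized like relative_face_matrix above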
     model = YOLO('yolov8n-pose.pt')  # You can also try 'yolov8s-pose.pt' for better accuracy
 
     results = model(to_detect)[0]
 
     image = cv2.imread(to_detect)
 
-    did_detect = False
     for person in results.keypoints.data:
         keypoints = person.cpu().numpy()
 
-        # Common keypoints: 0=nose, 5=left_shoulder, 11=left_hip, 15=left_foot
-        head = tuple(map(int, keypoints[0][:2]))
-        foot = tuple(map(int, keypoints[15][:2]))
-
-        cv2.circle(image, head, 5, (255, 0, 0), -1)   # Head in blue
-        cv2.circle(image, foot, 5, (0, 0, 255), -1)   # Foot in red
-        did_detect = True
-
-    if did_detect:
-        cv2.imwrite(detected, image)
+        print("#" * 50)
+
+        # First five COCO keypoints: nose, left eye, right eye, left ear, right ear
+        original_points = np.array([[k[0], k[1]] for k in keypoints[:5]])
+
+        # Skip this person if every face keypoint is missing (all zeros)
+        if not np.any(original_points):
+            continue
 
+        rotation_vector = optimize_rotation(original_points, relative_face_matrix)
+        optimized_points = apply_optimized_rotation(rotation_vector, original_points)
+        optimized_distances = linearize_pairwise_distances(optimized_points, relative_face_matrix)
+
+        # Indices of the points that seem likely to be correct: a point is
+        # trusted when its relative error stays below 100% against at least
+        # three of the other four points (the nan diagonal never counts)
+        success_points = []
+        for i in range(5):
+            s_count = 0
+            for j in range(5):
+                d = np.abs(optimized_distances[i][j])
+                if d < 1:
+                    s_count += 1
+
+            if s_count > 2:
+                success_points.append(i)
+
+        # Draw the raw keypoints in red (BGR) for debugging
+        for point in original_points:
+            cv2.circle(image, (int(point[0]), int(point[1])), 4, (0, 0, 255), -1)
+
+        if len(success_points) < 1:
+            continue
+        valid_face = len(success_points) >= 3
+
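The d < 1 test accepts an entry when the rotated distance deviates from the expected one by less than 100% of the expected value; the nan diagonal never passes because nan < 1 is False. Each row has four comparable entries, so s_count > 2 demands agreement with at least three of the other four points. A toy row (values invented) to illustrate:

    import numpy as np

    row = np.array([np.nan, 0.3, 0.8, 2.5, 0.6])  # relative errors vs. the other points
    s_count = int(np.sum(np.abs(row) < 1))        # nan and 2.5 fail, so 3
    print(s_count > 2)                            # True: this point would be trusted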
+        clean_points = []
+
+        # Reconstruct disregarded points by averaging estimates stepped off
+        # each trusted point along the expected direction and distance.
+        # relative_face_matrix holds *normalized* distances (the matrix sums
+        # to 1), so they must be rescaled to pixel units first; the scale is
+        # estimated from the trusted points.
+        scale = 1.0
+        if len(success_points) >= 2:
+            pixel_sum = sum(
+                np.linalg.norm(original_points[a] - original_points[b])
+                for a in success_points for b in success_points
+            )
+            relative_sum = sum(
+                relative_face_matrix[a][b]
+                for a in success_points for b in success_points
+            )
+            if relative_sum > 0:
+                scale = pixel_sum / relative_sum
+
+        for i in range(5):
+            if i not in success_points:
+                weighted_sum = np.zeros(2)
+                total_weight = 0.0
+                for j in success_points:
+                    direction = original_points[j] - original_points[i]
+                    norm = np.linalg.norm(direction)
+                    if norm > 0:
+                        direction = direction / norm
+                    estimated_distance = relative_face_matrix[i][j] * scale
+                    estimate = original_points[j] - direction * estimated_distance
+                    weighted_sum += estimate
+                    total_weight += 1
+                if total_weight > 0:
+                    clean_points.append(weighted_sum / total_weight)
+            else:
+                clean_points.append(original_points[i])
+
+        clean_points = np.array(clean_points)
+
+        # Calculate a face bounding box from clean_points
+        realistic_aspect_ratio = 2 / 3  # width / height
+
+        x_coords = clean_points[:, 0]
+        y_coords = clean_points[:, 1]
+
+        min_x = np.min(x_coords)
+        max_x = np.max(x_coords)
+        min_y = np.min(y_coords)
+        max_y = np.max(y_coords)
+
+        # Face-like padding: more space top & bottom than at the sides
+        width = max_x - min_x
+        height = max_y - min_y
+
+        # Grow the keypoint extent to a box with the target aspect ratio
+        normalized_bounding_size = max(width, height * realistic_aspect_ratio)
+        real_width = normalized_bounding_size
+        real_height = normalized_bounding_size / realistic_aspect_ratio
+
+        padding_x = width * 0.7 + (real_width - width) / 2
+        padding_y_top = height * 2 + (real_height - height) / 2
+        padding_y_bottom = height * 1.7 + (real_height - height) / 2
+
+        face_box_x1 = int(min_x - padding_x)
+        face_box_y1 = int(min_y - padding_y_top)
+        face_box_x2 = int(max_x + padding_x)
+        face_box_y2 = int(max_y + padding_y_bottom)
+
+        face_bounding_box = (face_box_x1, face_box_y1, face_box_x2, face_box_y2)
+
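Worked through with round numbers to show the intent of the padding: the keypoint extent is first grown to the 2:3 target aspect, then padded asymmetrically so the box covers the whole head rather than just the eye/nose/ear span. A sketch using the tuned constants (0.7, 2, 1.7) from the code above:

    width, height = 40.0, 30.0                         # keypoint extent in px
    size = max(width, height * (2 / 3))                # -> 40.0
    real_w, real_h = size, size / (2 / 3)              # -> 40.0, 60.0
    pad_x = width * 0.7 + (real_w - width) / 2         # -> 28.0
    pad_top = height * 2 + (real_h - height) / 2       # -> 75.0
    pad_bottom = height * 1.7 + (real_h - height) / 2  # -> 66.0
    # final box: 96 x 171 px around the 40 x 30 keypoint extent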
+        # Cyan box (BGR) for a tentative face, green for a validated one
+        color = (255, 255, 0)
+        if valid_face:
+            color = (0, 255, 0)
+
+        cv2.rectangle(image, (face_box_x1, face_box_y1), (face_box_x2, face_box_y2), color, 2)
+        for point in clean_points:
+            cv2.circle(image, (int(point[0]), int(point[1])), 4, color, -1)
+
+        print("\nOriginal points:")
+        print(original_points)
+        print("\nOriginal pairwise distances:")
+        print(linearize_pairwise_distances(original_points, relative_face_matrix))
+        print(f"Optimized rotation vector (axis-angle): {rotation_vector}")
+        print("\nOptimized points after rotation:")
+        print(optimized_points)
+        print("\nOptimized pairwise distances:")
+        print(optimized_distances)
+        print(success_points)
+        print(clean_points)
+
+        # Earlier ratio-based validation, kept here (disabled) for reference:
+        """
+        for idx in face_indices:
+            x, y, conf = keypoints[idx]
+            name = keypoint_names[idx]
+            if conf > 0.3:
+                face_points.append((x, y))
+
+                point = (int(x), int(y))
+                name = keypoint_names[idx]
+                cv2.circle(image, point, 4, (0, 255, 0), -1)
+                cv2.putText(image, name, (point[0] + 5, point[1] + 5),
+                cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0), 1)
+        """
+
+        """
+        nose, left_eye, right_eye, left_ear, right_ear = face_points
+        print(face_points)
+
+        # Calculate pairwise distances
+        nose_to_left_eye = euclidean_distance(nose, left_eye)
+        nose_to_right_eye = euclidean_distance(nose, right_eye)
+        eyes_distance = euclidean_distance(left_eye, right_eye)
+        left_ear_to_nose = euclidean_distance(left_ear, nose)
+        right_ear_to_nose = euclidean_distance(right_ear, nose)
+
+        # Relative distances
+        eye_to_eye_ratio = eyes_distance / (left_ear_to_nose + right_ear_to_nose)  # Eyes vs. nose-to-ears
+        nose_to_eye_ratio = (nose_to_left_eye + nose_to_right_eye) / (left_ear_to_nose + right_ear_to_nose)  # Nose-to-eye vs. ear-to-nose
+        ear_to_nose_ratio = (left_ear_to_nose + right_ear_to_nose) / 2  # Ear-to-nose proportionality
+
+        # Validate using relative distances
+        if not (EYE_RATIO_THRESHOLD < eye_to_eye_ratio < 0.5):  # Arbitrary ratio threshold
+            print("⚠️ Rejected due to unrealistic eye-to-eye ratio:", eye_to_eye_ratio)
+            has_valid_face = False
+
+        if not (NOSE_EYE_RATIO_THRESHOLD < nose_to_eye_ratio < 0.4):  # Arbitrary ratio threshold
+            print("⚠️ Rejected due to unrealistic nose-to-eye ratio:", nose_to_eye_ratio)
+            has_valid_face = False
+
+        if not (0.5 < ear_to_nose_ratio < EAR_NOSE_RATIO_THRESHOLD):
+            print("⚠️ Rejected due to unrealistic ear-to-nose ratio:", ear_to_nose_ratio)
+            has_valid_face = False
+
+        # If all checks pass, calculate the bounding box
+        xs, ys, _ = zip(*face_points)
+        x_min, x_max = int(min(xs)), int(max(xs))
+        y_min, y_max = int(min(ys)), int(max(ys))
+
+        x_min = max(x_min - face_padding, 0)
+        y_min = max(y_min - face_padding, 0)
+        x_max = min(x_max + face_padding, image.shape[1])
+        y_max = min(y_max + face_padding, image.shape[0])
+
+        # Compute box size
+        box_w = x_max - x_min
+        box_h = y_max - y_min
+
+        if has_valid_face:
+            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
+        else:
+            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)
+
+        for i, (x, y, conf) in enumerate(keypoints):
+            point = (int(x), int(y))
+            name = keypoint_names[i]
+            # cv2.circle(image, point, 4, (0, 255, 0), -1)
+            # cv2.putText(image, name, (point[0] + 5, point[1] - 5),
+            # cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0), 1)
+
+        # cv2.circle(image, head, 5, (255, 0, 0), -1)   # Head in blue
+        # cv2.circle(image, foot, 5, (0, 0, 255), -1)   # Foot in red
+        """
+
+    cv2.imwrite(detected, image)
+
+
-def detect_humans(to_detect: str, crop_padding: int = 20):
+def detect_humans(to_detect: str, crop_padding: int = 20, skip_detection_if_present: bool = True):
     _p = Path(to_detect)
     detected = str(_p.with_name(_p.stem + "_detected" + _p.suffix))
     boxes_file = str(_p.with_name(_p.stem + "_boxes.json"))
     print(f"detecting humans: {to_detect} => {detected}")
 
-    require_net("yolov3")
-
-    # Load YOLO
-    net = cv2.dnn.readNet(str(MODEL_PATH / 'yolov3.weights'), str(MODEL_PATH / 'yolov3.cfg'))
-    layer_names = net.getLayerNames()
-    indices = net.getUnconnectedOutLayers()
-    output_layers = [layer_names[int(i) - 1] for i in indices]
-
-    # Load image
-    image = cv2.imread(to_detect)
-    original_image = cv2.imread(to_detect)
-    height, width, channels = image.shape
-
-    # Create blob and do forward pass
-    blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
-    net.setInput(blob)
-    outs = net.forward(output_layers)
-
-    boxes = []
-    confidences = []
-
-    # Information for each object detected
-    for out in outs:
-        for detection in out:
-            scores = detection[5:]
-            class_id = np.argmax(scores)
-            confidence = scores[class_id]
-            if confidence > 0.5 and class_id == 0:  # Class ID 0 is human
-                center_x = int(detection[0] * width)
-                center_y = int(detection[1] * height)
-                w = int(detection[2] * width)
-                h = int(detection[3] * height)
-                x = int(center_x - w / 2)
-                y = int(center_y - h / 2)
-
-                boxes.append([x, y, w, h])
-                confidences.append(float(confidence))
-
-    # Apply Non-Maximum Suppression
-    indices = cv2.dnn.NMSBoxes(boxes, confidences, score_threshold=0.5, nms_threshold=0.4)
-
     boxes_structures = {}
     human_boxes = boxes_structures["humans"] = []
 
-    human_part_folder = _p.with_name(_p.stem + "_parts")
-    human_part_folder.mkdir(exist_ok=True)
-
-    for i in indices:
-        i = i[0] if isinstance(i, (list, np.ndarray)) else i  # Flatten index if needed
-        x, y, w, h = boxes[i]
-
-        human_part_image_path = human_part_folder / (_p.stem + "_" + str(i) + _p.suffix)
-
-        image_height, image_width = image.shape[:2]
-
-        # Compute safe crop coordinates with padding
-        x1 = max(x - crop_padding, 0)
-        y1 = max(y - crop_padding, 0)
-        x2 = min(x + w + crop_padding, image_width)
-        y2 = min(y + h + crop_padding, image_height)
-        human_crop = original_image[y1:y2, x1:x2]
-
-        cv2.imwrite(str(human_part_image_path), human_crop)
-
-        print(f"\tfound human at {x}/{y} with the size of {w} x {h}")
-        human_boxes.append({
-            "x": x,
-            "y": y,
-            "w": w,
-            "h": h,
-            "crop": {
-                "file": str(human_part_image_path),
-                "x": x1,
-                "y": y,
-                "w": x2 - x1,
-                "h": y2 - y1,
-            },
-            "parts": {},
-        })
-
-        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 255), 2)
-
-    # Save the result
-    with open(boxes_file, "w") as f:
-        json.dump(boxes_structures, f)
-    cv2.imwrite(detected, image)
+    if not (Path(boxes_file).exists() and skip_detection_if_present):
+        require_net("yolov3")
+
+        # Load YOLO
+        net = cv2.dnn.readNet(str(MODEL_PATH / 'yolov3.weights'), str(MODEL_PATH / 'yolov3.cfg'))
+        layer_names = net.getLayerNames()
+        indices = net.getUnconnectedOutLayers()
+        output_layers = [layer_names[int(i) - 1] for i in indices]
+
+        # Load image
+        image = cv2.imread(to_detect)
+        original_image = cv2.imread(to_detect)
+        height, width, channels = image.shape
+
+        # Create blob and do forward pass
+        blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
+        net.setInput(blob)
+        outs = net.forward(output_layers)
+
+        boxes = []
+        confidences = []
+
+        # Information for each object detected
+        for out in outs:
+            for detection in out:
+                scores = detection[5:]
+                class_id = np.argmax(scores)
+                confidence = scores[class_id]
+                if confidence > 0.5 and class_id == 0:  # Class ID 0 is human
+                    center_x = int(detection[0] * width)
+                    center_y = int(detection[1] * height)
+                    w = int(detection[2] * width)
+                    h = int(detection[3] * height)
+                    x = int(center_x - w / 2)
+                    y = int(center_y - h / 2)
+
+                    boxes.append([x, y, w, h])
+                    confidences.append(float(confidence))
+
+        # Apply Non-Maximum Suppression
+        indices = cv2.dnn.NMSBoxes(boxes, confidences, score_threshold=0.5, nms_threshold=0.4)
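cv2.dnn.NMSBoxes expects [x, y, w, h] boxes and returns the indices of the ones that survive suppression; depending on the OpenCV version the result is a flat array or an (N, 1) array, which is why the loop that follows flattens each index. A standalone sketch with toy boxes:

    import cv2

    boxes = [[10, 10, 100, 200], [12, 8, 100, 205], [300, 40, 80, 160]]
    scores = [0.9, 0.6, 0.8]
    keep = cv2.dnn.NMSBoxes(boxes, scores, score_threshold=0.5, nms_threshold=0.4)
    # the two heavily overlapping boxes collapse into one: keep is roughly [0, 2]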
+
+        human_part_folder = _p.with_name(_p.stem + "_parts")
+        human_part_folder.mkdir(exist_ok=True)
+
+        for i in indices:
+            i = i[0] if isinstance(i, (list, np.ndarray)) else i  # Flatten index if needed
+            x, y, w, h = boxes[i]
+
+            human_part_image_path = human_part_folder / (_p.stem + "_" + str(i) + _p.suffix)
+
+            image_height, image_width = image.shape[:2]
+
+            # Compute safe crop coordinates with padding
+            x1 = max(x - crop_padding, 0)
+            y1 = max(y - crop_padding, 0)
+            x2 = min(x + w + crop_padding, image_width)
+            y2 = min(y + h + crop_padding, image_height)
+            human_crop = original_image[y1:y2, x1:x2]
+
+            cv2.imwrite(str(human_part_image_path), human_crop)
+
+            print(f"\tfound human at {x}/{y} with the size of {w} x {h}")
+            human_boxes.append({
+                "x": x,
+                "y": y,
+                "w": w,
+                "h": h,
+                "crop": {
+                    "file": str(human_part_image_path),
+                    "x": x1,
+                    "y": y1,  # padded crop origin, matching "x": x1
+                    "w": x2 - x1,
+                    "h": y2 - y1,
+                },
+                "parts": {},
+            })
+
+            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 255), 2)
+
+        # Save the result
+        with open(boxes_file, "w") as f:
+            json.dump(boxes_structures, f)
+        cv2.imwrite(detected, image)
+
+    else:
+        # Reuse the cached boxes from a previous run
+        with open(boxes_file, "r") as f:
+            boxes_structures = json.load(f)
+            human_boxes = boxes_structures["humans"]
+
 
     for human in human_boxes:
-        detect_human_parts(human["crop"]["file"])
+        detect_human_parts(human)
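End-to-end, the entry point stays detect_humans; with the new caching flag a second run reuses the saved _boxes.json instead of re-running YOLOv3, and only the pose-based part detection repeats. Typical use (the image path is hypothetical):

    detect_humans("shots/frame_0042.png")   # full detection, writes *_boxes.json and *_parts/
    detect_humans("shots/frame_0042.png")   # reuses the cached boxes
    detect_humans("shots/frame_0042.png", skip_detection_if_present=False)  # force re-detection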