generated from Hazel/python-project

feat: cleaner detection

parent bcfc90acdf, commit ad8f3b8e66

@@ -1,11 +1,15 @@
+from __future__ import annotations
 from pathlib import Path
 import urllib.request
 from typing import Dict, List
 import json
+from dataclasses import dataclass
 
 from ultralytics import YOLO
 import cv2
 import numpy as np
+from scipy.optimize import minimize
+from scipy.spatial.transform import Rotation as R
 
 
 MODEL_PATH = Path("assets", "models")
@@ -38,133 +42,402 @@ def require_net(name: str):
     )
 
 
-def detect_human_parts(human: dict):
+# Thresholds for face keypoint distances (these might need adjustment)
+EYE_RATIO_THRESHOLD = 0.25
+NOSE_EYE_RATIO_THRESHOLD = 0.2
+EAR_NOSE_RATIO_THRESHOLD = 1.2
+
+
+@dataclass
+class Keypoint:
+    x: float
+    y: float
+    name: str
+    confidence: float = 0
+
+    @property
+    def point(self):
+        return (int(self.x), int(self.y))
+
+    def get_distance(self, other: Keypoint) -> float:
+        return np.sqrt((self.x - other.x) ** 2 + (self.y - other.y) ** 2)
+
+
+def detect_human_parts(human: dict, face_padding: int = 20):
     parts = human["parts"]
 
     to_detect = human["crop"]["file"]
     _p = Path(to_detect)
     detected = str(_p.with_name(_p.stem + "_detected" + _p.suffix))
     boxes_file = str(_p.with_name(_p.stem + "_boxes.json"))
     print(f"detecting human parts: {to_detect} => {detected}")
+
+    def apply_rotation(rot_matrix, points):
+        # Apply the rotation to the points, assuming points are 2D coordinates (flattened)
+        return np.dot(rot_matrix, points.T).T
+
+    def linearize_pairwise_distances(points, target_distances):
+        # Calculate pairwise distances between the points
+        num_points = len(points)
+        pairwise_distances = np.zeros((num_points, num_points))
+        for i in range(num_points):
+            for j in range(i, num_points):
+                pairwise_distances[i, j] = np.linalg.norm(points[i] - points[j])
+                pairwise_distances[j, i] = pairwise_distances[i, j]  # symmetric matrix
+
+        total_distance = np.sum(pairwise_distances)
+        normed_distances = pairwise_distances / total_distance
+
+        return np.abs(normed_distances - target_distances) / target_distances
+
+    def objective(params, original_points, target_distances):
+        # Convert params to an axis-angle representation (rotation vector)
+        rot = R.from_rotvec(params)
+        rotation_matrix = rot.as_matrix()[:2, :2]  # 2D rotation matrix (2x2)
+
+        # Apply the rotation to the original points
+        rotated_points = apply_rotation(rotation_matrix, original_points)
+
+        # Compute the pairwise distances for the rotated points
+        divergence = linearize_pairwise_distances(rotated_points, target_distances)
+        return np.nansum(divergence)
+
+    def optimize_rotation(original_points, relative_face_matrix):
+        # Compute the pairwise distances of the original points
+        original_distances = linearize_pairwise_distances(original_points, relative_face_matrix)
+
+        # Initial guess: rotation vector (zero rotation)
+        initial_params = np.zeros(3)  # Initial guess for the rotation vector (no rotation)
+
+        # Perform the optimization to minimize the divergence
+        result = minimize(objective, initial_params, args=(original_points, relative_face_matrix), method='BFGS')
+
+        return result.x  # Rotation vector (axis-angle)
+
+    def apply_optimized_rotation(rotation_vector, original_points):
+        # Convert the rotation vector to a rotation matrix (2D)
+        rot = R.from_rotvec(rotation_vector)
+        rotation_matrix = rot.as_matrix()[:2, :2]  # 2D rotation matrix (2x2)
+
+        # Apply the rotation to the points
+        return apply_rotation(rotation_matrix, original_points)
+
+
+    relative_face_matrix = np.array([
+        [0.        , 0.02243309, 0.02243309, 0.05016191, 0.05016191],
+        [0.02243309, 0.        , 0.04012953, 0.04486618, 0.07234453],
+        [0.02243309, 0.04012953, 0.        , 0.07234453, 0.04486618],
+        [0.05016191, 0.04486618, 0.07234453, 0.        , 0.08025906],
+        [0.05016191, 0.07234453, 0.04486618, 0.08025906, 0.        ]
+    ])
+    #
     model = YOLO('yolov8n-pose.pt')  # You can also try 'yolov8s-pose.pt' for better accuracy
 
     results = model(to_detect)[0]
 
     image = cv2.imread(to_detect)
 
-    did_detect = False
     for person in results.keypoints.data:
         keypoints = person.cpu().numpy()
 
-        # Common keypoints: 0=nose, 5=left_shoulder, 11=left_hip, 15=left_foot
-        head = tuple(map(int, keypoints[0][:2]))
-        foot = tuple(map(int, keypoints[15][:2]))
-
-        cv2.circle(image, head, 5, (255, 0, 0), -1)  # Head in blue
-        cv2.circle(image, foot, 5, (0, 0, 255), -1)  # Foot in red
-        did_detect = True
-
-    if did_detect:
-        cv2.imwrite(detected, image)
+        print("#" * 50)
+        original_points = np.array([[k[0], k[1]] for k in keypoints[:5]])
+        is_not_zero = False
+        for x, y in original_points:
+            if x != 0 or y != 0:
+                is_not_zero = True
+                break
 
+        if not is_not_zero:
+            continue
+
+        rotation_vector = optimize_rotation(original_points, relative_face_matrix)
+        optimized_points = apply_optimized_rotation(rotation_vector, original_points)
+        optimized_distances = linearize_pairwise_distances(optimized_points, relative_face_matrix)
+
+        # indices of the points that seem to be likely correct
+        success_points = []
+        for i in range(5):
+            s_count = 0
+            for j in range(5):
+                d = np.abs(optimized_distances[i][j])
+                if d < 1:
+                    s_count += 1
+
+            if s_count > 2:
+                success_points.append(i)
+
+        for point in original_points:
+            cv2.circle(image, (int(point[0]), int(point[1])), 4, (0, 0, 255), -1)
+
+        if len(success_points) < 1:
+            continue
+        valid_face = len(success_points) >= 3
+
+        clean_points = []
+
+        # Reconstruct disregarded points using weighted average of relative positions
+        for i in range(5):
+            if i not in success_points:
+                weighted_sum = np.zeros(2)
+                total_weight = 0.0
+                for j in success_points:
+                    if not np.isnan(relative_face_matrix[i][j]):
+                        direction = original_points[j] - original_points[i]
+                        norm = np.linalg.norm(direction)
+                        if norm > 0:
+                            direction = direction / norm
+                        estimated_distance = relative_face_matrix[i][j]
+                        estimate = original_points[j] - direction * estimated_distance
+                        weighted_sum += estimate
+                        total_weight += 1
+                if total_weight > 0:
+                    clean_points.append(weighted_sum / total_weight)
+            else:
+                clean_points.append(original_points[i])
+
+        clean_points = np.array(clean_points)
+
+        # Calculate bounding box from clean_points
+        realistic_aspect_ratio = 2/3  # width / height
+
+        x_coords = clean_points[:, 0]
+        y_coords = clean_points[:, 1]
+
+        min_x = np.min(x_coords)
+        max_x = np.max(x_coords)
+        min_y = np.min(y_coords)
+        max_y = np.max(y_coords)
+
+        # Face-like padding: more space top & bottom than sides
+        width = max_x - min_x
+        height = max_y - min_y
+
+        normalized_bounding_size = max(width, height * realistic_aspect_ratio)
+        real_width = normalized_bounding_size
+        real_height = normalized_bounding_size / realistic_aspect_ratio
+
+        padding_x = width * 0.7 + (real_width - width) / 2
+        padding_y_top = height * 2 + (real_height - height) / 2
+        padding_y_bottom = height * 1.7 + (real_height - height) / 2
+
+        face_box_x1 = int(min_x - padding_x)
+        face_box_y1 = int(min_y - padding_y_top)
+        face_box_x2 = int(max_x + padding_x)
+        face_box_y2 = int(max_y + padding_y_bottom)
+
+        face_bounding_box = (face_box_x1, face_box_y1, face_box_x2, face_box_y2)
+
+        color = (255, 255, 0)
+        if valid_face:
+            color = (0, 255, 0)
+
+        cv2.rectangle(image, (face_box_x1, face_box_y1), (face_box_x2, face_box_y2), color, 2)
+        for point in clean_points:
+            cv2.circle(image, (int(point[0]), int(point[1])), 4, color, -1)
+
+        print("\nOriginal points:")
+        print(original_points)
+        print("\nOriginal pairwise distances:")
+        print(linearize_pairwise_distances(original_points, relative_face_matrix))
+        print(f"Optimized rotation vector (axis-angle): {rotation_vector}")
+        print("\nOptimized points after rotation:")
+        print(optimized_points)
+        print("\nOptimized pairwise distances:")
+        print(optimized_distances)
+        print(success_points)
+        print(clean_points)
+
+        """
+        for idx in face_indices:
+            x, y, conf = keypoints[idx]
+            name = keypoint_names[idx]
+            if conf > 0.3:
+                face_points.append((x, y))
+
+                point = (int(x), int(y))
+                name = keypoint_names[idx]
+                cv2.circle(image, point, 4, (0, 255, 0), -1)
+                cv2.putText(image, name, (point[0] + 5, point[1] + 5),
+                            cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0), 1)
+        """
+
+        """
+        nose, left_eye, right_eye, left_ear, right_ear = face_points
+        print(face_points)
+
+        # Calculate pairwise distances
+        nose_to_left_eye = euclidean_distance(nose, left_eye)
+        nose_to_right_eye = euclidean_distance(nose, right_eye)
+        eyes_distance = euclidean_distance(left_eye, right_eye)
+        left_ear_to_nose = euclidean_distance(left_ear, nose)
+        right_ear_to_nose = euclidean_distance(right_ear, nose)
+
+        # Relative distances
+        eye_to_eye_ratio = eyes_distance / (left_ear_to_nose + right_ear_to_nose)  # Eyes vs. nose-to-ears
+        nose_to_eye_ratio = (nose_to_left_eye + nose_to_right_eye) / (left_ear_to_nose + right_ear_to_nose)  # Nose-to-eye vs. ear-to-nose
+        ear_to_nose_ratio = (left_ear_to_nose + right_ear_to_nose) / 2  # Ear-to-nose proportionality
+
+        # Validate using relative distances
+        if not (EYE_RATIO_THRESHOLD < eye_to_eye_ratio < 0.5):  # Arbitrary ratio threshold
+            print("⚠️ Rejected due to unrealistic eye-to-eye ratio:", eye_to_eye_ratio)
+            has_valid_face = False
+
+        if not (NOSE_EYE_RATIO_THRESHOLD < nose_to_eye_ratio < 0.4):  # Arbitrary ratio threshold
+            print("⚠️ Rejected due to unrealistic nose-to-eye ratio:", nose_to_eye_ratio)
+            has_valid_face = False
+
+        if not (0.5 < ear_to_nose_ratio < EAR_NOSE_RATIO_THRESHOLD):
+            print("⚠️ Rejected due to unrealistic ear-to-nose ratio:", ear_to_nose_ratio)
+            has_valid_face = False
+
+        # If all checks pass, calculate the bounding box
+        xs, ys, _ = zip(*face_points)
+        x_min, x_max = int(min(xs)), int(max(xs))
+        y_min, y_max = int(min(ys)), int(max(ys))
+
+        x_min = max(x_min - face_padding, 0)
+        y_min = max(y_min - face_padding, 0)
+        x_max = min(x_max + face_padding, image.shape[1])
+        y_max = min(y_max + face_padding, image.shape[0])
+
+        # Compute box size
+        box_w = x_max - x_min
+        box_h = y_max - y_min
+
+        if has_valid_face:
+            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
+        else:
+            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)
+
+        for i, (x, y, conf) in enumerate(keypoints):
+            point = (int(x), int(y))
+            name = keypoint_names[i]
+            # cv2.circle(image, point, 4, (0, 255, 0), -1)
+            # cv2.putText(image, name, (point[0] + 5, point[1] - 5),
+            #             cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0), 1)
+
+        # cv2.circle(image, head, 5, (255, 0, 0), -1)  # Head in blue
+        # cv2.circle(image, foot, 5, (0, 0, 255), -1)  # Foot in red
+        """
+
+    cv2.imwrite(detected, image)
 
 
-def detect_humans(to_detect: str, crop_padding: int = 20):
+def detect_humans(to_detect: str, crop_padding: int = 20, skip_detection_if_present: bool = True):
     _p = Path(to_detect)
     detected = str(_p.with_name(_p.stem + "_detected" + _p.suffix))
     boxes_file = str(_p.with_name(_p.stem + "_boxes.json"))
     print(f"detecting humans: {to_detect} => {detected}")
 
-    require_net("yolov3")
-
-    # Load YOLO
-    net = cv2.dnn.readNet(str(MODEL_PATH / 'yolov3.weights'), str(MODEL_PATH / 'yolov3.cfg'))
-    layer_names = net.getLayerNames()
-    indices = net.getUnconnectedOutLayers()
-    output_layers = [layer_names[int(i) - 1] for i in indices]
-
-    # Load image
-    image = cv2.imread(to_detect)
-    original_image = cv2.imread(to_detect)
-    height, width, channels = image.shape
-
-    # Create blob and do forward pass
-    blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
-    net.setInput(blob)
-    outs = net.forward(output_layers)
-
-    boxes = []
-    confidences = []
-
-    # Information for each object detected
-    for out in outs:
-        for detection in out:
-            scores = detection[5:]
-            class_id = np.argmax(scores)
-            confidence = scores[class_id]
-            if confidence > 0.5 and class_id == 0:  # Class ID 0 is human
-                center_x = int(detection[0] * width)
-                center_y = int(detection[1] * height)
-                w = int(detection[2] * width)
-                h = int(detection[3] * height)
-                x = int(center_x - w / 2)
-                y = int(center_y - h / 2)
-
-                boxes.append([x, y, w, h])
-                confidences.append(float(confidence))
-
-    # Apply Non-Maximum Suppression
-    indices = cv2.dnn.NMSBoxes(boxes, confidences, score_threshold=0.5, nms_threshold=0.4)
-
     boxes_structures = {}
     human_boxes = boxes_structures["humans"] = []
 
-    human_part_folder = _p.with_name(_p.stem + "_parts")
-    human_part_folder.mkdir(exist_ok=True)
-
-    for i in indices:
-        i = i[0] if isinstance(i, (list, np.ndarray)) else i  # Flatten index if needed
-        x, y, w, h = boxes[i]
-
-        human_part_image_path = human_part_folder / (_p.stem + "_" + str(i) + _p.suffix)
-
-        image_height, image_width = image.shape[:2]
-
-        # Compute safe crop coordinates with padding
-        x1 = max(x - crop_padding, 0)
-        y1 = max(y - crop_padding, 0)
-        x2 = min(x + w + crop_padding, image_width)
-        y2 = min(y + h + crop_padding, image_height)
-        human_crop = original_image[y1:y2, x1:x2]
-
-        cv2.imwrite(str(human_part_image_path), human_crop)
-
-        print(f"\tfound human at {x}/{y} with the size of {w} x {h}")
-        human_boxes.append({
-            "x": x,
-            "y": y,
-            "w": w,
-            "h": h,
-            "crop": {
-                "file": str(human_part_image_path),
-                "x": x1,
-                "y": y,
-                "w": x2 - x1,
-                "h": y2 - y1,
-            },
-            "parts": {},
-        })
-
-        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 255), 2)
-
-    # Save the result
-    with open(boxes_file, "w") as f:
-        json.dump(boxes_structures, f)
-    cv2.imwrite(detected, image)
+    if not (Path(boxes_file).exists() and skip_detection_if_present):
+        require_net("yolov3")
+
+        # Load YOLO
+        net = cv2.dnn.readNet(str(MODEL_PATH / 'yolov3.weights'), str(MODEL_PATH / 'yolov3.cfg'))
+        layer_names = net.getLayerNames()
+        indices = net.getUnconnectedOutLayers()
+        output_layers = [layer_names[int(i) - 1] for i in indices]
+
+        # Load image
+        image = cv2.imread(to_detect)
+        original_image = cv2.imread(to_detect)
+        height, width, channels = image.shape
+
+        # Create blob and do forward pass
+        blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
+        net.setInput(blob)
+        outs = net.forward(output_layers)
+
+        boxes = []
+        confidences = []
+
+        # Information for each object detected
+        for out in outs:
+            for detection in out:
+                scores = detection[5:]
+                class_id = np.argmax(scores)
+                confidence = scores[class_id]
+                if confidence > 0.5 and class_id == 0:  # Class ID 0 is human
+                    center_x = int(detection[0] * width)
+                    center_y = int(detection[1] * height)
+                    w = int(detection[2] * width)
+                    h = int(detection[3] * height)
+                    x = int(center_x - w / 2)
+                    y = int(center_y - h / 2)
+
+                    boxes.append([x, y, w, h])
+                    confidences.append(float(confidence))
+
+        # Apply Non-Maximum Suppression
+        indices = cv2.dnn.NMSBoxes(boxes, confidences, score_threshold=0.5, nms_threshold=0.4)
+
+        human_part_folder = _p.with_name(_p.stem + "_parts")
+        human_part_folder.mkdir(exist_ok=True)
+
+        for i in indices:
+            i = i[0] if isinstance(i, (list, np.ndarray)) else i  # Flatten index if needed
+            x, y, w, h = boxes[i]
+
+            human_part_image_path = human_part_folder / (_p.stem + "_" + str(i) + _p.suffix)
+
+            image_height, image_width = image.shape[:2]
+
+            # Compute safe crop coordinates with padding
+            x1 = max(x - crop_padding, 0)
+            y1 = max(y - crop_padding, 0)
+            x2 = min(x + w + crop_padding, image_width)
+            y2 = min(y + h + crop_padding, image_height)
+            human_crop = original_image[y1:y2, x1:x2]
+
+            cv2.imwrite(str(human_part_image_path), human_crop)
+
+            print(f"\tfound human at {x}/{y} with the size of {w} x {h}")
+            human_boxes.append({
+                "x": x,
+                "y": y,
+                "w": w,
+                "h": h,
+                "crop": {
+                    "file": str(human_part_image_path),
+                    "x": x1,
+                    "y": y,
+                    "w": x2 - x1,
+                    "h": y2 - y1,
+                },
+                "parts": {},
+            })
+
+            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 255), 2)
+
+        # Save the result
+        with open(boxes_file, "w") as f:
+            json.dump(boxes_structures, f)
+        cv2.imwrite(detected, image)
+
+    else:
+        with open(boxes_file, "r") as f:
+            boxes_structures = json.load(f)
+        human_boxes = boxes_structures["humans"]
 
     for human in human_boxes:
-        detect_human_parts(human["crop"]["file"])
+        detect_human_parts(human)
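
Review notes (illustrative sketches, not part of the commit):

1. The hard-coded relative_face_matrix reads as a template of pairwise distances between the five COCO face keypoints (nose, left eye, right eye, left ear, right ear), normalized by the sum of all pairwise distances; that is the same normalization linearize_pairwise_distances applies before comparing. The interpretation is an assumption, but the matrix's structure supports it and can be checked directly:

    import numpy as np

    relative_face_matrix = np.array([
        [0.        , 0.02243309, 0.02243309, 0.05016191, 0.05016191],
        [0.02243309, 0.        , 0.04012953, 0.04486618, 0.07234453],
        [0.02243309, 0.04012953, 0.        , 0.07234453, 0.04486618],
        [0.05016191, 0.04486618, 0.07234453, 0.        , 0.08025906],
        [0.05016191, 0.07234453, 0.04486618, 0.08025906, 0.        ],
    ])

    assert np.allclose(relative_face_matrix, relative_face_matrix.T)  # symmetric
    assert np.allclose(np.diag(relative_face_matrix), 0.0)            # zero self-distances
    assert np.isclose(relative_face_matrix.sum(), 1.0)                # normalized to unit total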
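2. Pairwise distances in the plane are invariant under a pure in-plane rotation, so optimizing a 3D rotation vector only does useful work because objective() keeps just the top-left 2x2 block of the 3D rotation matrix: for out-of-plane rotations that block is not orthogonal and foreshortens the point layout. In effect the optimizer searches for a head pose whose projected keypoint spacing matches the template. A self-contained sketch of that mechanism (function and variable names here are mine, not the commit's):

    import numpy as np
    from scipy.optimize import minimize
    from scipy.spatial.transform import Rotation as R

    def normed_pairwise(points):
        # Pairwise distance matrix, normalized by its total (as in the commit).
        d = np.linalg.norm(points[:, None, :] - points[None, :, :], axis=-1)
        return d / d.sum()

    def divergence(rotvec, points, target):
        # Project through the top-left 2x2 block of a 3D rotation matrix;
        # out-of-plane components shrink the layout anisotropically.
        m = R.from_rotvec(rotvec).as_matrix()[:2, :2]
        rel = np.abs(normed_pairwise(points @ m.T) - target)
        return np.nansum(rel / np.where(target > 0, target, np.nan))

    # Toy coplanar "face": nose, two eyes, two ears.
    template = np.array([[0, 0], [-1, -1], [1, -1], [-2, 0], [2, 0]], dtype=float)
    target = normed_pairwise(template)

    # Foreshorten by a known out-of-plane rotation, then try to recover a pose
    # whose projection reproduces the observed spacing.
    observed = template @ R.from_rotvec([0.0, 0.6, 0.0]).as_matrix()[:2, :2].T
    fit = minimize(divergence, np.zeros(3), args=(observed, target), method='BFGS')
    print(fit.x, fit.fun)  # fit.fun near 0 when a matching pose is found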
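3. The index-flattening line in detect_humans (i = i[0] if isinstance(i, (list, np.ndarray)) else i) papers over an OpenCV version difference: on some builds cv2.dnn.NMSBoxes returns indices with shape (N, 1), on newer ones a flat (N,) array. If that is the only concern, a single reshape up front handles both layouts; a sketch with dummy boxes standing in for the NMS output:

    import numpy as np

    boxes = [[10, 20, 30, 40], [15, 25, 30, 40]]

    # Simulate both return shapes; reshape(-1) normalizes either one.
    for raw in (np.array([[0], [1]]), np.array([0, 1])):
        for i in np.asarray(raw).reshape(-1):
            x, y, w, h = boxes[i]
            print(int(i), x, y, w, h)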
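4. The reworked detect_humans caches its results: when the <stem>_boxes.json sidecar already exists and skip_detection_if_present is true, it loads the saved boxes instead of re-running YOLOv3, then hands each human dict to detect_human_parts as before. Hypothetical usage (module and image path are made up for illustration):

    from detector import detect_humans  # hypothetical module name

    detect_humans("assets/photo.jpg")  # runs YOLOv3, writes photo_boxes.json and the crops
    detect_humans("assets/photo.jpg")  # reuses photo_boxes.json; no detection pass
    detect_humans("assets/photo.jpg", skip_detection_if_present=False)  # forces re-detection

One caveat worth checking: the cached branch never re-creates the crop files, so it only works while the crops written by the first run are still on disk.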