Compare commits

...

42 Commits

Author SHA1 Message Date
Hazel Noack
54a2138746 stuff 2025-05-07 16:48:07 +02:00
Hazel Noack
37a5da37b0 changed deconvolution algorythm 2025-05-07 16:11:39 +02:00
Hazel Noack
edd8096030 feat: 2025-05-07 15:31:58 +02:00
Hazel Noack
d576f9979c feat: ui 2025-05-07 15:20:02 +02:00
Hazel Noack
f6a774a01f feat: generating kernel 2025-05-07 14:49:08 +02:00
Hazel Noack
6126e675f1 heatmaps 2025-05-07 13:46:27 +02:00
Hazel Noack
df4b949dd2 feat: further tests 2025-05-07 13:01:10 +02:00
Hazel Noack
6101a8d5e4 feat: deconvolution 2025-05-07 12:38:25 +02:00
Hazel Noack
b4c7512a73 feat: detect color edges 2025-05-07 12:02:54 +02:00
Hazel Noack
ed650dcc5d feat: added option to paint mask 2025-05-07 11:22:11 +02:00
Hazel Noack
aaa706264d feat: added links 2025-05-05 16:46:28 +02:00
Hazel Noack
8d6eecaf78 feat: speed up the code by huge difference 2025-05-05 16:44:47 +02:00
Hazel Noack
f23fd1cdb3 feat: 2d deblurr 2025-05-05 16:38:04 +02:00
Hazel Noack
2467b4788f feat: add test for deconvolution 2025-05-05 15:42:58 +02:00
8bd512a0a7 feat: added alternative inpainting 2025-04-24 17:06:54 +02:00
8fc56b887d feat: added alternative inpainting 2025-04-24 16:59:30 +02:00
edad12841f fix: some minor errors 2025-04-24 16:55:44 +02:00
5baefdcc6f feat: implemented correct lama bindings 2025-04-24 16:50:37 +02:00
eb00e869fc wip 2025-04-24 15:18:07 +02:00
94b641cbd6 wip 2025-04-24 15:15:46 +02:00
061cc20046 feat: added some stuff 2025-04-24 13:48:06 +02:00
8753e1e05f feat: added readme stuff 2025-04-24 11:59:03 +02:00
529e1af517 feat: added impaint 2025-04-24 11:52:38 +02:00
ad38eef03b feat: added proper pixelation 2025-04-24 11:46:45 +02:00
678aeab7a5 feat: implemented effective but non generative impainting 2025-04-24 11:41:46 +02:00
180b41ffa4 feat: blacking out 2025-04-24 11:33:28 +02:00
88180d035c feat: blacking out image 2025-04-24 11:26:17 +02:00
b88f9c22a3 feat: improved bounding box format 2025-04-24 11:21:31 +02:00
cb9e594837 feat: added steps dir 2025-04-24 11:00:54 +02:00
0895256dc4 fix: converting ndarray to list 2025-04-24 10:58:21 +02:00
ff2088c1d0 feat: writing bounding boxes 2025-04-24 10:53:40 +02:00
e104a8f45c feat: added bounding boxes to meta data 2025-04-24 10:52:12 +02:00
bb6eafbc74 feat: added data classes 2025-04-24 10:48:49 +02:00
d7cf61ee2d feat: added multiple bounding boxes support 2025-04-24 10:47:57 +02:00
3d9ecea560 feat: selecting single bounding box 2025-04-24 10:43:48 +02:00
208f818e18 feat: started selecting bounding boxes 2025-04-23 17:48:49 +02:00
1dd387d980 feat: improved stuff 2025-04-23 17:23:46 +02:00
9ae09b1ae5 feat: removed comments 2025-04-23 17:14:54 +02:00
0e73aff25d feat: fixed some issue 2025-04-23 17:14:17 +02:00
ad8f3b8e66 feat: cleaner detection 2025-04-23 16:56:09 +02:00
bcfc90acdf feat: detecting human features 2025-04-23 13:09:41 +02:00
f13878d8bc feat: improved human detection 2025-04-23 12:42:19 +02:00
12 changed files with 1336 additions and 52 deletions

3
.gitignore vendored
View File

@@ -161,3 +161,6 @@ cython_debug/
#.idea/
.venv
assets/*
*.pt
big-lama

View File

@@ -12,8 +12,8 @@ I first realized that a normal mosaic algorithm isn't safe AT ALL seeing this pr
```bash
# Step 1: Create and activate virtual environment
-python3 -m venv .venv
-source venv/bin/activate
+python3.8 -m venv .venv
+source .venv/bin/activate
# Step 2: Install the local Python program (add the -e flag for development)
pip install .
@@ -21,3 +21,21 @@ pip install .
# Step 3: Run the secure-pixelation command
secure-pixelation
```
## Setup LaMa
This is the generative AI model used to inpaint the blacked-out areas.
```
# get the pretrained models
mkdir -p ./big-lama
wget https://huggingface.co/smartywu/big-lama/resolve/main/big-lama.zip
unzip big-lama.zip -d ./big-lama
rm big-lama.zip
# get the code to run the models
cd big-lama
git clone https://github.com/advimman/lama.git
cd lama
pip install -r requirements.txt
```

57
deblur/deblur.py Normal file
View File

@@ -0,0 +1,57 @@
import numpy as np
import cv2
image = np.array([1, 3, 1, 2, 1, 6, 1], dtype=np.float32)
kernel = np.array([1, 2, 1], dtype=np.float32) / 4
blurred = np.convolve(image, kernel, mode="same")
print(image)
print(blurred)
print()
print("building linalg")
# https://numpy.org/doc/stable/reference/generated/numpy.linalg.solve.html
a = []
b = []
for i in range(len(blurred)):
    y = blurred[i]
    shift = i - 1
    equation = np.zeros(len(image))
    # Calculate valid range in the output array
    start_eq = max(0, shift)
    end_eq = min(len(image), shift + len(kernel))
    # Corresponding range in the kernel
    start_k = start_eq - shift  # how much to cut from the beginning of the kernel
    end_k = start_k + (end_eq - start_eq)
    # Assign the clipped kernel segment
    equation[start_eq:end_eq] = kernel[start_k:end_k]
    a.append(equation)
    b.append(y)
    goal = image[i]
    print(f"{i} ({goal}): {y} = {equation}")
print()
print("deblurring")
deblurred = np.linalg.solve(a, b)
print(deblurred)
def show_matrix(m):
    # Resize the image to make it visible (e.g., scale up to 200x200 pixels)
    scaled_image = cv2.resize(m, (200, 200), interpolation=cv2.INTER_NEAREST)
    # Display the image
    cv2.imshow('Test Matrix', scaled_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
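
A quick way to sanity-check this construction is to assemble the same system as a matrix and verify that solving it really undoes the blur. A minimal standalone sketch (same signal and kernel as above):

```python
import numpy as np

image = np.array([1, 3, 1, 2, 1, 6, 1], dtype=np.float32)
kernel = np.array([1, 2, 1], dtype=np.float32) / 4
blurred = np.convolve(image, kernel, mode="same")

# Build A row by row so that A @ image == blurred (same clipping logic as above).
n = len(image)
A = np.zeros((n, n), dtype=np.float32)
for i in range(n):
    shift = i - 1
    start = max(0, shift)
    end = min(n, shift + len(kernel))
    A[i, start:end] = kernel[start - shift:end - shift]

deblurred = np.linalg.solve(A, blurred)
assert np.allclose(A @ deblurred, blurred, atol=1e-4)   # re-blurring reproduces the observation
assert np.allclose(deblurred, image, atol=1e-3)         # and the original signal is recovered
```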

419
deblur/deblur_2d.py Normal file
View File

@@ -0,0 +1,419 @@
import numpy as np
from scipy.signal import convolve2d
from scipy.sparse import lil_matrix
from scipy.sparse.linalg import spsolve
from scipy.optimize import curve_fit
import cv2
import matplotlib
import matplotlib.pyplot as plt
from pathlib import Path
from scipy.ndimage import correlate
from skimage.restoration import richardson_lucy
import os
matplotlib.use('qtagg')
"""
https://setosa.io/ev/image-kernels/
https://openaccess.thecvf.com/content/CVPR2021/papers/Tran_Explore_Image_Deblurring_via_Encoded_Blur_Kernel_Space_CVPR_2021_paper.pdf
"""
def show(img):
cv2.imshow('image',img.astype(np.uint8))
cv2.waitKey(0)
cv2.destroyAllWindows()
def demo(image_file):
# Define 2D image and kernel
image = cv2.imread(image_file, 0)
image = cv2.resize(image, (200, 200), interpolation= cv2.INTER_LINEAR)
kernel = np.array([
[1, 2, 1],
[2, 4, 2],
[1, 2, 1]
], dtype=np.float32)
kernel /= kernel.sum() # Normalize
print(kernel)
# Perform 2D convolution (blurring)
blurred = convolve2d(image, kernel, mode="same", boundary="fill", fillvalue=0)
h, w = image.shape
kh, kw = kernel.shape
pad_h, pad_w = kh // 2, kw // 2
show(image)
show(blurred)
print("Original image:\n", image)
print("\nBlurred image:\n", blurred)
print("\nBuilding linear system for deconvolution...")
# Step 2: Build sparse matrix A
N = h * w
A = lil_matrix((N, N), dtype=np.float32)
b = blurred.flatten()
def index(y, x):
return y * w + x
for y in range(h):
for x in range(w):
row_idx = index(y, x)
for ky in range(kh):
for kx in range(kw):
iy = y + ky - pad_h
ix = x + kx - pad_w
if 0 <= iy < h and 0 <= ix < w:
col_idx = index(iy, ix)
A[row_idx, col_idx] += kernel[ky, kx]
# Step 3: Solve the sparse system A * x = b
x = spsolve(A.tocsr(), b)
deblurred = x.reshape((h, w))
print("\nDeblurred image:\n", np.round(deblurred, 2))
show(deblurred)
def get_mask(image_file):
mask_file = Path(image_file)
mask_file = mask_file.with_name("mask_" + mask_file.name)
if mask_file.exists():
return cv2.imread(str(mask_file), 0)
drawing = False # True when mouse is pressed
brush_size = 5
image = cv2.imread(image_file)
mask = np.zeros(image.shape[:2], dtype=np.uint8)
clone = image.copy()
def draw_mask(event, x, y, flags, param):
nonlocal drawing, mask, brush_size
if event == cv2.EVENT_LBUTTONDOWN:
drawing = True
elif event == cv2.EVENT_MOUSEMOVE:
if drawing:
cv2.circle(mask, (x, y), brush_size, 255, -1)
cv2.circle(image, (x, y), brush_size, (0, 0, 255), -1)
elif event == cv2.EVENT_LBUTTONUP:
drawing = False
cv2.namedWindow("Draw Mask")
cv2.setMouseCallback("Draw Mask", draw_mask)
while True:
display = image.copy()
cv2.putText(display, f'Brush size: {brush_size}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)
cv2.imshow("Draw Mask", display)
key = cv2.waitKey(1) & 0xFF
if key == 13: # Enter to finish
break
elif key == ord('+') or key == ord('='): # `=` for some keyboard layouts
brush_size = min(100, brush_size + 1)
elif key == ord('-') or key == ord('_'):
brush_size = max(1, brush_size - 1)
cv2.destroyAllWindows()
cv2.imwrite(str(mask_file), mask)
# Apply mask
masked_image = cv2.bitwise_and(clone, clone, mask=mask)
cv2.imshow("Masked Image", masked_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
def color_edge_detection(image, threshold=30):
img_lab = cv2.cvtColor(image, cv2.COLOR_BGR2Lab)
L, A, B = cv2.split(img_lab)
def gradient_magnitude(channel):
gx = cv2.Sobel(channel, cv2.CV_64F, 1, 0, ksize=3)
gy = cv2.Sobel(channel, cv2.CV_64F, 0, 1, ksize=3)
return gx, gy
gxL, gyL = gradient_magnitude(L)
gxA, gyA = gradient_magnitude(A)
gxB, gyB = gradient_magnitude(B)
gx_total = gxL**2 + gxA**2 + gxB**2
gy_total = gyL**2 + gyA**2 + gyB**2
magnitude = np.sqrt(gx_total + gy_total)
magnitude = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)
edges = (magnitude > threshold).astype(np.uint8) * 255
return edges, magnitude
# === Step 2: Extract Vertical Profile ===
def extract_vertical_profile(image, center_x, center_y, length=21):
half_len = length // 2
y_range = np.clip(np.arange(center_y - half_len, center_y + half_len + 1), 0, image.shape[0] - 1)
profile = image[y_range, center_x].astype(np.float64)
profile -= profile.min()
if profile.max() > 0:
profile /= profile.max()
return profile, y_range - center_y # profile, x-axis
# === Step 3: Fit Gaussian ===
def gaussian(x, amp, mu, sigma):
return amp * np.exp(-(x - mu)**2 / (2 * sigma**2))
def fit_gaussian(profile, x_vals):
p0 = [1.0, 0.0, 2.0] # initial guess: amp, mu, sigma
popt, _ = curve_fit(gaussian, x_vals, profile, p0=p0)
return popt # amp, mu, sigma
# === Step 4: Create Gaussian Kernel ===
def create_gaussian_kernel(sigma):
ksize = int(sigma * 6) | 1 # ensure odd size
kernel_1d = cv2.getGaussianKernel(ksize, sigma)
kernel_2d = kernel_1d @ kernel_1d.T
return kernel_2d
def kernel_detection(blurred, mask, edge_threshold=30, profile_length=21):
edges, gradient_mag = color_edge_detection(blurred, threshold=edge_threshold)
edges = cv2.bitwise_and(edges, edges, mask=mask)
# show(edges)
# Find central edge pixel
y_idxs, x_idxs = np.where(edges > 0)
if len(x_idxs) == 0:
raise RuntimeError("No edges found.")
idx = len(x_idxs) // 2
cx, cy = x_idxs[idx], y_idxs[idx]
gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)
profile, x_vals = extract_vertical_profile(gray, cx, cy, length=profile_length)
popt = fit_gaussian(profile, x_vals)
amp, mu, sigma = popt
print(f"Estimated Gaussian sigma: {sigma:.2f}")
kernel = create_gaussian_kernel(sigma)
# print(kernel)
return kernel / kernel.sum()
def kernel_detection_box(blurred, mask, edge_threshold=30, profile_length=21):
def box_function(x, amp, center, width):
"""Simple box profile: flat region with sharp transitions."""
return amp * ((x >= (center - width / 2)) & (x <= (center + width / 2))).astype(float)
def fit_box(profile, x_vals):
# Initial guess: full amplitude, centered at 0, small width
p0 = [1.0, 0.0, 5.0]
bounds = ([0, -10, 1], [1.5, 10, len(x_vals)]) # reasonable bounds
popt, _ = curve_fit(box_function, x_vals, profile, p0=p0, bounds=bounds)
return popt # amp, center, width
def create_box_kernel(width):
"""Generate a normalized 2D box kernel."""
ksize = int(round(width))
if ksize < 1:
ksize = 1
if ksize % 2 == 0:
ksize += 1 # ensure odd size
kernel = np.ones((ksize, ksize), dtype=np.float32)
return kernel / kernel.sum()
edges, gradient_mag = color_edge_detection(blurred, threshold=edge_threshold)
edges = cv2.bitwise_and(edges, edges, mask=mask)
y_idxs, x_idxs = np.where(edges > 0)
if len(x_idxs) == 0:
raise RuntimeError("No edges found.")
idx = len(x_idxs) // 2
cx, cy = x_idxs[idx], y_idxs[idx]
gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)
profile, x_vals = extract_vertical_profile(gray, cx, cy, length=profile_length)
popt = fit_box(profile, x_vals)
amp, mu, width = popt
print(f"Estimated box width: {width:.2f} pixels")
kernel = create_box_kernel(width)
return kernel
def deconvolution(image_file, edge_threshold=30, profile_length=21):
image = cv2.imread(image_file)
mask = get_mask(image_file)
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
kernel = kernel_detection_box(image, mask, edge_threshold=edge_threshold, profile_length=profile_length)
# Apply Richardson-Lucy to each channel
num_iter = 30
deblurred_channels = []
for i in range(3): # R, G, B
channel = image_rgb[..., i]
deconv = richardson_lucy(channel, kernel, num_iter=num_iter)
deblurred_channels.append(deconv)
# Stack back into an RGB image
deblurred_rgb = np.stack(deblurred_channels, axis=2)
deblurred_rgb = np.clip(deblurred_rgb, 0, 1)
# Show result
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.imshow(image_rgb)
plt.title("Blurred Image")
plt.axis('off')
plt.subplot(1, 2, 2)
plt.imshow(deblurred_rgb)
plt.title("Deconvolved Image")
plt.axis('off')
plt.show()
def sharpness_heatmap(image, block_size=32, threshold=30):
    """
    Compute a sharpness heatmap using color-aware Laplacian variance over blocks
    and derive a binary mask highlighting blurred areas.

    Args:
        image: BGR or RGB image (NumPy array).
        block_size: Size of the square block used to compute sharpness.
        threshold: Laplacian-variance threshold below which a block counts as blurred.

    Returns:
        mask: Binary mask highlighting blurred areas (0 = sharp, 255 = blurred).
    """
    if image.ndim != 3 or image.shape[2] != 3:
        raise ValueError("Input must be a color image (3 channels)")

    h, w, _ = image.shape
    heatmap = np.zeros((h // block_size, w // block_size))

    # Calculate sharpness for each block
    for y in range(0, h - block_size + 1, block_size):
        for x in range(0, w - block_size + 1, block_size):
            block = image[y:y + block_size, x:x + block_size, :]
            sharpness_vals = []
            for c in range(3):  # For R, G, B channels
                channel = block[..., c]
                lap_var = cv2.Laplacian(channel, cv2.CV_64F).var()
                sharpness_vals.append(lap_var)
            # Use average sharpness across color channels
            heatmap[y // block_size, x // block_size] = np.mean(sharpness_vals)

    print(heatmap)

    # Threshold the heatmap to create a binary mask (blurred regions)
    mask = heatmap < threshold
    mask = (mask * 255).astype(np.uint8)  # Convert to binary mask (0, 255)

    # Display heatmap
    plt.subplot(1, 2, 1)
    plt.imshow(heatmap, cmap='hot', interpolation='nearest')
    plt.title("Sharpness Heatmap")
    plt.colorbar(label='Sharpness')

    # Display mask
    plt.subplot(1, 2, 2)
    plt.imshow(mask, cmap='gray', interpolation='nearest')
    plt.title("Mask for Blurred Areas")
    plt.colorbar(label='Blurred Mask')

    plt.tight_layout()
    plt.show()

    return mask
def graininess_heatmap(image, block_size=32, threshold=100):
"""
Compute a graininess heatmap using local variance (texture/noise) over blocks.
No smoothing or blurring is applied.
Args:
image: BGR or RGB image (NumPy array).
block_size: Size of the square block to compute variance (graininess).
Returns:
graininess_map: Heatmap highlighting the graininess (texture/noise) in the image.
"""
if image.ndim != 3 or image.shape[2] != 3:
raise ValueError("Input must be a color image (3 channels)")
h, w, _ = image.shape
graininess_map = np.zeros((h // block_size, w // block_size))
# Calculate variance for each block
for y in range(0, h - block_size + 1, block_size):
for x in range(0, w - block_size + 1, block_size):
block = image[y:y + block_size, x:x + block_size, :]
variance_vals = []
for c in range(3): # For R, G, B channels
channel = block[..., c]
variance = np.var(channel)
variance_vals.append(variance)
# Use average variance across color channels for graininess
graininess_map[y // block_size, x // block_size] = np.mean(variance_vals)
mask = graininess_map < threshold
mask = (mask * 255).astype(np.uint8) # Convert to binary mask (0, 255)
# Display graininess_map
plt.subplot(1, 2, 1)
plt.imshow(graininess_map, cmap='hot', interpolation='nearest')
plt.title("Graininess Heatmap")
plt.colorbar(label='Graininess')
# Display mask
plt.subplot(1, 2, 2)
plt.imshow(mask, cmap='gray', interpolation='nearest')
plt.title("Mask for Blurred Areas")
plt.colorbar(label='Blurred Mask')
plt.tight_layout()
plt.show()
return graininess_map
if __name__ == "__main__":
img_file = "assets/real_test.jpg"
#demo("assets/omas.png")
# deconvolution(img_file, edge_threshold=5)
image = cv2.imread(img_file)
test = graininess_heatmap(image)
heatmap = sharpness_heatmap(image)
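
For reference, a small self-contained check of the sparse-system construction used in `demo()`: with a symmetric kernel (so convolution and correlation coincide), applying `A` to the flattened image must reproduce the zero-padded blur. A minimal sketch:

```python
import numpy as np
from scipy.signal import convolve2d
from scipy.sparse import lil_matrix

image = np.arange(16, dtype=np.float32).reshape(4, 4)
kernel = np.array([[1, 2, 1], [2, 4, 2], [1, 2, 1]], dtype=np.float32)
kernel /= kernel.sum()

h, w = image.shape
kh, kw = kernel.shape
pad_h, pad_w = kh // 2, kw // 2

# Same construction as in demo(): one row per output pixel, one column per input pixel.
A = lil_matrix((h * w, h * w), dtype=np.float32)
for y in range(h):
    for x in range(w):
        row = y * w + x
        for ky in range(kh):
            for kx in range(kw):
                iy, ix = y + ky - pad_h, x + kx - pad_w
                if 0 <= iy < h and 0 <= ix < w:
                    A[row, iy * w + ix] += kernel[ky, kx]

blurred = convolve2d(image, kernel, mode="same", boundary="fill", fillvalue=0)
assert np.allclose(A.toarray() @ image.flatten(), blurred.flatten(), atol=1e-4)
```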

251
deblur/symetric_kernel.py Normal file
View File

@@ -0,0 +1,251 @@
import sys
import numpy as np
import cv2
from PyQt5.QtWidgets import (
QApplication, QWidget, QLabel, QSlider, QVBoxLayout,
QHBoxLayout, QGridLayout, QPushButton, QFileDialog
)
from PyQt5.QtCore import Qt
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.figure import Figure
import scipy.signal
from scipy.signal import convolve2d
import os
os.environ.pop("QT_QPA_PLATFORM_PLUGIN_PATH", None)
def generate_box_kernel(size):
return np.ones((size, size), dtype=np.float32) / (size * size)
def generate_disk_kernel(radius):
size = 2 * radius + 1
y, x = np.ogrid[-radius:radius+1, -radius:radius+1]
mask = x**2 + y**2 <= radius**2
kernel = np.zeros((size, size), dtype=np.float32)
kernel[mask] = 1
kernel /= kernel.sum()
return kernel
def generate_kernel(radius, sigma=None):
"""
Generate a 2D Gaussian kernel with a given radius.
Parameters:
- radius: int, the radius of the kernel (size will be 2*radius + 1)
- sigma: float (optional), standard deviation of the Gaussian. If None, sigma = radius / 3
Returns:
- kernel: 2D numpy array of shape (2*radius+1, 2*radius+1)
"""
size = 2 * radius + 1
if sigma is None:
sigma = radius / 3.0 # Common default choice
print(f"radius: {radius}, sigma: {sigma}")
# Create a grid of (x,y) coordinates
ax = np.arange(-radius, radius + 1)
xx, yy = np.meshgrid(ax, ax)
# Apply the 2D Gaussian formula
kernel = np.exp(-(xx**2 + yy**2) / (2 * sigma**2))
kernel /= 2 * np.pi * sigma**2 # Normalize based on Gaussian PDF
kernel /= kernel.sum() # Normalize to sum to 1
return kernel
def wiener_deconvolution(blurred, kernel, K=0.1):
"""
Perform Wiener deconvolution on a 2D image.
Parameters:
- blurred: 2D numpy array (blurred image)
- kernel: 2D numpy array (PSF / blur kernel)
- K: float, estimated noise-to-signal ratio
Returns:
- deconvolved: 2D numpy array (deblurred image)
"""
# Pad kernel to image size
kernel /= np.sum(kernel)
pad = [(0, blurred.shape[0] - kernel.shape[0]),
(0, blurred.shape[1] - kernel.shape[1])]
kernel_padded = np.pad(kernel, pad, 'constant')
# FFT of image and kernel
H = np.fft.fft2(kernel_padded)
G = np.fft.fft2(blurred)
# Avoid division by zero
H_conj = np.conj(H)
denominator = H_conj * H + K
F_hat = H_conj / denominator * G
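# Wiener filter in the frequency domain: F_hat = conj(H) * G / (|H|^2 + K),
# where K plays the role of the noise-to-signal ratio (conj(H) * H == |H|^2).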
# Inverse FFT to get result
deconvolved = np.fft.ifft2(F_hat)
deconvolved = np.abs(deconvolved)
deconvolved = np.clip(deconvolved, 0, 255)
return deconvolved.astype(np.uint8)
def richardson_lucy(image, psf, iterations=30, clip=True):
image = image.astype(np.float32) + 1e-6
psf = psf / psf.sum()
estimate = np.full(image.shape, 0.5, dtype=np.float32)
psf_mirror = psf[::-1, ::-1]
for _ in range(iterations):
conv = convolve2d(estimate, psf, mode='same', boundary='wrap')
relative_blur = image / (conv + 1e-6)
estimate *= convolve2d(relative_blur, psf_mirror, mode='same', boundary='wrap')
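# Richardson-Lucy multiplicative update:
# estimate <- estimate * ((observed / (estimate (x) psf)) (x) flipped psf), with (x) denoting convolution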
if clip:
estimate = np.clip(estimate, 0, 255)
return estimate
class KernelVisualizer(QWidget):
def __init__(self, image_path=None):
super().__init__()
self.setWindowTitle("Gaussian Kernel Visualizer")
self.image = None
self.deconvolved = None
self.load_button = QPushButton("Load Image")
self.load_button.clicked.connect(self.load_image)
self.radius_slider = QSlider(Qt.Horizontal)
self.radius_slider.setRange(1, 100)
self.radius_slider.setValue(5)
self.sigma_slider = QSlider(Qt.Horizontal)
self.sigma_slider.setRange(1, 300)
self.sigma_slider.setValue(15)
self.radius_slider.valueChanged.connect(self.update_visualization)
self.sigma_slider.valueChanged.connect(self.update_visualization)
self.kernel_fig = Figure(figsize=(3, 3))
self.kernel_canvas = FigureCanvas(self.kernel_fig)
self.image_fig = Figure(figsize=(6, 3))
self.image_canvas = FigureCanvas(self.image_fig)
self.iter_slider = QSlider(Qt.Horizontal)
self.iter_slider.setRange(1, 50)
self.iter_slider.setValue(10)
self.apply_button = QPushButton("Do Deconvolution.")
self.apply_button.clicked.connect(self.apply_kernel)
layout = QVBoxLayout()
layout.addWidget(self.load_button)
sliders_layout = QGridLayout()
sliders_layout.addWidget(QLabel("Radius:"), 0, 0)
sliders_layout.addWidget(self.radius_slider, 0, 1)
sliders_layout.addWidget(QLabel("Sigma:"), 1, 0)
sliders_layout.addWidget(self.sigma_slider, 1, 1)
sliders_layout.addWidget(QLabel("Iterations:"), 2, 0)
sliders_layout.addWidget(self.iter_slider, 2, 1)
sliders_layout.addWidget(self.apply_button, 3, 1)
layout.addLayout(sliders_layout)
layout.addWidget(QLabel("Kernel Visualization:"))
layout.addWidget(self.kernel_canvas)
layout.addWidget(QLabel("Original and Deconvolved Image:"))
layout.addWidget(self.image_canvas)
self.setLayout(layout)
if image_path:
self.load_image(image_path)
else:
self.update_visualization()
    def load_image(self, image_path=None):
        if not image_path:
            fname, _ = QFileDialog.getOpenFileName(self, "Open Image", "", "Images (*.png *.jpg *.bmp *.jpeg)")
            image_path = fname
        if image_path:
            img = cv2.imread(image_path)
            if img is not None:
                self.image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                self.image = cv2.resize(self.image, (200, 200))
                self.update_visualization()
def apply_kernel(self):
radius = self.radius_slider.value()
sigma = self.sigma_slider.value() / 10.0
iterations = self.iter_slider.value()
kernel = generate_kernel(radius, sigma)
self.deconvolved = richardson_lucy(self.image, kernel, iterations=iterations)
self.update_visualization()
def update_visualization(self):
radius = self.radius_slider.value()
sigma = self.sigma_slider.value() / 10.0 * (radius / 3)
kernel = generate_kernel(radius, sigma)
iterations = self.iter_slider.value()
# Kernel Visualization
self.kernel_fig.clear()
ax = self.kernel_fig.add_subplot(111)
cax = ax.imshow(kernel, cmap='hot')
self.kernel_fig.colorbar(cax, ax=ax)
ax.set_title(f"Kernel (r={radius}, σ={sigma:.2f})")
self.kernel_canvas.draw()
if self.image is not None:
self.image_fig.clear()
ax1 = self.image_fig.add_subplot(131)
ax1.imshow(self.image, cmap='gray')
ax1.set_title("Original")
ax1.axis('off')
if self.deconvolved is not None:
ax3 = self.image_fig.add_subplot(133)
ax3.imshow(self.deconvolved, cmap='gray')
ax3.set_title(f"Deconvolved (RL, {iterations} iter)")
ax3.axis('off')
self.image_canvas.draw()
else:
self.image_fig.clear()
ax = self.image_fig.add_subplot(111)
ax.text(0.5, 0.5, "No image loaded", fontsize=14, ha='center', va='center')
ax.axis('off')
self.image_canvas.draw()
if __name__ == "__main__":
image_path = None
if len(sys.argv) > 1:
image_path = sys.argv[1] # Get image path from command-line argument
print(image_path)
app = QApplication(sys.argv)
viewer = KernelVisualizer(image_path=image_path)
viewer.show()
sys.exit(app.exec_())
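
A minimal round-trip sketch for the two helpers above, assuming `generate_kernel` and `wiener_deconvolution` are available in the current session (e.g., the definitions in this file have been executed):

```python
import numpy as np
import cv2

# Blur a synthetic white disk with the generated Gaussian kernel,
# then attempt to restore it with the Wiener filter defined above.
img = np.zeros((64, 64), dtype=np.float32)
cv2.circle(img, (32, 32), 12, 255, -1)

kernel = generate_kernel(radius=4, sigma=1.5)
blurred = cv2.filter2D(img, -1, kernel)                           # symmetric kernel: correlation == convolution
restored = wiener_deconvolution(blurred, kernel.copy(), K=0.01)   # copy: the function normalizes the kernel in place
print(blurred.dtype, restored.dtype)                              # float32 in, uint8 out
```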

View File

@@ -2,8 +2,15 @@
name = "secure_pixelation"
version = "0.0.0"
dependencies = [
"torch==2.1.2",
"torchvision==0.16.2",
"opencv_python~=4.11.0.86",
"imutils~=0.5.4",
"numpy<2.0.0",
"hf_transfer==0.1.8",
"huggingface_hub==0.25.1",
"ultralytics~=8.3.114",
]
authors = []
description = "Hiding faces with mosaic has proven incredibly unsafe, especially with videos, because the algorithm isn't destructive. However, if you black out the selected area, repopulate it with generative AI, and then pixelate it, it should look authentic but be 100% destructive, and thus safe."

View File

@@ -1,7 +1,10 @@
from .detect_humans import detect_humans
from .get_bounding_boxes import select_bounding_boxes
from .pixelation_process import pixelate
def cli():
print(f"Running secure_pixelation")
detect_humans("assets/humans.png")
pixelate("assets/human_detection/test.png", generative_impaint=True)
pixelate("assets/human_detection/humans.png", generative_impaint=False)
pixelate("assets/human_detection/rev1.png", generative_impaint=False)

View File

@@ -0,0 +1,53 @@
from __future__ import annotations
from typing import Union, List, Tuple
from pathlib import Path
import json
import cv2
import numpy as np
class RawImage:
    def __init__(self, file: Union[Path, str]):
        self.file = Path(file)
        self.name = self.file.name
        self.meta_file = self._get_path("boxes.json")
        self.meta_data = self.read_meta()
        self.image = self.get_image()

    def _get_path(self, ending: str, original_suffix: bool = False) -> Path:
        if original_suffix:
            return self.file.with_name(self.file.stem + "_" + ending + self.file.suffix)
        else:
            return self.file.with_name(self.file.stem + "_" + ending)

    def get_dir(self, name: str) -> Path:
        p = self._get_path(ending=name, original_suffix=False)
        p.mkdir(exist_ok=True, parents=True)
        return p

    def read_meta(self) -> dict:
        if not self.meta_file.exists():
            return {}
        with self.meta_file.open("r") as f:
            self.meta_data = json.load(f)
        return self.meta_data

    def write_meta(self):
        with self.meta_file.open("w") as f:
            json.dump(self.meta_data, f)

    def get_image(self) -> np.ndarray:
        return cv2.imread(str(self.file))

    @property
    def bounding_boxes(self) -> List[List[int]]:
        _key = "bounding_boxes"
        if _key not in self.meta_data:
            self.meta_data[_key] = []
        return self.meta_data[_key]
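
A short usage sketch for `RawImage` (the import path assumes the package is importable as `secure_pixelation`, and the image file is a placeholder that must exist on disk):

```python
from secure_pixelation.data_classes import RawImage  # assumed package layout

raw = RawImage("assets/humans.png")
raw.bounding_boxes.append([10, 20, 100, 150])   # x, y, w, h
raw.write_meta()                                # persists to assets/humans_boxes.json
steps = raw.get_dir("steps")                    # assets/humans_steps/, created if missing
print(raw.image.shape, steps)
```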

View File

@@ -1,11 +1,15 @@
from __future__ import annotations
from pathlib import Path
import urllib.request
from typing import Dict, List
import json
from dataclasses import dataclass
from ultralytics import YOLO
import cv2
import imutils
import numpy as np
from scipy.optimize import minimize
from scipy.spatial.transform import Rotation as R
MODEL_PATH = Path("assets", "models")
@@ -38,64 +42,339 @@ def require_net(name: str):
)
# print(f"\tfound human at {x}/{y} with the size of {w} x {h}")
# Thresholds for face keypoint distances (these might need adjustment)
EYE_RATIO_THRESHOLD = 0.25
NOSE_EYE_RATIO_THRESHOLD = 0.2
EAR_NOSE_RATIO_THRESHOLD = 1.2
def detect_humans(to_detect: str):
@dataclass
class Keypoint:
x: float
y: float
name: str
confidence: float = 0
@property
def point(self):
return (int(self.x), int(self.y))
def get_distance(self, other: Keypoint) -> float:
return np.sqrt((self.x - other.x) ** 2 + (self.y - other.y) ** 2)
def detect_human_parts(human: dict, face_padding: int = 20):
parts = human["parts"]
to_detect = human["crop"]["file"]
_p = Path(to_detect)
detected = str(_p.with_name(_p.stem + ".detected" + _p.suffix))
print(f"detecting humans: {to_detect} => {detected}")
require_net("yolov3")
# Load YOLO
net = cv2.dnn.readNet(str(MODEL_PATH / 'yolov3.weights'), str(MODEL_PATH / 'yolov3.cfg'))
layer_names = net.getLayerNames()
indices = net.getUnconnectedOutLayers()
output_layers = [layer_names[int(i) - 1] for i in indices]
detected = str(_p.with_name(_p.stem + "_detected" + _p.suffix))
boxes_file = str(_p.with_name(_p.stem + "_boxes.json"))
print(f"detecting human parts: {to_detect} => {detected}")
# Load image
def apply_rotation(rot_matrix, points):
# Apply the rotation to the points, assuming points are 2D coordinates (flattened)
return np.dot(rot_matrix, points.T).T
def linearize_pairwise_distances(points, target_distances):
# Calculate pairwise distances between the points
num_points = len(points)
pairwise_distances = np.zeros((num_points, num_points))
for i in range(num_points):
for j in range(i, num_points):
pairwise_distances[i, j] = np.linalg.norm(points[i] - points[j])
pairwise_distances[j, i] = pairwise_distances[i, j] # symmetric matrix
total_distance = np.sum(pairwise_distances)
normed_distances = pairwise_distances / total_distance
return np.abs(normed_distances - target_distances) / target_distances
def objective(params, original_points, target_distances):
# Convert params to an axis-angle representation (rotation vector)
rot = R.from_rotvec(params)
rotation_matrix = rot.as_matrix()[:2, :2] # 2D rotation matrix (2x2)
# Apply the rotation to the original points
rotated_points = apply_rotation(rotation_matrix, original_points)
# Compute the pairwise distances for the rotated points
divergence = linearize_pairwise_distances(rotated_points, target_distances)
return np.nansum(divergence)
def optimize_rotation(original_points, relative_face_matrix):
# Compute the pairwise distances of the original points
original_distances = linearize_pairwise_distances(original_points, relative_face_matrix)
# Initial guess: rotation vector (zero rotation)
initial_params = np.zeros(3) # Initial guess for the rotation vector (no rotation)
# Perform the optimization to minimize the divergence
result = minimize(objective, initial_params, args=(original_points, relative_face_matrix), method='BFGS')
return result.x # Rotation vector (axis-angle)
def apply_optimized_rotation(rotation_vector, original_points):
# Convert the rotation vector to a rotation matrix (2D)
rot = R.from_rotvec(rotation_vector)
rotation_matrix = rot.as_matrix()[:2, :2] # 2D rotation matrix (2x2)
# Apply the rotation to the points
return apply_rotation(rotation_matrix, original_points)
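# Reference face geometry (assumed interpretation): normalized pairwise distances between the five
# YOLOv8-pose face keypoints (nose, left/right eye, left/right ear, COCO keypoint order).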
relative_face_matrix = np.array([
[0. , 0.02243309, 0.02243309, 0.05016191, 0.05016191],
[0.02243309, 0. , 0.04012953, 0.04486618, 0.07234453],
[0.02243309, 0.04012953, 0. , 0.07234453, 0.04486618],
[0.05016191, 0.04486618, 0.07234453, 0. , 0.08025906],
[0.05016191, 0.07234453, 0.04486618, 0.08025906, 0. ]
])
#
model = YOLO('yolov8n-pose.pt') # You can also try 'yolov8s-pose.pt' for better accuracy
results = model(to_detect)[0]
image = cv2.imread(to_detect)
height, width, channels = image.shape
# Create blob and do forward pass
blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)
for person in results.keypoints.data:
keypoints = person.cpu().numpy()
boxes = []
confidences = []
print("#" * 50)
# Information for each object detected
for out in outs:
for detection in out:
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.5 and class_id == 0: # Class ID 0 is human
center_x = int(detection[0] * width)
center_y = int(detection[1] * height)
w = int(detection[2] * width)
h = int(detection[3] * height)
x = int(center_x - w / 2)
y = int(center_y - h / 2)
original_points = np.array([[k[0], k[1]] for k in keypoints[:5]])
is_not_zero = False
for x, y in original_points:
if x != 0 or y != 0:
is_not_zero = True
break
boxes.append([x, y, w, h])
confidences.append(float(confidence))
if not is_not_zero:
continue
# Apply Non-Maximum Suppression
indices = cv2.dnn.NMSBoxes(boxes, confidences, score_threshold=0.5, nms_threshold=0.4)
rotation_vector = optimize_rotation(original_points, relative_face_matrix)
optimized_points = apply_optimized_rotation(rotation_vector, original_points)
optimized_distances = linearize_pairwise_distances(optimized_points, relative_face_matrix)
for i in indices:
i = i[0] if isinstance(i, (list, np.ndarray)) else i # Flatten index if needed
x, y, w, h = boxes[i]
# indices of the points that seem to be likely correct
success_points = []
for i in range(5):
if np.sum(original_points[i]) == 0:
continue
print(f"\tfound human at {x}/{y} with the size of {w} x {h}")
cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 255), 2)
s_count = 0
for j in range(5):
d = np.abs(optimized_distances[i][j])
if d < 1:
s_count += 1
if s_count > 2:
success_points.append(i)
for point in original_points:
cv2.circle(image, (int(point[0]), int(point[1])), 4, (0, 0, 255), -1)
if len(success_points) < 1:
continue
valid_face = len(success_points) >= 3
clean_points = []
# Reconstruct disregarded points using weighted average of relative positions
for i in range(5):
if i not in success_points:
weighted_sum = np.zeros(2)
total_weight = 0.0
for j in success_points:
if not np.isnan(relative_face_matrix[i][j]):
direction = original_points[j] - original_points[i]
norm = np.linalg.norm(direction)
if norm > 0:
direction = direction / norm
estimated_distance = relative_face_matrix[i][j]
estimate = original_points[j] - direction * estimated_distance
weighted_sum += estimate
total_weight += 1
if total_weight > 0:
clean_points.append(weighted_sum / total_weight)
else:
clean_points.append(original_points[i])
clean_points = np.array(clean_points)
# Calculate bounding box from clean_points
realistic_aspect_ratio = 2/3 # width / height
x_coords = clean_points[:, 0]
y_coords = clean_points[:, 1]
min_x = np.min(x_coords)
max_x = np.max(x_coords)
min_y = np.min(y_coords)
max_y = np.max(y_coords)
# Face-like padding: more space top & bottom than sides
width = max_x - min_x
height = max_y - min_y
normalized_bounding_size = max(width, height * realistic_aspect_ratio)
real_width = normalized_bounding_size
real_height = normalized_bounding_size / realistic_aspect_ratio
padding_x = width * 0.7 + (real_width - width) / 2
padding_y_top = height * 2 + (real_height - height) / 2
padding_y_bottom = height * 1.7 + (real_height - height) / 2
face_box_x1 = int(min_x - padding_x)
face_box_y1 = int(min_y - padding_y_top)
face_box_x2 = int(max_x + padding_x)
face_box_y2 = int(max_y + padding_y_bottom)
face_bounding_box = (face_box_x1, face_box_y1, face_box_x2, face_box_y2)
color = (255, 255, 0)
if valid_face:
color = (0, 255, 0)
cv2.rectangle(image, (face_box_x1, face_box_y1), (face_box_x2, face_box_y2), color, 2)
for point in clean_points:
cv2.circle(image, (int(point[0]), int(point[1])), 4, color, -1)
face_info = human["face"] = {
"is_valid": valid_face,
"x": face_box_x1,
"y": face_box_y1,
"w": face_box_x2 - face_box_x1,
"h": face_box_y2 - face_box_y1,
}
if valid_face:
print("\nOriginal points:")
print(original_points)
print("\nOriginal pairwise distances:")
print(linearize_pairwise_distances(original_points, relative_face_matrix))
print(f"Optimized rotation vector (axis-angle): {rotation_vector}")
print("\nOptimized points after rotation:")
print(optimized_points)
print("\nOptimized pairwise distances:")
print(optimized_distances)
print(success_points)
print(clean_points)
# Save the result
cv2.imwrite(detected, image)
def detect_humans(to_detect: str, crop_padding: int = 20, skip_detection_if_present: bool = False):
_p = Path(to_detect)
detected = str(_p.with_name(_p.stem + "_detected" + _p.suffix))
boxes_file = str(_p.with_name(_p.stem + "_boxes.json"))
print(f"detecting humans: {to_detect} => {detected}")
boxes_structures = {}
human_boxes = boxes_structures["humans"] = []
if not (Path(boxes_file).exists() and skip_detection_if_present):
require_net("yolov3")
# Load YOLO
net = cv2.dnn.readNet(str(MODEL_PATH / 'yolov3.weights'), str(MODEL_PATH / 'yolov3.cfg'))
layer_names = net.getLayerNames()
indices = net.getUnconnectedOutLayers()
output_layers = [layer_names[int(i) - 1] for i in indices]
# Load image
image = cv2.imread(to_detect)
original_image = cv2.imread(to_detect)
height, width, channels = image.shape
# Create blob and do forward pass
blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)
boxes = []
confidences = []
# Information for each object detected
for out in outs:
for detection in out:
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.5 and class_id == 0: # Class ID 0 is human
center_x = int(detection[0] * width)
center_y = int(detection[1] * height)
w = int(detection[2] * width)
h = int(detection[3] * height)
x = int(center_x - w / 2)
y = int(center_y - h / 2)
boxes.append([x, y, w, h])
confidences.append(float(confidence))
# Apply Non-Maximum Suppression
indices = cv2.dnn.NMSBoxes(boxes, confidences, score_threshold=0.5, nms_threshold=0.4)
human_part_folder = _p.with_name(_p.stem + "_parts")
human_part_folder.mkdir(exist_ok=True)
for i in indices:
i = i[0] if isinstance(i, (list, np.ndarray)) else i # Flatten index if needed
x, y, w, h = boxes[i]
human_part_image_path = human_part_folder / (_p.stem + "_" + str(i) + _p.suffix)
image_height, image_width = image.shape[:2]
# Compute safe crop coordinates with padding
x1 = max(x - crop_padding, 0)
y1 = max(y - crop_padding, 0)
x2 = min(x + w + crop_padding, image_width)
y2 = min(y + h + crop_padding, image_height)
human_crop = original_image[y1:y2, x1:x2]
cv2.imwrite(str(human_part_image_path), human_crop)
print(f"\tfound human at {x}/{y} with the size of {w} x {h}")
human_boxes.append({
"x": x,
"y": y,
"w": w,
"h": h,
"crop": {
"file": str(human_part_image_path),
"x": x1,
"y": y,
"w": x2 - x1,
"h": y2 - y1,
},
"parts": {},
})
cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 255), 2)
# Save the result
with open(boxes_file, "w") as f:
json.dump(boxes_structures, f)
cv2.imwrite(detected, image)
else:
with open(boxes_file, "r") as f:
boxes_structures = json.load(f)
human_boxes = boxes_structures["humans"]
for human in human_boxes:
detect_human_parts(human)
with open(boxes_file, "w") as f:
json.dump(boxes_structures, f)

View File

@@ -0,0 +1,22 @@
from __future__ import annotations
import cv2
import numpy as np
from .data_classes import RawImage
# https://learnopencv.com/how-to-select-a-bounding-box-roi-in-opencv-cpp-python/
def select_bounding_boxes(to_detect: str):
    raw_image = RawImage(to_detect)

    bounding_boxes = cv2.selectROIs(
        windowName=raw_image.name,
        img=raw_image.image,
        fromCenter=False
    )

    raw_image.bounding_boxes.extend(bounding_boxes.tolist())
    raw_image.write_meta()

View File

@@ -0,0 +1,95 @@
from __future__ import annotations
from typing import Optional
from pathlib import Path
import subprocess
import sys
import os
import cv2
import numpy as np
from .data_classes import RawImage
from .simple_lama_bindings import SimpleLama
# https://github.com/okaris/simple-lama/tree/main
def blackout(raw_image: RawImage) -> np.ndarray:
image = raw_image.get_image()
for box in raw_image.bounding_boxes:
cv2.rectangle(image, box, (0, 0, 0), -1)
return image
def get_mask(raw_image: RawImage) -> np.ndarray:
mask = np.zeros(raw_image.image.shape[:2], dtype=np.uint8)
for (x, y, w, h) in raw_image.bounding_boxes:
mask[y:y+h, x:x+w] = 255
return mask
def quick_impaint(raw_image: RawImage, image: Optional[np.ndarray] = None) -> np.ndarray:
image = image if image is not None else raw_image.get_image()
mask = get_mask(raw_image)
# Apply inpainting using the Telea method
return cv2.inpaint(image, mask, inpaintRadius=3, flags=cv2.INPAINT_TELEA)
def do_generative_impaint(raw_image: RawImage, image: Optional[np.ndarray] = None) -> np.ndarray:
image = image if image is not None else raw_image.get_image()
mask = get_mask(raw_image)
lama = SimpleLama()
return lama(image=image, mask=mask)
def pixelate_regions(raw_image: RawImage, image: Optional[np.ndarray] = None, pixel_size: int = 10) -> np.ndarray:
image = image.copy() if image is not None else raw_image.get_image().copy()
for (x, y, w, h) in raw_image.bounding_boxes:
roi = image[y:y+h, x:x+w]
# Resize down and then back up
temp = cv2.resize(roi, (max(1, w // pixel_size), max(1, h // pixel_size)), interpolation=cv2.INTER_LINEAR)
pixelated = cv2.resize(temp, (w, h), interpolation=cv2.INTER_NEAREST)
image[y:y+h, x:x+w] = pixelated
return image
def pixelate(to_detect: str, generative_impaint: bool = True, debug_drawings: bool = False):
raw_image = RawImage(to_detect)
step_dir = raw_image.get_dir("steps")
def write_image(image: np.ndarray, name: str):
nonlocal debug_drawings
f = str(step_dir / (name + raw_image.file.suffix))
if debug_drawings:
for box in raw_image.bounding_boxes:
cv2.rectangle(image, box, (0, 255, 255), 1)
cv2.imwrite(f, image)
write_image(raw_image.image, "step_0")
step_1 = blackout(raw_image)
write_image(step_1, "step_1")
if generative_impaint:
step_2 = do_generative_impaint(raw_image, image=step_1)
step_2_alt = quick_impaint(raw_image, image=step_1)
else:
step_2 = quick_impaint(raw_image, image=step_1)
step_2_alt = do_generative_impaint(raw_image, image=step_1)
write_image(step_2, "step_2")
write_image(step_2_alt, "step_2_alt")
step_3 = pixelate_regions(raw_image, image=step_2)
write_image(step_3, "step_3")
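
A short end-to-end sketch of this pipeline (import paths assume the package is importable as `secure_pixelation`; the image path is a placeholder):

```python
from secure_pixelation.get_bounding_boxes import select_bounding_boxes  # assumed package layout
from secure_pixelation.pixelation_process import pixelate

# Mark the regions to hide, then run blackout -> inpaint -> pixelate;
# intermediate results land in the image's *_steps directory.
select_bounding_boxes("assets/human_detection/humans.png")
pixelate("assets/human_detection/humans.png", generative_impaint=False, debug_drawings=False)
```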

View File

@@ -0,0 +1,77 @@
import os
from typing import Tuple
import torch
import cv2
import numpy as np
from huggingface_hub import hf_hub_download
# https://github.com/okaris/simple-lama/blob/main/src/simple_lama/simple_lama.py
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
def prepare_img_and_mask(image: np.ndarray, mask: np.ndarray, device: torch.device, pad_out_to_modulo: int = 8, scale_factor: float = 1) -> Tuple[torch.Tensor, torch.Tensor]:
def get_image(img: np.ndarray):
img = img.copy()
if img.ndim == 3:
img = np.transpose(img, (2, 0, 1)) # chw
elif img.ndim == 2:
img = img[np.newaxis, ...]
return img.astype(np.float32) / 255
def scale_image(img: np.ndarray, factor: float, interpolation=cv2.INTER_AREA) -> np.ndarray:
if img.shape[0] == 1:
img = img[0]
else:
img = np.transpose(img, (1, 2, 0))
img = cv2.resize(img, dsize=None, fx=factor, fy=factor, interpolation=interpolation)
return img[None, ...] if img.ndim == 2 else np.transpose(img, (2, 0, 1))
def pad_img_to_modulo(img, mod):
channels, height, width = img.shape
out_height = height if height % mod == 0 else ((height // mod + 1) * mod)
out_width = width if width % mod == 0 else ((width // mod + 1) * mod)
return np.pad(img, ((0, 0), (0, out_height - height), (0, out_width - width)), mode="symmetric")
out_image = get_image(image)
out_mask = get_image(mask)
if scale_factor != 1:
out_image = scale_image(out_image, scale_factor)
out_mask = scale_image(out_mask, scale_factor, interpolation=cv2.INTER_NEAREST)
if pad_out_to_modulo > 1:
out_image = pad_img_to_modulo(out_image, pad_out_to_modulo)
out_mask = pad_img_to_modulo(out_mask, pad_out_to_modulo)
out_image = torch.from_numpy(out_image).unsqueeze(0).to(device)
out_mask = torch.from_numpy(out_mask).unsqueeze(0).to(device)
return out_image, (out_mask > 0) * 1
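# Note: images are padded up to a multiple of 8 (the stride LaMa expects); the padded border is not cropped off afterwards.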
class SimpleLama:
"""
lama = SimpleLama()
result = lama(image, mask)
"""
def __init__(self, device=None):
self.device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {self.device}")
model_path = hf_hub_download("okaris/simple-lama", "big-lama.pt")
print(f"using model at {model_path}")
self.model = torch.jit.load(model_path, map_location=self.device).eval()
def __call__(self, image: np.ndarray, mask: np.ndarray) -> np.ndarray:
image, mask = prepare_img_and_mask(image, mask, self.device)
with torch.inference_mode():
inpainted = self.model(image, mask)
cur_res = inpainted[0].permute(1, 2, 0).detach().cpu().numpy()
return np.clip(cur_res * 255, 0, 255).astype(np.uint8)
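
A short usage sketch for `SimpleLama` (module path as imported by `pixelation_process`; the mask file name is a placeholder, with white marking the region to fill):

```python
import cv2
from secure_pixelation.simple_lama_bindings import SimpleLama  # assumed package layout

image = cv2.imread("assets/humans.png")               # BGR, uint8
mask = cv2.imread("assets/humans_mask.png", 0)        # single channel, 255 where content should be filled
lama = SimpleLama()                                   # downloads big-lama.pt from the Hugging Face hub on first use
result = lama(image=image, mask=mask)
cv2.imwrite("assets/humans_inpainted.png", result)
```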