feat: image hash implemented
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
ci/woodpecker/pr/woodpecker Pipeline was successful

This commit is contained in:
2024-07-01 14:59:51 +02:00
parent 17c28722fb
commit 93c9a367a2
5 changed files with 52 additions and 57 deletions

View File

@@ -18,6 +18,8 @@ from .parents import OuterProxy as Base
from .target import Target
from PIL import Image
import imagehash
artwork_connection: Connection = Connection(module="artwork")
@@ -168,24 +170,37 @@ class ArtworkCollection:
for value in values:
self.append(value, **kwargs)
# Downloads every artwork variant, skips variants whose image hash was already
# seen, and crops/resizes the remaining images.
# NOTE(review): this span is a rendered diff without +/- markers, so removed and
# added lines (e.g. both `compile` signatures) appear together, and the hunk
# header further down hides part of the body. Confirm against the real file
# which lines belong to the current version.
def compile(self, target: Target, **kwargs) -> None:
def compile(self, **kwargs) -> None:
"""
This will make the artworks ready for download
This will make the artworks ready for download and delete duplicates.
"""
# hashes of every image kept so far; membership is tested with `in` below
artwork_hashes: list = list()
for artwork in self._data:
index = 0
for artwork_variant in artwork.variants:
# fetch the raw image bytes for this variant
r = artwork_connection.get(
url=artwork_variant.url,
name=artwork_variant.url,
)
temp_target: Target = Target.temp()
with temp_target.open("wb") as f:
target: Target = artwork_variant.target
with target.open("wb") as f:
f.write(r.content)
converted_target: Target = Target.temp(file_extension=main_settings["image_format"])
with Image.open(temp_target.file_path) as img:
# crop non-square images to a centered square, losing as little data as possible
with Image.open(target.file_path) as img:
# https://stackoverflow.com/a/59476938/16804841
if img.mode != 'RGB':
img = img.convert('RGB')
# NOTE(review): any hashing failure is swallowed silently (`e` is unused) and
# the variant is skipped without logging; the file already written to
# `target` is left in place on this path.
try:
image_hash = imagehash.crop_resistant_hash(img)
except Exception as e:
continue
# NOTE(review): `artwork.variants` is the list being iterated by the enclosing
# `for`; popping from it during iteration makes the loop skip the element
# that follows the removed one.
if image_hash in artwork_hashes:
artwork.variants.pop(index)
target.delete()
continue
artwork_hashes.append(image_hash)
width, height = img.size
if width != height:
if width > height:
@@ -193,27 +208,20 @@ class ArtworkCollection:
else:
# taller than wide: keep the full width and a vertically centered band
img = img.crop((0, height // 2 - width // 2, width, height // 2 + width // 2))
# resize the image to the preferred resolution
img.thumbnail((main_settings["preferred_artwork_resolution"], main_settings["preferred_artwork_resolution"]))
# https://stackoverflow.com/a/59476938/16804841
if img.mode != 'RGB':
img = img.convert('RGB')
if target is not None:
img.save(target.file_path, main_settings["image_format"])
# resize the image to the preferred resolution
img.thumbnail((main_settings["preferred_artwork_resolution"], main_settings["preferred_artwork_resolution"]))
# NOTE(review): `=+` is plain assignment of unary `+1`, so `index` is set to 1
# on every pass instead of being incremented; `index += 1` was presumably
# intended. Both `continue` paths above also skip this line, so `index` does
# not track the variant's position and the `pop(index)` above can remove the
# wrong variant.
index =+ 1
# Merges another ArtworkCollection into this one: the parent artworks are
# unioned via `update`, then every variant of every artwork in `other` is visited.
# NOTE(review): this span is a rendered diff without +/- markers. The
# `if len(self._data) != 0 ... else ... add_data` lines and the
# `__contains__ ... append` lines look like the removed and the added loop body
# respectively; confirm against the real file.
def __merge__(self, other: ArtworkCollection, **kwargs) -> None:
self.parent_artworks.update(other.parent_artworks)
for other_artwork in other._data:
for other_variant in other_artwork.variants:
if len(self._data) != 0:
for artwork in self._data:
for variant in artwork.variants:
variant.__merge__(other_variant)
else:
self.add_data(other_variant.url)
# skip URLs this collection already holds; otherwise append a new variant built from the URL
if self.__contains__(other_variant.url):
continue
self.append(ArtworkVariant(other_variant.url))
# Hashes by object identity: the value is `id(self)`, so it does not depend on
# the collection's contents.
def __hash__(self) -> int:
return id(self)
@@ -224,21 +232,5 @@ class ArtworkCollection:
# Lazily yields the `url` of each entry in `self._data`, leaving out entries
# whose `url` is None.
def get_urls(self) -> Generator[str, None, None]:
yield from (artwork.url for artwork in self._data if artwork.url is not None)
"""
@property
def flat_empty(self) -> bool:
return len(self._variant_mapping.keys()) <= 0
def _get_best_from_list(self, artwork_variants: List[ArtworkVariant]) -> Optional[ArtworkVariant]:
return min(artwork_variants, key=lambda x: x["deviation"])
@property
def best_variant(self) -> ArtworkVariant:
if self.flat_empty:
return self._get_best_from_list([parent.best_variant for parent in self.parent_artworks])
return self._get_best_from_list(self._variant_mapping.values())
def get_variant_name(self, variant: ArtworkVariant) -> str:
return f"artwork_{variant['width']}x{variant['height']}_{hash_url(variant['url']).replace('/', '_')}"
"""

View File

@@ -31,7 +31,8 @@ class Target(OuterProxy):
}
@classmethod
def temp(cls, name: str = str(random.randint(0, HIGHEST_ID)), file_extension: Optional[str] = None) -> P:
def temp(cls, name: str = None, file_extension: Optional[str] = None) -> P:
name = name or str(random.randint(0, HIGHEST_ID))
if file_extension is not None:
name = f"{name}.{file_extension}"