3 Commits

Author SHA1 Message Date
49c3734526 feat: added hooks for collection on append
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-06-04 10:11:46 +02:00
bc19a94e7f feat: added parent artwork options 2024-06-04 10:09:17 +02:00
5d26fdbf94 Artwork gallery Musify
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-06-04 07:58:18 +02:00
8 changed files with 108 additions and 189 deletions

View File

@@ -30,7 +30,7 @@ from ..utils.exception import MKMissingNameException
from ..utils.exception.download import UrlNotFoundException from ..utils.exception.download import UrlNotFoundException
from ..utils.shared import DEBUG_PAGES from ..utils.shared import DEBUG_PAGES
from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, Musicbrainz, Genius, INDEPENDENT_DB_OBJECTS from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, Genius, INDEPENDENT_DB_OBJECTS
ALL_PAGES: Set[Type[Page]] = { ALL_PAGES: Set[Type[Page]] = {
@@ -38,8 +38,7 @@ ALL_PAGES: Set[Type[Page]] = {
Genius, Genius,
Musify, Musify,
YoutubeMusic, YoutubeMusic,
Bandcamp, Bandcamp
Musicbrainz
} }
if youtube_settings["use_youtube_alongside_youtube_music"]: if youtube_settings["use_youtube_alongside_youtube_music"]:

View File

@@ -1,18 +1,14 @@
from __future__ import annotations from __future__ import annotations
from typing import List, Optional, Dict, Tuple, Type, Union, TypedDict from typing import Dict, List, Optional, Set, Tuple, Type, TypedDict, Union
from .collection import Collection
from .metadata import (
Mapping as id3Mapping,
ID3Timestamp,
Metadata
)
from ..utils.string_processing import unify, hash_url
from .parents import OuterProxy as Base
from ..utils.config import main_settings from ..utils.config import main_settings
from ..utils.string_processing import hash_url, unify
from .collection import Collection
from .metadata import ID3Timestamp
from .metadata import Mapping as id3Mapping
from .metadata import Metadata
from .parents import OuterProxy as Base
class ArtworkVariant(TypedDict): class ArtworkVariant(TypedDict):
@@ -23,7 +19,9 @@ class ArtworkVariant(TypedDict):
class Artwork: class Artwork:
def __init__(self, *variants: List[ArtworkVariant]) -> None: def __init__(self, *variants: List[ArtworkVariant], parent_artworks: Set[Artwork] = None) -> None:
self.parent_artworks: Set[Artwork] = parent_artworks or set()
self._variant_mapping: Dict[str, ArtworkVariant] = {} self._variant_mapping: Dict[str, ArtworkVariant] = {}
for variant in variants: for variant in variants:
@@ -36,7 +34,7 @@ class Artwork:
def append(self, url: str, width: int = main_settings["preferred_artwork_resolution"], height: int = main_settings["preferred_artwork_resolution"], **kwargs) -> None: def append(self, url: str, width: int = main_settings["preferred_artwork_resolution"], height: int = main_settings["preferred_artwork_resolution"], **kwargs) -> None:
if url is None: if url is None:
return return
self._variant_mapping[hash_url(url=url)] = { self._variant_mapping[hash_url(url=url)] = {
"url": url, "url": url,
"width": width, "width": width,
@@ -44,21 +42,36 @@ class Artwork:
"deviation": self._calculate_deviation(width, height), "deviation": self._calculate_deviation(width, height),
} }
@property
def flat_empty(self) -> bool:
return len(self._variant_mapping.keys()) <= 0
def _get_best_from_list(self, artwork_variants: List[ArtworkVariant]) -> Optional[ArtworkVariant]:
return min(artwork_variants, key=lambda x: x["deviation"])
@property @property
def best_variant(self) -> ArtworkVariant: def best_variant(self) -> ArtworkVariant:
if len(self._variant_mapping.keys()) <= 0: if self.flat_empty:
return None return self._get_best_from_list([parent.best_variant for parent in self.parent_artworks])
return min(self._variant_mapping.values(), key=lambda x: x["deviation"]) return self._get_best_from_list(self._variant_mapping.values())
def get_variant_name(self, variant: ArtworkVariant) -> str: def get_variant_name(self, variant: ArtworkVariant) -> str:
return f"artwork_{variant['width']}x{variant['height']}_{hash_url(variant['url']).replace('/', '_')}" return f"artwork_{variant['width']}x{variant['height']}_{hash_url(variant['url']).replace('/', '_')}"
def __merge__(self, other: Artwork, **kwargs) -> None: def __merge__(self, other: Artwork, **kwargs) -> None:
self.parent_artworks.update(other.parent_artworks)
for key, value in other._variant_mapping.items(): for key, value in other._variant_mapping.items():
if key not in self._variant_mapping: if key not in self._variant_mapping:
self._variant_mapping[key] = value self._variant_mapping[key] = value
def __hash__(self) -> int:
return id(self)
def __eq__(self, other: Artwork) -> bool: def __eq__(self, other: Artwork) -> bool:
if hash(self) == hash(other):
return True
if not isinstance(other, Artwork): if not isinstance(other, Artwork):
return False return False
return any(a == b for a, b in zip(self._variant_mapping.keys(), other._variant_mapping.keys())) return any(a == b for a, b in zip(self._variant_mapping.keys(), other._variant_mapping.keys()))

View File

@@ -1,16 +1,44 @@
from __future__ import annotations from __future__ import annotations
from collections import defaultdict
from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Generator, Union, Any, Set
import copy import copy
from collections import defaultdict
from dataclasses import dataclass
from typing import (Any, Callable, Dict, Generator, Generic, Iterable,
Iterator, List, Optional, Set, Tuple, TypeVar, Union)
from .parents import OuterProxy from ..utils import BColors, object_trace, output
from ..utils import object_trace from .parents import InnerData, OuterProxy
from ..utils import output, BColors
T = TypeVar('T', bound=OuterProxy) T = TypeVar('T', bound=OuterProxy)
@dataclass
class AppendHookArguments:
"""
This class is used to store the arguments for the append hook.
The best explanation is with an examples:
```
# this is the action that triggers the append hook
album = Album()
song = Song()
album.song_collection.append(song)
```
In this case, the append hook is triggered with the following arguments:
```
AppendHookArguments(
collection=album.song_collection,
new_object=song,
collection_root_objects=[album]
)
```
"""
collection: Collection
new_object: T
collection_root_objects: Set[InnerData]
class Collection(Generic[T]): class Collection(Generic[T]):
__is_collection__ = True __is_collection__ = True
@@ -27,6 +55,7 @@ class Collection(Generic[T]):
sync_on_append: Dict[str, Collection] = None, sync_on_append: Dict[str, Collection] = None,
append_object_to_attribute: Dict[str, T] = None, append_object_to_attribute: Dict[str, T] = None,
extend_object_to_attribute: Dict[str, Collection] = None, extend_object_to_attribute: Dict[str, Collection] = None,
append_callbacks: List[Callable[[AppendHookArguments], None]] = None,
) -> None: ) -> None:
self._collection_for: dict = dict() self._collection_for: dict = dict()
@@ -41,6 +70,7 @@ class Collection(Generic[T]):
self.sync_on_append: Dict[str, Collection] = sync_on_append or {} self.sync_on_append: Dict[str, Collection] = sync_on_append or {}
self.pull_from: List[Collection] = [] self.pull_from: List[Collection] = []
self.push_to: List[Collection] = [] self.push_to: List[Collection] = []
self.append_callbacks: List[Callable[[AppendHookArguments], None]] = append_callbacks or []
# This is to cleanly unmap previously mapped items by their id # This is to cleanly unmap previously mapped items by their id
self._indexed_from_id: Dict[int, Dict[str, Any]] = defaultdict(dict) self._indexed_from_id: Dict[int, Dict[str, Any]] = defaultdict(dict)
@@ -141,6 +171,14 @@ class Collection(Generic[T]):
for attribute, new_object in self.append_object_to_attribute.items(): for attribute, new_object in self.append_object_to_attribute.items():
other.__getattribute__(attribute).append(new_object, **kwargs) other.__getattribute__(attribute).append(new_object, **kwargs)
append_hook_args = AppendHookArguments(
collection=self,
new_object=other,
collection_root_objects=self._collection_for.keys(),
)
for callback in self.append_callbacks:
callback(append_hook_args)
def append(self, other: Optional[T], **kwargs): def append(self, other: Optional[T], **kwargs):
""" """
If an object, that represents the same entity exists in a relevant collection, If an object, that represents the same entity exists in a relevant collection,

View File

@@ -477,6 +477,8 @@ class Artist(Base):
general_genre: str general_genre: str
unformatted_location: str unformatted_location: str
artwork: List[Artwork]
source_collection: SourceCollection source_collection: SourceCollection
contact_collection: Collection[Contact] contact_collection: Collection[Contact]
@@ -493,6 +495,8 @@ class Artist(Base):
"lyrical_themes": list, "lyrical_themes": list,
"general_genre": lambda: "", "general_genre": lambda: "",
"artwork": list,
"source_collection": SourceCollection, "source_collection": SourceCollection,
"album_collection": Collection, "album_collection": Collection,
"contact_collection": Collection, "contact_collection": Collection,
@@ -511,6 +515,7 @@ class Artist(Base):
notes: FormattedText = None, notes: FormattedText = None,
lyrical_themes: List[str] = None, lyrical_themes: List[str] = None,
general_genre: str = None, general_genre: str = None,
artwork: List[Artwork] = None,
unformatted_location: str = None, unformatted_location: str = None,
source_list: List[Source] = None, source_list: List[Source] = None,
contact_list: List[Contact] = None, contact_list: List[Contact] = None,

View File

@@ -1,6 +1,5 @@
from .encyclopaedia_metallum import EncyclopaediaMetallum from .encyclopaedia_metallum import EncyclopaediaMetallum
from .musify import Musify from .musify import Musify
from .musicbrainz import Musicbrainz
from .youtube import YouTube from .youtube import YouTube
from .youtube_music import YoutubeMusic from .youtube_music import YoutubeMusic
from .bandcamp import Bandcamp from .bandcamp import Bandcamp

View File

@@ -1,145 +0,0 @@
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from typing import List, Optional, Type, Union, Generator, Dict, Any
from urllib.parse import urlparse
import pycountry
import musicbrainzngs
from bs4 import BeautifulSoup
from ..connection import Connection
from .abstract import Page
from ..utils.enums import SourceType, ALL_SOURCE_TYPES
from ..utils.enums.album import AlbumType, AlbumStatus
from ..objects import (
Artist,
Source,
Song,
Album,
ID3Timestamp,
FormattedText,
Label,
Target,
DatabaseObject,
Lyrics,
Artwork
)
from ..utils.config import logging_settings, main_settings
from ..utils import string_processing, shared
from ..utils.string_processing import clean_song_title
from ..utils.support_classes.query import Query
from ..utils.support_classes.download_result import DownloadResult
class Musicbrainz(Page):
SOURCE_TYPE = ALL_SOURCE_TYPES.MUSICBRAINZ
HOST = "https://musicbrainz.org"
def __init__(self, *args, **kwargs):
musicbrainzngs.set_useragent("mk", "1")
super().__init__(*args, **kwargs)
def general_search(self, search_query: str) -> List[DatabaseObject]:
search_results = []
#Artist
search_results += self.artist_search(search_query).copy()
#Album
search_results += self.album_search(search_query).copy()
#Song
search_results += self.song_search(search_query).copy()
return search_results
def artist_search(self, search_query: str) -> List[Artist]:
artist_list = []
#Artist
artist_dict_list: list = musicbrainzngs.search_artists(search_query)['artist-list']
artist_source_list: List[Source] = []
for artist_dict in artist_dict_list:
artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/artist/" + artist_dict['id']))
artist_list.append(Artist(
name=artist_dict['name'],
source_list=artist_source_list
))
return artist_list
def song_search(self, search_query: str) -> List[Song]:
song_list = []
#Song
song_dict_list: list = musicbrainzngs.search_recordings(search_query)['recording-list']
song_source_list: List[Source] = []
for song_dict in song_dict_list:
song_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/recording/" + song_dict['id']))
song_list.append(Song(
title=song_dict['title'],
source_list=song_source_list
))
return song_list
def album_search(self, search_query: str) -> List[Album]:
album_list = []
#Album
album_dict_list: list = musicbrainzngs.search_release_groups(search_query)['release-group-list']
album_source_list: List[Source] = []
for album_dict in album_dict_list:
album_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/release-group/" + album_dict['id']))
album_list.append(Album(
title=album_dict['title'],
source_list=album_source_list
))
return album_list
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
album_list = []
#Album
album_dict_list: list = musicbrainzngs.search_release_groups(search_query)['release-group-list']
album_source_list: List[Source] = []
for album_dict in album_dict_list:
album_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/release-group/" + album_dict['id']))
album_list.append(Album(
title=album_dict['title'],
source_list=album_source_list
))
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
artist_list = []
#Artist
artist_dict_list: list = musicbrainzngs.search_artists(search_query)['artist-list']
artist_source_list: List[Source] = []
for artist_dict in artist_dict_list:
artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/artist/" + artist_dict['id']))
artist_list.append(Artist(
name=artist_dict['name'],
source_list=artist_source_list,
))
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
song_list = []
#Song
song_dict_list: list = musicbrainzngs.search_recordings(search_query)['recording-list']
song_source_list: List[Source] = []
for song_dict in song_dict_list:
song_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/recording/" + song_dict['id']))
song_list.append(Song(
title=song_dict['title'],
source_list=song_source_list
))

View File

@@ -457,17 +457,17 @@ class Musify(Page):
for album_info in soup.find_all("ul", {"class": "album-info"}): for album_info in soup.find_all("ul", {"class": "album-info"}):
list_element: BeautifulSoup = album_info.find("li") list_element: BeautifulSoup = album_info.find("li")
if list_element is not None: if list_element is not None:
artist_soup: BeautifulSoup artist_soup: BeautifulSoup
for artist_soup in list_element.find_all("a"): for artist_soup in list_element.find_all("a"):
artist_source_list = [] artist_source_list = []
href = artist_soup["href"] href = artist_soup["href"]
if href is not None: if href is not None:
artist_source_list = [Source(self.SOURCE_TYPE, self.HOST + href)] artist_source_list = [Source(self.SOURCE_TYPE, self.HOST + href)]
artist_list.append(Artist( artist_list.append(Artist(
name=artist_soup.text.strip(), name=artist_soup.text.strip(),
source_list=artist_source_list source_list=artist_source_list
)) ))
# breadcrums # breadcrums
breadcrumb_list_element_list: List[BeautifulSoup] = soup.find_all("ol", {"class": "breadcrumb"}) breadcrumb_list_element_list: List[BeautifulSoup] = soup.find_all("ol", {"class": "breadcrumb"})
@@ -485,7 +485,7 @@ class Musify(Page):
track_name = list_points[4].text.strip() track_name = list_points[4].text.strip()
# artwork # album artwork
artwork: Artwork = Artwork() artwork: Artwork = Artwork()
album_image_element_list: List[BeautifulSoup] = soup.find_all("img", {"class": "album-img"}) album_image_element_list: List[BeautifulSoup] = soup.find_all("img", {"class": "album-img"})
for album_image_element in album_image_element_list: for album_image_element in album_image_element_list:
@@ -918,7 +918,8 @@ class Musify(Page):
name=name, name=name,
country=country, country=country,
source_list=source_list, source_list=source_list,
notes=notes notes=notes,
artwork=self._fetch_artist_artwork(soup, **kwargs)
) )
def _parse_album_card(self, album_card: BeautifulSoup, artist_name: str = None, **kwargs) -> Album: def _parse_album_card(self, album_card: BeautifulSoup, artist_name: str = None, **kwargs) -> Album:
@@ -1056,6 +1057,20 @@ class Musify(Page):
artist.album_collection.append(album) artist.album_collection.append(album)
def _fetch_artist_artwork(self, soup: BeautifulSoup, **kwargs):
# artist artwork
artist_artwork: List[Artwork] = Artwork()
artist_a_element_list: List[BeautifulSoup] = soup.find_all("a")
for artist_a_element in artist_a_element_list:
if artist_a_element.find_all("img", {"class": "artist-img"}).count() > 0:
artwork_gallery = self.connection.get(artist_a_element("data-src", artist_a_element.get("href")))
if artwork_gallery is not None:
gallery_image_element_list: List[BeautifulSoup] = artwork_gallery.find_all("img", {"class": "artist-img"})
for gallery_image_element in gallery_image_element_list:
artist_artwork.push(Artwork(url=gallery_image_element.get("data-src", gallery_image_element.get("src"))))
return artist_artwork
def fetch_artist(self, source: Source, **kwargs) -> Artist: def fetch_artist(self, source: Source, **kwargs) -> Artist:
""" """
TODO TODO
@@ -1068,7 +1083,7 @@ class Musify(Page):
artist = self._fetch_initial_artist(url, source=source, **kwargs) artist = self._fetch_initial_artist(url, source=source, **kwargs)
self._fetch_artist_discography(artist, url, artist.name, **kwargs) self._fetch_artist_discography(artist, url, artist.name, **kwargs)
self._fetch_artist_artwork(artist, **kwargs)
return artist return artist
def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:

View File

@@ -59,11 +59,6 @@ Reference for the logging formats: https://docs.python.org/3/library/logging.htm
description="The logger for the musify scraper.", description="The logger for the musify scraper.",
default_value="musify" default_value="musify"
), ),
LoggerAttribute(
name="musicbrainz_logger",
description="The logger for the musicbrainz scraper.",
default_value="musicbrainz"
),
LoggerAttribute( LoggerAttribute(
name="youtube_logger", name="youtube_logger",
description="The logger for the youtube scraper.", description="The logger for the youtube scraper.",