3 Commits

Author SHA1 Message Date
49c3734526 feat: added hooks for collection on append
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-06-04 10:11:46 +02:00
bc19a94e7f feat: added parent artwork options 2024-06-04 10:09:17 +02:00
5d26fdbf94 Artwork gallery Musify
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-06-04 07:58:18 +02:00
8 changed files with 108 additions and 189 deletions

View File

@@ -30,7 +30,7 @@ from ..utils.exception import MKMissingNameException
from ..utils.exception.download import UrlNotFoundException
from ..utils.shared import DEBUG_PAGES
from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, Musicbrainz, Genius, INDEPENDENT_DB_OBJECTS
from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, Genius, INDEPENDENT_DB_OBJECTS
ALL_PAGES: Set[Type[Page]] = {
@@ -38,8 +38,7 @@ ALL_PAGES: Set[Type[Page]] = {
Genius,
Musify,
YoutubeMusic,
Bandcamp,
Musicbrainz
Bandcamp
}
if youtube_settings["use_youtube_alongside_youtube_music"]:

View File

@@ -1,18 +1,14 @@
from __future__ import annotations
from typing import List, Optional, Dict, Tuple, Type, Union, TypedDict
from .collection import Collection
from .metadata import (
Mapping as id3Mapping,
ID3Timestamp,
Metadata
)
from ..utils.string_processing import unify, hash_url
from .parents import OuterProxy as Base
from typing import Dict, List, Optional, Set, Tuple, Type, TypedDict, Union
from ..utils.config import main_settings
from ..utils.string_processing import hash_url, unify
from .collection import Collection
from .metadata import ID3Timestamp
from .metadata import Mapping as id3Mapping
from .metadata import Metadata
from .parents import OuterProxy as Base
class ArtworkVariant(TypedDict):
@@ -23,7 +19,9 @@ class ArtworkVariant(TypedDict):
class Artwork:
def __init__(self, *variants: List[ArtworkVariant]) -> None:
def __init__(self, *variants: List[ArtworkVariant], parent_artworks: Set[Artwork] = None) -> None:
self.parent_artworks: Set[Artwork] = parent_artworks or set()
self._variant_mapping: Dict[str, ArtworkVariant] = {}
for variant in variants:
@@ -36,7 +34,7 @@ class Artwork:
def append(self, url: str, width: int = main_settings["preferred_artwork_resolution"], height: int = main_settings["preferred_artwork_resolution"], **kwargs) -> None:
if url is None:
return
self._variant_mapping[hash_url(url=url)] = {
"url": url,
"width": width,
@@ -44,21 +42,36 @@ class Artwork:
"deviation": self._calculate_deviation(width, height),
}
@property
def flat_empty(self) -> bool:
    """Report whether this artwork holds no variants of its own.

    Only ``self._variant_mapping`` is inspected; ``parent_artworks`` are
    not taken into account.
    """
    return not self._variant_mapping
def _get_best_from_list(self, artwork_variants: List[ArtworkVariant]) -> Optional[ArtworkVariant]:
    """Return the variant with the smallest ``"deviation"``, or ``None``.

    :param artwork_variants: any iterable of variants; ``None`` entries
        (e.g. parents that have no variant to offer) are ignored.
    :return: the entry whose ``"deviation"`` is lowest, or ``None`` when
        there is nothing to choose from.
    """
    candidates = (variant for variant in artwork_variants if variant is not None)
    # ``default`` keeps min() from raising ValueError on an empty iterable.
    return min(candidates, key=lambda variant: variant["deviation"], default=None)
@property
def best_variant(self) -> Optional[ArtworkVariant]:
    """Return the variant with the lowest deviation, or ``None``.

    The artwork's own variants take precedence. Only when it has none are
    the ``best_variant`` values of ``parent_artworks`` considered.

    :return: the chosen variant, or ``None`` if neither this artwork nor
        any of its parents has a variant.
    """
    if self._variant_mapping:
        candidates = list(self._variant_mapping.values())
    else:
        # NOTE(review): this recurses through the parents; a cycle in
        # parent_artworks would never terminate - confirm it cannot occur.
        inherited = (parent.best_variant for parent in self.parent_artworks)
        candidates = [variant for variant in inherited if variant is not None]

    # Guard here so the helper is never handed an empty list or None entries.
    if not candidates:
        return None
    return self._get_best_from_list(candidates)
def get_variant_name(self, variant: ArtworkVariant) -> str:
    """Build the name for *variant*: ``artwork_<width>x<height>_<hashed url>``.

    The url part is the result of ``hash_url`` with every ``/`` replaced
    by ``_``.
    """
    dimensions = f"{variant['width']}x{variant['height']}"
    url_part = hash_url(variant['url']).replace('/', '_')
    return f"artwork_{dimensions}_{url_part}"
def __merge__(self, other: Artwork, **kwargs) -> None:
    """Fold *other* into this artwork in place.

    The parents of *other* are added to ``parent_artworks``, and every
    variant of *other* whose key is not yet known is taken over. Variants
    already present here are kept untouched.
    """
    self.parent_artworks.update(other.parent_artworks)

    own_variants = self._variant_mapping
    for url_hash, variant in other._variant_mapping.items():
        # setdefault only stores the variant when the key is still missing
        own_variants.setdefault(url_hash, variant)
def __hash__(self) -> int:
    """Hash by object identity.

    The value is ``id(self)``, so two distinct instances never share a
    hash on purpose, regardless of which variants they contain.
    """
    return id(self)
def __eq__(self, other: Artwork) -> bool:
    """Compare two artworks by identity or by shared variants.

    :param other: the object to compare against.
    :return: ``True`` if *other* is this very object, or if it is an
        ``Artwork`` that has at least one variant key in common with this
        one; ``False`` otherwise (including for non-``Artwork`` objects).
    """
    # Identity check instead of comparing hashes: hashing an arbitrary
    # operand can raise (unhashable types) or collide with id(self).
    if self is other:
        return True
    if not isinstance(other, Artwork):
        return False
    # Keys are compared as sets; the insertion order of the variants must
    # not decide whether a shared variant is found.
    return not self._variant_mapping.keys().isdisjoint(other._variant_mapping.keys())

View File

@@ -1,16 +1,44 @@
from __future__ import annotations
from collections import defaultdict
from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Generator, Union, Any, Set
import copy
from collections import defaultdict
from dataclasses import dataclass
from typing import (Any, Callable, Dict, Generator, Generic, Iterable,
Iterator, List, Optional, Set, Tuple, TypeVar, Union)
from .parents import OuterProxy
from ..utils import object_trace
from ..utils import output, BColors
from ..utils import BColors, object_trace, output
from .parents import InnerData, OuterProxy
T = TypeVar('T', bound=OuterProxy)
@dataclass
class AppendHookArguments:
    """
    This class is used to store the arguments for the append hook.
    The best explanation is an example:

    ```
    # this is the action that triggers the append hook
    album = Album()
    song = Song()
    album.song_collection.append(song)
    ```

    In this case, the append hook is triggered with the following arguments:

    ```
    AppendHookArguments(
        collection=album.song_collection,
        new_object=song,
        collection_root_objects=[album]
    )
    ```

    NOTE(review): the example shows a list for ``collection_root_objects``
    while the field is annotated as ``Set[InnerData]`` - confirm which
    container (and element type) callbacks should expect.
    """

    # the collection the new object was appended to
    collection: Collection
    # the object that was appended
    new_object: T
    # filled from the keys of the collection's `_collection_for` mapping
    # where the hook is triggered
    collection_root_objects: Set[InnerData]
class Collection(Generic[T]):
__is_collection__ = True
@@ -27,6 +55,7 @@ class Collection(Generic[T]):
sync_on_append: Dict[str, Collection] = None,
append_object_to_attribute: Dict[str, T] = None,
extend_object_to_attribute: Dict[str, Collection] = None,
append_callbacks: List[Callable[[AppendHookArguments], None]] = None,
) -> None:
self._collection_for: dict = dict()
@@ -41,6 +70,7 @@ class Collection(Generic[T]):
self.sync_on_append: Dict[str, Collection] = sync_on_append or {}
self.pull_from: List[Collection] = []
self.push_to: List[Collection] = []
self.append_callbacks: List[Callable[[AppendHookArguments], None]] = append_callbacks or []
# This is to cleanly unmap previously mapped items by their id
self._indexed_from_id: Dict[int, Dict[str, Any]] = defaultdict(dict)
@@ -141,6 +171,14 @@ class Collection(Generic[T]):
for attribute, new_object in self.append_object_to_attribute.items():
other.__getattribute__(attribute).append(new_object, **kwargs)
append_hook_args = AppendHookArguments(
collection=self,
new_object=other,
collection_root_objects=self._collection_for.keys(),
)
for callback in self.append_callbacks:
callback(append_hook_args)
def append(self, other: Optional[T], **kwargs):
"""
If an object, that represents the same entity exists in a relevant collection,

View File

@@ -477,6 +477,8 @@ class Artist(Base):
general_genre: str
unformatted_location: str
artwork: List[Artwork]
source_collection: SourceCollection
contact_collection: Collection[Contact]
@@ -493,6 +495,8 @@ class Artist(Base):
"lyrical_themes": list,
"general_genre": lambda: "",
"artwork": list,
"source_collection": SourceCollection,
"album_collection": Collection,
"contact_collection": Collection,
@@ -511,6 +515,7 @@ class Artist(Base):
notes: FormattedText = None,
lyrical_themes: List[str] = None,
general_genre: str = None,
artwork: List[Artwork] = None,
unformatted_location: str = None,
source_list: List[Source] = None,
contact_list: List[Contact] = None,

View File

@@ -1,6 +1,5 @@
from .encyclopaedia_metallum import EncyclopaediaMetallum
from .musify import Musify
from .musicbrainz import Musicbrainz
from .youtube import YouTube
from .youtube_music import YoutubeMusic
from .bandcamp import Bandcamp

View File

@@ -1,145 +0,0 @@
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from typing import List, Optional, Type, Union, Generator, Dict, Any
from urllib.parse import urlparse
import pycountry
import musicbrainzngs
from bs4 import BeautifulSoup
from ..connection import Connection
from .abstract import Page
from ..utils.enums import SourceType, ALL_SOURCE_TYPES
from ..utils.enums.album import AlbumType, AlbumStatus
from ..objects import (
Artist,
Source,
Song,
Album,
ID3Timestamp,
FormattedText,
Label,
Target,
DatabaseObject,
Lyrics,
Artwork
)
from ..utils.config import logging_settings, main_settings
from ..utils import string_processing, shared
from ..utils.string_processing import clean_song_title
from ..utils.support_classes.query import Query
from ..utils.support_classes.download_result import DownloadResult
class Musicbrainz(Page):
    """Page backed by the MusicBrainz web service, queried through ``musicbrainzngs``.

    Search results and fetched objects carry only a name/title plus a
    ``Source`` pointing at the matching entity below ``HOST``.
    """

    SOURCE_TYPE = ALL_SOURCE_TYPES.MUSICBRAINZ
    HOST = "https://musicbrainz.org"

    def __init__(self, *args, **kwargs):
        # musicbrainzngs refuses to send requests until a user agent is set.
        musicbrainzngs.set_useragent("mk", "1")
        super().__init__(*args, **kwargs)

    def _entity_source(self, entity: str, mbid: str) -> Source:
        """Build the source for the MusicBrainz entity page ``HOST/<entity>/<mbid>``."""
        return Source(self.SOURCE_TYPE, self.HOST + "/" + entity + "/" + mbid)

    @staticmethod
    def _mbid_from_source(source: Source) -> str:
        """Return the last path segment of the source url, i.e. the MusicBrainz id."""
        # NOTE(review): assumes ``Source`` exposes its url as ``.url`` - confirm.
        return urlparse(source.url).path.rstrip("/").rsplit("/", 1)[-1]

    def general_search(self, search_query: str) -> List[DatabaseObject]:
        """Search artists, albums and songs (in that order) and return all hits in one list."""
        search_results = []
        search_results.extend(self.artist_search(search_query))
        search_results.extend(self.album_search(search_query))
        search_results.extend(self.song_search(search_query))
        return search_results

    def artist_search(self, search_query: str) -> List[Artist]:
        """Return one ``Artist`` per MusicBrainz artist matching *search_query*."""
        artist_dict_list: list = musicbrainzngs.search_artists(search_query)['artist-list']
        # Every result gets its own one-element source list; sharing a single
        # growing list would attach the sources of all earlier hits to each artist.
        return [
            Artist(
                name=artist_dict['name'],
                source_list=[self._entity_source("artist", artist_dict['id'])]
            )
            for artist_dict in artist_dict_list
        ]

    def song_search(self, search_query: str) -> List[Song]:
        """Return one ``Song`` per MusicBrainz recording matching *search_query*."""
        song_dict_list: list = musicbrainzngs.search_recordings(search_query)['recording-list']
        return [
            Song(
                title=song_dict['title'],
                source_list=[self._entity_source("recording", song_dict['id'])]
            )
            for song_dict in song_dict_list
        ]

    def album_search(self, search_query: str) -> List[Album]:
        """Return one ``Album`` per MusicBrainz release group matching *search_query*."""
        album_dict_list: list = musicbrainzngs.search_release_groups(search_query)['release-group-list']
        return [
            Album(
                title=album_dict['title'],
                source_list=[self._entity_source("release-group", album_dict['id'])]
            )
            for album_dict in album_dict_list
        ]

    def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
        """Look up the release group *source* points to and return it as an ``Album``.

        ``stop_at_level`` is accepted for interface compatibility and not used.
        Errors raised by ``musicbrainzngs`` are not caught here.
        """
        album_dict: dict = musicbrainzngs.get_release_group_by_id(self._mbid_from_source(source))['release-group']
        return Album(
            title=album_dict['title'],
            source_list=[source]
        )

    def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
        """Look up the artist *source* points to and return it as an ``Artist``.

        ``stop_at_level`` is accepted for interface compatibility and not used.
        Errors raised by ``musicbrainzngs`` are not caught here.
        """
        artist_dict: dict = musicbrainzngs.get_artist_by_id(self._mbid_from_source(source))['artist']
        return Artist(
            name=artist_dict['name'],
            source_list=[source]
        )

    def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
        """Look up the recording *source* points to and return it as a ``Song``.

        ``stop_at_level`` is accepted for interface compatibility and not used.
        Errors raised by ``musicbrainzngs`` are not caught here.
        """
        song_dict: dict = musicbrainzngs.get_recording_by_id(self._mbid_from_source(source))['recording']
        return Song(
            title=song_dict['title'],
            source_list=[source]
        )

View File

@@ -457,17 +457,17 @@ class Musify(Page):
for album_info in soup.find_all("ul", {"class": "album-info"}):
list_element: BeautifulSoup = album_info.find("li")
if list_element is not None:
artist_soup: BeautifulSoup
for artist_soup in list_element.find_all("a"):
artist_source_list = []
href = artist_soup["href"]
if href is not None:
artist_source_list = [Source(self.SOURCE_TYPE, self.HOST + href)]
artist_list.append(Artist(
name=artist_soup.text.strip(),
source_list=artist_source_list
))
if list_element is not None:
artist_soup: BeautifulSoup
for artist_soup in list_element.find_all("a"):
artist_source_list = []
href = artist_soup["href"]
if href is not None:
artist_source_list = [Source(self.SOURCE_TYPE, self.HOST + href)]
artist_list.append(Artist(
name=artist_soup.text.strip(),
source_list=artist_source_list
))
# breadcrumbs
breadcrumb_list_element_list: List[BeautifulSoup] = soup.find_all("ol", {"class": "breadcrumb"})
@@ -485,7 +485,7 @@ class Musify(Page):
track_name = list_points[4].text.strip()
# artwork
# album artwork
artwork: Artwork = Artwork()
album_image_element_list: List[BeautifulSoup] = soup.find_all("img", {"class": "album-img"})
for album_image_element in album_image_element_list:
@@ -918,7 +918,8 @@ class Musify(Page):
name=name,
country=country,
source_list=source_list,
notes=notes
notes=notes,
artwork=self._fetch_artist_artwork(soup, **kwargs)
)
def _parse_album_card(self, album_card: BeautifulSoup, artist_name: str = None, **kwargs) -> Album:
@@ -1056,6 +1057,20 @@ class Musify(Page):
artist.album_collection.append(album)
def _fetch_artist_artwork(self, soup: BeautifulSoup, **kwargs):
    """Collect the artist's gallery images into a single ``Artwork``.

    Every ``<a>`` in *soup* that wraps an ``<img class="artist-img">`` is
    treated as a link to a gallery page. That page is requested and each
    ``artist-img`` image found on it is added as a variant.

    :param soup: the parsed artist page.
    :param kwargs: accepted for interface compatibility, not used here.
    :return: an ``Artwork`` holding the gallery images; it has no variants
        when nothing could be found.
    """
    from urllib.parse import urljoin

    artist_artwork: Artwork = Artwork()

    for artist_a_element in soup.find_all("a"):
        # only anchors that wrap an artist image lead to a gallery
        if artist_a_element.find("img", {"class": "artist-img"}) is None:
            continue

        gallery_href = artist_a_element.get("data-src", artist_a_element.get("href"))
        if gallery_href is None:
            continue

        # urljoin copes with both absolute links and links relative to the host
        response = self.connection.get(urljoin(self.HOST, gallery_href))
        if response is None:
            continue

        # NOTE(review): assumes connection.get returns a requests-style
        # response (or None on failure) that still has to be parsed - confirm.
        artwork_gallery = BeautifulSoup(response.content, "html.parser")

        for gallery_image_element in artwork_gallery.find_all("img", {"class": "artist-img"}):
            # Artwork.append ignores a url of None
            artist_artwork.append(url=gallery_image_element.get("data-src", gallery_image_element.get("src")))

    return artist_artwork
def fetch_artist(self, source: Source, **kwargs) -> Artist:
"""
TODO
@@ -1068,7 +1083,7 @@ class Musify(Page):
artist = self._fetch_initial_artist(url, source=source, **kwargs)
self._fetch_artist_discography(artist, url, artist.name, **kwargs)
self._fetch_artist_artwork(artist, **kwargs)
return artist
def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:

View File

@@ -59,11 +59,6 @@ Reference for the logging formats: https://docs.python.org/3/library/logging.htm
description="The logger for the musify scraper.",
default_value="musify"
),
LoggerAttribute(
name="musicbrainz_logger",
description="The logger for the musicbrainz scraper.",
default_value="musicbrainz"
),
LoggerAttribute(
name="youtube_logger",
description="The logger for the youtube scraper.",