diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index b227f64..e396a32 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -169,6 +169,8 @@ class Song(Base): id3Mapping.GENRE: [self.genre], id3Mapping.TRACKNUMBER: [self.tracksort_str], id3Mapping.COMMENT: [self.note.markdown], + id3Mapping.FILE_WEBPAGE_URL: self.source_collection.url_list, + id3Mapping.SOURCE_WEBPAGE_URL: self.source_collection.homepage_list, }) # metadata.merge_many([s.get_song_metadata() for s in self.source_collection]) album sources have no relevant metadata for id3 @@ -555,7 +557,8 @@ class Artist(Base): @property def metadata(self) -> Metadata: metadata = Metadata({ - id3Mapping.ARTIST: [self.name] + id3Mapping.ARTIST: [self.name], + id3Mapping.ARTIST_WEBPAGE_URL: self.source_collection.url_list, }) metadata.merge_many([s.get_artist_metadata() for s in self.source_collection]) diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py index c122d11..710f7ee 100644 --- a/music_kraken/objects/source.py +++ b/music_kraken/objects/source.py @@ -7,10 +7,9 @@ from urllib.parse import urlparse, ParseResult from dataclasses import dataclass, field from functools import cached_property -from ..utils import generate_id from ..utils.enums.source import SourcePages, SourceTypes from ..utils.config import youtube_settings -from ..utils.string_processing import hash_url +from ..utils.string_processing import hash_url, shorten_display_url from .metadata import Mapping, Metadata from .parents import OuterProxy @@ -20,12 +19,11 @@ from .collection import Collection @dataclass class Source: - url: str page_enum: SourcePages - referrer_page: SourcePages - audio_url: Optional[str] + url: str + referrer_page: SourcePages = None + audio_url: Optional[str] = None - id: int = field(default_factory=generate_id) additional_data: dict = field(default_factory=dict) def __post_init__(self): @@ -36,7 +34,7 @@ class Source: return urlparse(self.url) @classmethod - def match_url(cls, url: str, referrer_page: SourcePages) -> Optional["Source"]: + def match_url(cls, url: str, referrer_page: SourcePages) -> Optional[Source]: """ this shouldn't be used, unlesse you are not certain what the source is for the reason is that it is more inefficient @@ -78,73 +76,78 @@ class Source: if url.startswith("https://myspace.com"): return cls(SourcePages.MYSPACE, url, referrer_page=referrer_page) - def get_song_metadata(self) -> Metadata: - return Metadata({ - Mapping.FILE_WEBPAGE_URL: [self.url], - Mapping.SOURCE_WEBPAGE_URL: [self.homepage] - }) - - def get_artist_metadata(self) -> Metadata: - return Metadata({ - Mapping.ARTIST_WEBPAGE_URL: [self.url] - }) - @property def hash_url(self) -> str: return hash_url(self.url) @property - def metadata(self) -> Metadata: - return self.get_song_metadata() - - @property - def indexing_values(self) -> List[Tuple[str, object]]: - return [ - ('id', self.id), - ('url', self.url), - ('audio_url', self.audio_url), - ] - - def __str__(self): - return self.__repr__() + def indexing_values(self) -> list: + r = [hash_url(self.url)] + if self.audio_url: + r.append(hash_url(self.audio_url)) + return r def __repr__(self) -> str: - return f"Src({self.page_enum.value}: {self.url}, {self.audio_url})" + return f"Src({self.page_enum.value}: {shorten_display_url(self.url)})" - @property - def title_string(self) -> str: - return self.url + def __merge__(self, other: Source, override: bool = False): + if self.audio_url is None: + self.audio_url = other.audio_url + self.additional_data.update(other.additional_data) page_str = property(fget=lambda self: self.page_enum.value) - type_str = property(fget=lambda self: self.type_enum.value) - homepage = property(fget=lambda self: SourcePages.get_homepage(self.page_enum)) class SourceCollection: + _indexed_sources: Dict[str, Source] _page_to_source_list: Dict[SourcePages, List[Source]] - def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs): self._page_to_source_list = defaultdict(list) + self._indexed_sources = {} + + self.extend(data or []) def get_sources(self, *source_pages: List[Source]) -> Generator[Source]: + if not len(source_pages): + source_pages = self.source_pages + for page in source_pages: yield from self._page_to_source_list[page] def append(self, source: Source): - pass + existing_source = None + for key in source.indexing_values: + if key in self._indexed_sources: + existing_source = self._indexed_sources[key] + break + + if existing_source is not None: + existing_source.__merge__(source) + source = existing_source + + for key in source.indexing_values: + self._indexed_sources[key] = source + self._page_to_source_list[source.page_enum].append(source) def extend(self, sources: Iterable[Source]): for source in sources: self.append(source) + + def __iter__(self): + yield from self.get_sources() + + def __merge__(self, other: SourceCollection, override: bool = False): + self.extend(other) @property - def source_pages(self) -> Set[SourcePages]: - return set(source.page_enum for source in self._data) + def source_pages(self) -> Iterable[SourcePages]: + return sorted(self._page_to_source_list.keys(), key=lambda page: page.value) - def get_sources_from_page(self, source_page: SourcePages) -> List[Source]: - """ - getting the sources for a specific page like - YouTube or musify - """ - return self._page_to_source_list[source_page].copy() + @property + def url_list(self) -> List[str]: + return [source.url for source in self.get_sources(SourcePages.ARTIST)] + + @property + def homepage_list(self) -> List[str]: + return [source.homepage for source in self.source_pages] \ No newline at end of file diff --git a/music_kraken/pages/abstract.py b/music_kraken/pages/abstract.py index 468067b..712c371 100644 --- a/music_kraken/pages/abstract.py +++ b/music_kraken/pages/abstract.py @@ -246,7 +246,7 @@ class Page: # only certain database objects, have a source list if isinstance(music_object, INDEPENDENT_DB_OBJECTS): source: Source - for source in music_object.source_collection.get_sources_from_page(self.SOURCE_TYPE): + for source in music_object.source_collection.get_sources(self.SOURCE_TYPE): if music_object.already_fetched_from(source.hash_url): continue @@ -419,7 +419,7 @@ class Page: if song.target_collection.empty: song.target_collection.append(new_target) - sources = song.source_collection.get_sources_from_page(self.SOURCE_TYPE) + sources = song.source_collection.get_sources(self.SOURCE_TYPE) if len(sources) == 0: return DownloadResult(error_message=f"No source found for {song.title} as {self.__class__.__name__}.") diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index 0e99f75..4646385 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -693,7 +693,7 @@ class Musify(Page): if stop_at_level > 1: song: Song for song in album.song_collection: - sources = song.source_collection.get_sources_from_page(self.SOURCE_TYPE) + sources = song.source_collection.get_sources(self.SOURCE_TYPE) for source in sources: song.merge(self.fetch_song(source=source)) diff --git a/music_kraken/utils/enums/source.py b/music_kraken/utils/enums/source.py index a5e213e..be3171f 100644 --- a/music_kraken/utils/enums/source.py +++ b/music_kraken/utils/enums/source.py @@ -9,42 +9,32 @@ class SourceTypes(Enum): class SourcePages(Enum): - YOUTUBE = "youtube" - MUSIFY = "musify" - YOUTUBE_MUSIC = "youtube music" - GENIUS = "genius" - MUSICBRAINZ = "musicbrainz" + YOUTUBE = "youtube", "https://www.youtube.com/" + MUSIFY = "musify", "https://musify.club/" + YOUTUBE_MUSIC = "youtube music", "https://music.youtube.com/" + GENIUS = "genius", "https://genius.com/" + MUSICBRAINZ = "musicbrainz", "https://musicbrainz.org/" ENCYCLOPAEDIA_METALLUM = "encyclopaedia metallum" - BANDCAMP = "bandcamp" - DEEZER = "deezer" - SPOTIFY = "spotify" + BANDCAMP = "bandcamp", "https://bandcamp.com/" + DEEZER = "deezer", "https://www.deezer.com/" + SPOTIFY = "spotify", "https://open.spotify.com/" # This has nothing to do with audio, but bands can be here - WIKIPEDIA = "wikipedia" - INSTAGRAM = "instagram" - FACEBOOK = "facebook" - TWITTER = "twitter" # I will use nitter though lol - MYSPACE = "myspace" # Yes somehow this ancient site is linked EVERYWHERE + WIKIPEDIA = "wikipedia", "https://en.wikipedia.org/wiki/Main_Page" + INSTAGRAM = "instagram", "https://www.instagram.com/" + FACEBOOK = "facebook", "https://www.facebook.com/" + TWITTER = "twitter", "https://twitter.com/" + MYSPACE = "myspace", "https://myspace.com/" # Yes somehow this ancient site is linked EVERYWHERE - MANUAL = "manual" + MANUAL = "manual", "" - PRESET = "preset" + PRESET = "preset", "" - @classmethod - def get_homepage(cls, attribute) -> str: - homepage_map = { - cls.YOUTUBE: "https://www.youtube.com/", - cls.MUSIFY: "https://musify.club/", - cls.MUSICBRAINZ: "https://musicbrainz.org/", - cls.ENCYCLOPAEDIA_METALLUM: "https://www.metal-archives.com/", - cls.GENIUS: "https://genius.com/", - cls.BANDCAMP: "https://bandcamp.com/", - cls.DEEZER: "https://www.deezer.com/", - cls.INSTAGRAM: "https://www.instagram.com/", - cls.FACEBOOK: "https://www.facebook.com/", - cls.SPOTIFY: "https://open.spotify.com/", - cls.TWITTER: "https://twitter.com/", - cls.MYSPACE: "https://myspace.com/", - cls.WIKIPEDIA: "https://en.wikipedia.org/wiki/Main_Page" - } - return homepage_map[attribute] \ No newline at end of file + def __new__(cls, value, homepage = None): + member = object.__new__(cls) + + member._value_ = value + member.homepage = homepage + + return member + \ No newline at end of file diff --git a/music_kraken/utils/string_processing.py b/music_kraken/utils/string_processing.py index d9d4c70..c81c71f 100644 --- a/music_kraken/utils/string_processing.py +++ b/music_kraken/utils/string_processing.py @@ -134,6 +134,7 @@ def unify_punctuation(to_unify: str, unify_to: str = UNIFY_TO) -> str: to_unify = to_unify.replace(char, unify_to) return to_unify +@lru_cache(maxsize=128) def hash_url(url: Union[str, ParseResult]) -> str: if isinstance(url, str): url = urlparse(url)