From 0343c11a626696ac24b41af8b63dd2943759b195 Mon Sep 17 00:00:00 2001
From: Lars Noack
Date: Mon, 13 May 2024 18:03:20 +0200
Subject: [PATCH] feat: migrated fetch details and from source

Move the per-source fetch logic out of Page (pages/abstract.py) and into
Pages (download/page_attributes.py):

* Page.fetch_details and Page.fetch_object_from_source are removed.
* Pages.fetch_details iterates the sources of the data object and merges
  whatever Pages.fetch_from_source returns for each of them.
* Pages.fetch_from_source resolves the page through the new
  _get_page_from_enum helper and dispatches through the module-level
  fetch_map (object type -> name of the fetch_* method on the page).
* DatabaseObject becomes a plain alias of OuterProxy, and OuterProxy
  declares source_collection with a SourceCollection default factory.

---
Review notes (below the "---" marker, so not part of the commit message):

* fetch_from_source used to invoke the fetch_* method while looking it up
  and then called the returned data object a second time. The method is
  now looked up once and called once with the forwarded kwargs.
* fetch_from_source returns None when the source type is not a key of
  fetch_map (the removed Page.fetch_object_from_source guarded against a
  None type as well) instead of raising KeyError.
* _get_page_from_enum is annotated Optional[Page] because it returns None
  for a page that is not registered.
* The line structure of this patch was rebuilt from a whitespace-collapsed
  copy. Hunk line counts match the headers and the diffstat, but trailing
  whitespace on whitespace-only lines could not be recovered, and the
  index hashes are those of the original commit. Presumably
  `git apply --ignore-whitespace` is needed to apply it; confirm.

 music_kraken/download/page_attributes.py | 57 ++++++++++----
 music_kraken/objects/__init__.py         |  2 +-
 music_kraken/objects/parents.py          |  4 +-
 music_kraken/pages/abstract.py           | 98 +-----------------------
 4 files changed, 45 insertions(+), 116 deletions(-)

diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py
index f0b678c..f37517e 100644
--- a/music_kraken/download/page_attributes.py
+++ b/music_kraken/download/page_attributes.py
@@ -1,7 +1,7 @@
-from typing import Tuple, Type, Dict, Set
+from typing import Tuple, Type, Dict, Set, Optional
 
 from .results import SearchResults
-from ..objects import DatabaseObject, Source
+from ..objects import DatabaseObject as DataObject, Source
 
 from ..utils.config import youtube_settings
 from ..utils.enums.source import SourcePages
@@ -34,6 +34,13 @@ SHADY_PAGES: Set[Type[Page]] = {
     Musify,
 }
 
+fetch_map = {
+    Song: "fetch_song",
+    Album: "fetch_album",
+    Artist: "fetch_artist",
+    Label: "fetch_label",
+}
+
 if DEBUG_PAGES:
     DEBUGGING_PAGE = Bandcamp
     print(f"Only downloading from page {DEBUGGING_PAGE}.")
@@ -68,7 +75,12 @@ class Pages:
         for page_type in self.pages:
             self._page_instances[page_type] = page_type()
             self._source_to_page[page_type.SOURCE_TYPE] = page_type
-
+
+    def _get_page_from_enum(self, source_page: SourcePages) -> Optional[Page]:
+        if source_page not in self._source_to_page:
+            return None
+        return self._page_instances[self._source_to_page[source_page]]
+
     def search(self, query: Query) -> SearchResults:
         result = SearchResults()
 
@@ -80,22 +92,33 @@ class Pages:
 
         return result
 
-    def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1) -> DatabaseObject:
-        if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
-            return music_object
+    def fetch_details(self, data_object: DataObject, stop_at_level: int = 1) -> DataObject:
+        if not isinstance(data_object, INDEPENDENT_DB_OBJECTS):
+            return data_object
 
-        for source_page in music_object.source_collection.source_pages:
-            if source_page not in self._source_to_page:
-                continue
+        source: Source
+        for source in data_object.source_collection.get_sources():
+            new_data_object = self.fetch_from_source(source=source, stop_at_level=stop_at_level)
+            if new_data_object is not None:
+                data_object.merge(new_data_object)
 
-            page_type = self._source_to_page[source_page]
-
-            if page_type in self._pages_set:
-                music_object.merge(self._page_instances[page_type].fetch_details(music_object=music_object, stop_at_level=stop_at_level))
+        return data_object
+
+    def fetch_from_source(self, source: Source, **kwargs) -> Optional[DataObject]:
+        page: Page = self._get_page_from_enum(source.page_enum)
+        if page is None:
+            return None
 
-        return music_object
+        source_type = page.get_source_type(source)
+        if source_type not in fetch_map or not hasattr(page, fetch_map[source_type]):
+            return None
 
-    def is_downloadable(self, music_object: DatabaseObject) -> bool:
+        func = getattr(page, fetch_map[source_type])  # bound fetch_* method, called exactly once below
+        data_object: DataObject = func(source=source, **kwargs)
+        data_object.mark_as_fetched(source.hash_url)
+        return data_object
+
+    def is_downloadable(self, music_object: DataObject) -> bool:
         _page_types = set(self._source_to_page)
         for src in music_object.source_collection.source_pages:
             if src in self._source_to_page:
@@ -104,7 +127,7 @@ class Pages:
         audio_pages = self._audio_pages_set.intersection(_page_types)
         return len(audio_pages) > 0
 
-    def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult:
+    def download(self, music_object: DataObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult:
         if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
             return DownloadResult(error_message=f"{type(music_object).__name__} can't be downloaded.")
 
@@ -122,7 +145,7 @@ class Pages:
 
         return DownloadResult(error_message=f"No audio source has been found for {music_object}.")
 
-    def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DatabaseObject]:
+    def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]:
         source = Source.match_url(url, SourcePages.MANUAL)
 
         if source is None:
diff --git a/music_kraken/objects/__init__.py b/music_kraken/objects/__init__.py
index da5b9aa..0504353 100644
--- a/music_kraken/objects/__init__.py
+++ b/music_kraken/objects/__init__.py
@@ -24,4 +24,4 @@ from .parents import OuterProxy
 
 from .artwork import Artwork
 
-DatabaseObject = TypeVar('T', bound=OuterProxy)
+DatabaseObject = OuterProxy
diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py
index 3ef6ab3..0b8504e 100644
--- a/music_kraken/objects/parents.py
+++ b/music_kraken/objects/parents.py
@@ -99,7 +99,9 @@ class OuterProxy:
     Wraps the inner data, and provides apis, to naturally access those values.
     """
 
-    _default_factories: dict = {}
+    source_collection: SourceCollection
+
+    _default_factories: dict = {"source_collection": SourceCollection}
     _outer_attribute: Set[str] = {"options", "metadata", "indexing_values", "option_string"}
 
     DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = tuple()
diff --git a/music_kraken/pages/abstract.py b/music_kraken/pages/abstract.py
index a202b32..965f836 100644
--- a/music_kraken/pages/abstract.py
+++ b/music_kraken/pages/abstract.py
@@ -189,103 +189,7 @@ class Page:
     def song_search(self, song: Song) -> List[Song]:
         return []
 
-    def fetch_details(
-        self,
-        music_object: DatabaseObject,
-        stop_at_level: int = 1,
-    ) -> DatabaseObject:
-        """
-        when a music object with lacking data is passed in, it returns
-        the SAME object **(no copy)** with more detailed data.
-        If you for example put in, an album, it fetches the tracklist
-
-        :param music_object:
-        :param stop_at_level:
-        This says the depth of the level the scraper will recurse to.
-        If this is for example set to 2, then the levels could be:
-        1. Level: the album
-        2. Level: every song of the album + every artist of the album
-        If no additional requests are needed to get the data one level below the supposed stop level
-        this gets ignored
-        :return detailed_music_object: IT MODIFIES THE INPUT OBJ
-        """
-        # creating a new object, of the same type
-        new_music_object: Optional[DatabaseObject] = None
-        fetched_from_url: List[str] = []
-
-        # only certain database objects, have a source list
-        if isinstance(music_object, INDEPENDENT_DB_OBJECTS):
-            source: Source
-            for source in music_object.source_collection.get_sources(self.SOURCE_TYPE):
-                if music_object.already_fetched_from(source.hash_url):
-                    continue
-
-                tmp = self.fetch_object_from_source(
-                    source=source,
-                    enforce_type=type(music_object),
-                    stop_at_level=stop_at_level,
-                    type_string=type(music_object).__name__,
-                    entity_string=music_object.option_string,
-                )
-
-                if new_music_object is None:
-                    new_music_object = tmp
-                else:
-                    new_music_object.merge(tmp)
-                fetched_from_url.append(source.hash_url)
-
-        if new_music_object is not None:
-            music_object.merge(new_music_object)
-
-        music_object.mark_as_fetched(*fetched_from_url)
-        return music_object
-
-    def fetch_object_from_source(
-        self,
-        source: Source,
-        stop_at_level: int = 2,
-        enforce_type: Type[DatabaseObject] = None,
-        type_string: str = "",
-        entity_string: str = "",
-    ) -> Optional[DatabaseObject]:
-
-        obj_type = self.get_source_type(source)
-
-        if obj_type is None:
-            return None
-
-        if enforce_type != obj_type and enforce_type is not None:
-            self.LOGGER.warning(f"Object type isn't type to enforce: {enforce_type}, {obj_type}")
-            return None
-
-        music_object: DatabaseObject = None
-
-        fetch_map = {
-            Song: self.fetch_song,
-            Album: self.fetch_album,
-            Artist: self.fetch_artist,
-            Label: self.fetch_label
-        }
-
-        if obj_type in fetch_map:
-            music_object = fetch_map[obj_type](source, stop_at_level=stop_at_level)
-        else:
-            self.LOGGER.warning(f"Can't fetch details of type: {obj_type}")
-            return None
-
-        if stop_at_level > 0:
-            trace(f"fetching {type_string} [{entity_string}] [stop_at_level={stop_at_level}]")
-
-            collection: Collection
-            for collection_str in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
-                collection = music_object.__getattribute__(collection_str)
-
-                for sub_element in collection:
-                    sub_element.merge(
-                        self.fetch_details(sub_element, stop_at_level=stop_at_level - 1))
-
-        return music_object
-
+    # to fetch stuff
     def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
         return Song()
 