From 40e9366a0bd8dfcabb188c19c247af0cf4a66d7e Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Thu, 23 May 2024 14:32:31 +0200 Subject: [PATCH] feat: implemented the new page mechanics in the downloader --- music_kraken/download/page_attributes.py | 80 +++--------------------- 1 file changed, 9 insertions(+), 71 deletions(-) diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index 1db24be..8be35b8 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -30,31 +30,9 @@ from ..utils.exception import MKMissingNameException from ..utils.exception.download import UrlNotFoundException from ..utils.shared import DEBUG_PAGES -from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, Genius, INDEPENDENT_DB_OBJECTS +from ..pages import scan_for_pages, get_pages -ALL_PAGES: Set[Type[Page]] = { - # EncyclopaediaMetallum, - Genius, - Musify, - YoutubeMusic, - Bandcamp -} - -if youtube_settings["use_youtube_alongside_youtube_music"]: - ALL_PAGES.add(YouTube) - -AUDIO_PAGES: Set[Type[Page]] = { - Musify, - YouTube, - YoutubeMusic, - Bandcamp -} - -SHADY_PAGES: Set[Type[Page]] = { - Musify, -} - fetch_map = { Song: "fetch_song", Album: "fetch_album", @@ -62,66 +40,28 @@ fetch_map = { Label: "fetch_label", } -if DEBUG_PAGES: - DEBUGGING_PAGE = Bandcamp - print(f"Only downloading from page {DEBUGGING_PAGE}.") - - ALL_PAGES = {DEBUGGING_PAGE} - AUDIO_PAGES = ALL_PAGES.union(AUDIO_PAGES) - class Pages: - def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False, download_options: DownloadOptions = None, fetch_options: FetchOptions = None): + def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None, **kwargs): self.LOGGER = logging.getLogger("download") self.download_options: DownloadOptions = download_options or DownloadOptions() self.fetch_options: FetchOptions = fetch_options or FetchOptions() - # initialize all page instances - self._page_instances: Dict[Type[Page], Page] = dict() - self._source_to_page: Dict[SourceType, Type[Page]] = dict() - - exclude_pages = exclude_pages if exclude_pages is not None else set() - - if exclude_shady: - exclude_pages = exclude_pages.union(SHADY_PAGES) - - if not exclude_pages.issubset(ALL_PAGES): - raise ValueError(f"The excluded pages have to be a subset of all pages: {exclude_pages} | {ALL_PAGES}") - - def _set_to_tuple(page_set: Set[Type[Page]]) -> Tuple[Type[Page], ...]: - return tuple(sorted(page_set, key=lambda page: page.__name__)) - - self._pages_set: Set[Type[Page]] = ALL_PAGES.difference(exclude_pages) - self.pages: Tuple[Type[Page], ...] = _set_to_tuple(self._pages_set) - - self._audio_pages_set: Set[Type[Page]] = self._pages_set.intersection(AUDIO_PAGES) - self.audio_pages: Tuple[Type[Page], ...] = _set_to_tuple(self._audio_pages_set) - - for page_type in self.pages: - self._page_instances[page_type] = page_type(fetch_options=self.fetch_options, download_options=self.download_options) - self._source_to_page[page_type.SOURCE_TYPE] = page_type - - def _get_page_from_enum(self, source_page: SourceType) -> Page: - if source_page not in self._source_to_page: - return None - return self._page_instances[self._source_to_page[source_page]] + scan_for_pages() def search(self, query: Query) -> SearchResults: result = SearchResults() - for page_type in self.pages: + for page in get_pages(): result.add( - page=page_type, - search_result=self._page_instances[page_type].search(query=query) + page=type(page), + search_result=page.search(query=query) ) return result def fetch_details(self, data_object: DataObject, stop_at_level: int = 1, **kwargs) -> DataObject: - if not isinstance(data_object, INDEPENDENT_DB_OBJECTS): - return data_object - source: Source for source in data_object.source_collection.get_sources(source_type_sorting={ "only_with_page": True, @@ -317,12 +257,10 @@ class Pages: tmp.delete() return r - def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]: + def fetch_url(self, url: str, **kwargs) -> DataObject: source = Source.match_url(url, ALL_SOURCE_TYPES.MANUAL) - if source is None: + if source is None or source.page is None: raise UrlNotFoundException(url=url) - _actual_page = self._source_to_page[source.source_type] - - return _actual_page, self._page_instances[_actual_page].fetch_object_from_source(source=source, stop_at_level=stop_at_level) \ No newline at end of file + return source.page.fetch_object_from_source(source=source, **kwargs) \ No newline at end of file