From 10f6153199f555beff9c9a2b04f39eb05665bc9a Mon Sep 17 00:00:00 2001
From: Hellow2
Date: Fri, 26 May 2023 11:41:20 +0200
Subject: [PATCH] added wrapper methods, for the basic functionalities of the webscraper

---
Review notes (commentary only; this section is not part of the commit):
 * Pages.__init__ derived self.pages from ALL_PAGES.difference(self.pages),
   which reads self.pages before it is assigned. It is now built from
   self._pages_set.
 * Pages.fetch_details and Pages.download no longer raise KeyError for a
   source whose page is excluded or has no registered Page class.
 * Pages.download now forwards music_object to the page. The keyword name
   mirrors the fetch_details call; confirm it against Page.download.
 * The post-image blob ids on the "index" lines predate these fixes.

 src/music_kraken/download/page_attributes.py | 61 ++++++++++++++++----
 src/music_kraken/objects/source.py           |  7 ++-
 src/music_kraken/pages/__init__.py           |  2 +-
 3 files changed, 59 insertions(+), 11 deletions(-)

diff --git a/src/music_kraken/download/page_attributes.py b/src/music_kraken/download/page_attributes.py
index 4ba72b5..a4630a8 100644
--- a/src/music_kraken/download/page_attributes.py
+++ b/src/music_kraken/download/page_attributes.py
@@ -1,8 +1,10 @@
 from typing import Tuple, Type, Dict, List, Set
 
+from .results import SearchResults
+from ..objects import DatabaseObject
 from ..utils.enums.source import SourcePages
-from ..utils.support_classes import Query, EndThread
-from ..pages import Page, EncyclopaediaMetallum, Musify
+from ..utils.support_classes import Query, DownloadResult
+from ..pages import Page, EncyclopaediaMetallum, Musify, INDEPENDENT_DB_OBJECTS
 
 ALL_PAGES: Set[Type[Page]] = {
     EncyclopaediaMetallum,
@@ -23,6 +25,7 @@ class Pages:
     def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False) -> None:
         # initialize all page instances
         self._page_instances: Dict[Type[Page], Page] = dict()
+        self._source_to_page: Dict[SourcePages, Type[Page]] = dict()
 
         exclude_pages = exclude_pages if exclude_pages is not None else set()
 
@@ -35,15 +38,55 @@ class Pages:
         def _set_to_tuple(page_set: Set[Type[Page]]) -> Tuple[Type[Page], ...]:
             return tuple(sorted(page_set, key=lambda page: page.__name__))
 
-        self.pages: Tuple[Type[Page], ...] = _set_to_tuple(ALL_PAGES.difference(exclude_pages))
-        self.audio_pages: Tuple[Type[Page], ...] = _set_to_tuple(set(self.pages).intersection(AUDIO_PAGES))
+        self._pages_set: Set[Type[Page]] = ALL_PAGES.difference(exclude_pages)
+        self.pages: Tuple[Type[Page], ...] = _set_to_tuple(self._pages_set)
+
+        self._audio_pages_set: Set[Type[Page]] = self._pages_set.intersection(AUDIO_PAGES)
+        self.audio_pages: Tuple[Type[Page], ...] = _set_to_tuple(self._audio_pages_set)
 
-        for page_type in ALL_PAGES:
-            self._page_instances[page_type] = page_type()
-
-    def search(self, query: Query):
         for page_type in self.pages:
-            self._page_instances[page_type].search(query=query)
+            self._page_instances[page_type] = page_type()
+            self._source_to_page[page_type.SOURCE_TYPE] = page_type
+
+    def search(self, query: Query) -> SearchResults:
+        """Search every enabled page for the query and collect the results per page."""
+        result = SearchResults()
+
+        for page_type in self.pages:
+            result.add(
+                page=page_type,
+                search_result=self._page_instances[page_type].search(query=query)
+            )
+
+        return result
+
+    def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1) -> DatabaseObject:
+        """Merge the details from each enabled page the object has a source on into it."""
+        if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
+            return music_object
+
+        for source_page in music_object.source_collection.source_pages:
+            # a source may belong to an excluded or unsupported page: .get() yields None, which is skipped below
+            page_type = self._source_to_page.get(source_page)
+
+            if page_type in self._pages_set:
+                music_object.merge(self._page_instances[page_type].fetch_details(music_object=music_object, stop_at_level=stop_at_level))
+
+        return music_object
+
+    def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False) -> DownloadResult:
+        """Download the object through an enabled audio page it has a source on."""
+        if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
+            return DownloadResult(error_message=f"{type(music_object).__name__} can't be downloaded.")
+
+        # only sources with a registered page can be mapped; others are ignored instead of raising KeyError
+        _page_types = {self._source_to_page[src] for src in music_object.source_collection.source_pages if src in self._source_to_page}
+        audio_pages = self._audio_pages_set.intersection(_page_types)
+
+        for download_page in audio_pages:
+            return self._page_instances[download_page].download(music_object=music_object, genre=genre, download_all=download_all)
+
+        return DownloadResult(error_message=f"No audio source has been found for {music_object}.")
 
 
 """
diff --git a/src/music_kraken/objects/source.py b/src/music_kraken/objects/source.py
index 2d37f59..1680821 100644
--- a/src/music_kraken/objects/source.py
+++ b/src/music_kraken/objects/source.py
@@ -1,6 +1,6 @@
 from collections import defaultdict
 from enum import Enum
-from typing import List, Dict, Tuple, Optional
+from typing import List, Dict, Set, Tuple, Optional
 from urllib.parse import urlparse
 
 from ..utils.enums.source import SourcePages, SourceTypes
@@ -128,6 +128,11 @@ class SourceCollection(Collection):
         super().map_element(source)
 
         self._page_to_source_list[source.page_enum].append(source)
+
+    @property
+    def source_pages(self) -> Set[SourcePages]:
+        """Return the distinct page enums of the sources held in self._data."""
+        return set(source.page_enum for source in self._data)
 
     def get_sources_from_page(self, source_page: SourcePages) -> List[Source]:
         """
diff --git a/src/music_kraken/pages/__init__.py b/src/music_kraken/pages/__init__.py
index 0423f94..a7e2a61 100644
--- a/src/music_kraken/pages/__init__.py
+++ b/src/music_kraken/pages/__init__.py
@@ -1,3 +1,3 @@
 from .encyclopaedia_metallum import EncyclopaediaMetallum
 from .musify import Musify
-from .abstract import Page
+from .abstract import Page, INDEPENDENT_DB_OBJECTS