added wrapper methods for the basic functionalities of the web scraper

This commit is contained in:
Hellow2 2023-05-26 11:41:20 +02:00
parent 2fabd09a71
commit 10f6153199
3 changed files with 53 additions and 11 deletions

View File

@ -1,8 +1,10 @@
from typing import Tuple, Type, Dict, List, Set from typing import Tuple, Type, Dict, List, Set
from .results import SearchResults
from ..objects import DatabaseObject
from ..utils.enums.source import SourcePages from ..utils.enums.source import SourcePages
from ..utils.support_classes import Query, EndThread from ..utils.support_classes import Query, DownloadResult
from ..pages import Page, EncyclopaediaMetallum, Musify from ..pages import Page, EncyclopaediaMetallum, Musify, INDEPENDENT_DB_OBJECTS
ALL_PAGES: Set[Type[Page]] = { ALL_PAGES: Set[Type[Page]] = {
EncyclopaediaMetallum, EncyclopaediaMetallum,
@ -23,6 +25,7 @@ class Pages:
def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False) -> None: def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False) -> None:
# initialize all page instances # initialize all page instances
self._page_instances: Dict[Type[Page], Page] = dict() self._page_instances: Dict[Type[Page], Page] = dict()
self._source_to_page: Dict[SourcePages, Type[Page]] = dict()
exclude_pages = exclude_pages if exclude_pages is not None else set() exclude_pages = exclude_pages if exclude_pages is not None else set()
@ -35,15 +38,50 @@ class Pages:
def _set_to_tuple(page_set: Set[Type[Page]]) -> Tuple[Type[Page], ...]: def _set_to_tuple(page_set: Set[Type[Page]]) -> Tuple[Type[Page], ...]:
return tuple(sorted(page_set, key=lambda page: page.__name__)) return tuple(sorted(page_set, key=lambda page: page.__name__))
self.pages: Tuple[Type[Page], ...] = _set_to_tuple(ALL_PAGES.difference(exclude_pages)) self._pages_set: Set[Type[Page]] = ALL_PAGES.difference(exclude_pages)
self.audio_pages: Tuple[Type[Page], ...] = _set_to_tuple(set(self.pages).intersection(AUDIO_PAGES)) self.pages: Tuple[Type[Page], ...] = _set_to_tuple(ALL_PAGES.difference(self.pages))
for page_type in ALL_PAGES: self._audio_pages_set: Set[Type[Page]] = self._pages_set.intersection(AUDIO_PAGES)
self._page_instances[page_type] = page_type() self.audio_pages: Tuple[Type[Page], ...] = _set_to_tuple(self._audio_pages_set)
def search(self, query: Query):
for page_type in self.pages: for page_type in self.pages:
self._page_instances[page_type].search(query=query) self._page_instances[page_type] = page_type()
self._source_to_page[page_type.SOURCE_TYPE] = page_type
def search(self, query: Query) -> SearchResults:
    """
    Run the query against every page in ``self.pages`` and collect the results.

    :param query: the query that is handed unchanged to each page's ``search``
    :return: a ``SearchResults`` object holding one entry per page type
    """
    collected = SearchResults()

    for page in self.pages:
        # each page type has exactly one instance stored in _page_instances
        page_instance = self._page_instances[page]
        hits = page_instance.search(query=query)
        collected.add(page=page, search_result=hits)

    return collected
def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1) -> DatabaseObject:
    """
    Fetch additional details for ``music_object`` from every page it has a source for
    and merge them into the object.

    Objects that are not an instance of ``INDEPENDENT_DB_OBJECTS`` are returned untouched.
    Source pages without a registered (non-excluded) page class are skipped
    instead of raising a ``KeyError``.

    :param music_object: the object to enrich; it is merged into in place
    :param stop_at_level: passed through unchanged to each page's ``fetch_details``
    :return: the same ``music_object`` that was passed in
    """
    if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
        return music_object

    for source_page in music_object.source_collection.source_pages:
        # .get instead of [] because a source may point to a page
        # that has no page class registered in this instance
        page_type = self._source_to_page.get(source_page)
        if page_type is None or page_type not in self._pages_set:
            continue

        fetched = self._page_instances[page_type].fetch_details(
            music_object=music_object,
            stop_at_level=stop_at_level
        )
        music_object.merge(fetched)

    return music_object
def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False) -> DownloadResult:
    """
    Download ``music_object`` from one of the audio pages it has a source for.

    Only instances of ``INDEPENDENT_DB_OBJECTS`` can be downloaded. Sources whose
    page has no registered page class are ignored instead of raising a ``KeyError``.
    If several audio pages qualify, the one whose class name sorts first is used,
    so the choice is deterministic.

    :param music_object: the object that should be downloaded
    :param genre: passed through unchanged to the page's ``download``
    :param download_all: passed through unchanged to the page's ``download``
    :return: the ``DownloadResult`` of the chosen page, or a ``DownloadResult``
        carrying an error message if the object can't be downloaded
    """
    if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
        return DownloadResult(error_message=f"{type(music_object).__name__} can't be downloaded.")

    # only consider sources that map to a page class known to this instance
    _page_types = {
        self._source_to_page[src]
        for src in music_object.source_collection.source_pages
        if src in self._source_to_page
    }
    audio_pages = self._audio_pages_set.intersection(_page_types)

    if not audio_pages:
        return DownloadResult(error_message=f"No audio source has been found for {music_object}.")

    # sets have no stable order, so pick by class name to get a reproducible choice
    download_page = min(audio_pages, key=lambda page: page.__name__)

    # NOTE(review): music_object is not forwarded to the page's download() here,
    # unlike in fetch_details — confirm against the signature of Page.download
    return self._page_instances[download_page].download(genre=genre, download_all=download_all)
""" """

View File

@ -1,6 +1,6 @@
from collections import defaultdict from collections import defaultdict
from enum import Enum from enum import Enum
from typing import List, Dict, Tuple, Optional from typing import List, Dict, Set, Tuple, Optional
from urllib.parse import urlparse from urllib.parse import urlparse
from ..utils.enums.source import SourcePages, SourceTypes from ..utils.enums.source import SourcePages, SourceTypes
@ -129,6 +129,10 @@ class SourceCollection(Collection):
self._page_to_source_list[source.page_enum].append(source) self._page_to_source_list[source.page_enum].append(source)
@property
def source_pages(self) -> Set[SourcePages]:
    """
    The distinct ``page_enum`` values of all sources stored in ``self._data``.

    A new set is built on every access.
    """
    return {entry.page_enum for entry in self._data}
def get_sources_from_page(self, source_page: SourcePages) -> List[Source]: def get_sources_from_page(self, source_page: SourcePages) -> List[Source]:
""" """
getting the sources for a specific page like getting the sources for a specific page like

View File

@ -1,3 +1,3 @@
from .encyclopaedia_metallum import EncyclopaediaMetallum from .encyclopaedia_metallum import EncyclopaediaMetallum
from .musify import Musify from .musify import Musify
from .abstract import Page from .abstract import Page, INDEPENDENT_DB_OBJECTS