added wrapper methods for the basic functionalities of the webscraper
This commit is contained in:
parent
2fabd09a71
commit
10f6153199
@ -1,8 +1,10 @@
|
|||||||
from typing import Tuple, Type, Dict, List, Set
|
from typing import Tuple, Type, Dict, List, Set
|
||||||
|
|
||||||
|
from .results import SearchResults
|
||||||
|
from ..objects import DatabaseObject
|
||||||
from ..utils.enums.source import SourcePages
|
from ..utils.enums.source import SourcePages
|
||||||
from ..utils.support_classes import Query, EndThread
|
from ..utils.support_classes import Query, DownloadResult
|
||||||
from ..pages import Page, EncyclopaediaMetallum, Musify
|
from ..pages import Page, EncyclopaediaMetallum, Musify, INDEPENDENT_DB_OBJECTS
|
||||||
|
|
||||||
ALL_PAGES: Set[Type[Page]] = {
|
ALL_PAGES: Set[Type[Page]] = {
|
||||||
EncyclopaediaMetallum,
|
EncyclopaediaMetallum,
|
||||||
@ -23,6 +25,7 @@ class Pages:
|
|||||||
def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False) -> None:
|
def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False) -> None:
|
||||||
# initialize all page instances
|
# initialize all page instances
|
||||||
self._page_instances: Dict[Type[Page], Page] = dict()
|
self._page_instances: Dict[Type[Page], Page] = dict()
|
||||||
|
self._source_to_page: Dict[SourcePages, Type[Page]] = dict()
|
||||||
|
|
||||||
exclude_pages = exclude_pages if exclude_pages is not None else set()
|
exclude_pages = exclude_pages if exclude_pages is not None else set()
|
||||||
|
|
||||||
@ -35,15 +38,50 @@ class Pages:
|
|||||||
def _set_to_tuple(page_set: Set[Type[Page]]) -> Tuple[Type[Page], ...]:
|
def _set_to_tuple(page_set: Set[Type[Page]]) -> Tuple[Type[Page], ...]:
|
||||||
return tuple(sorted(page_set, key=lambda page: page.__name__))
|
return tuple(sorted(page_set, key=lambda page: page.__name__))
|
||||||
|
|
||||||
self.pages: Tuple[Type[Page], ...] = _set_to_tuple(ALL_PAGES.difference(exclude_pages))
|
self._pages_set: Set[Type[Page]] = ALL_PAGES.difference(exclude_pages)
|
||||||
self.audio_pages: Tuple[Type[Page], ...] = _set_to_tuple(set(self.pages).intersection(AUDIO_PAGES))
|
self.pages: Tuple[Type[Page], ...] = _set_to_tuple(ALL_PAGES.difference(self.pages))
|
||||||
|
|
||||||
for page_type in ALL_PAGES:
|
self._audio_pages_set: Set[Type[Page]] = self._pages_set.intersection(AUDIO_PAGES)
|
||||||
self._page_instances[page_type] = page_type()
|
self.audio_pages: Tuple[Type[Page], ...] = _set_to_tuple(self._audio_pages_set)
|
||||||
|
|
||||||
def search(self, query: Query):
|
|
||||||
for page_type in self.pages:
|
for page_type in self.pages:
|
||||||
self._page_instances[page_type].search(query=query)
|
self._page_instances[page_type] = page_type()
|
||||||
|
self._source_to_page[page_type.SOURCE_TYPE] = page_type
|
||||||
|
|
||||||
|
def search(self, query: Query) -> SearchResults:
    """
    Run the query against every enabled page and gather the results.

    :param query: the query handed to each page's ``search``
    :return: a ``SearchResults`` to which one entry per page in
        ``self.pages`` has been added
    """
    collected = SearchResults()

    for page_type in self.pages:
        page_instance = self._page_instances[page_type]
        found = page_instance.search(query=query)
        collected.add(page=page_type, search_result=found)

    return collected
|
||||||
|
|
||||||
|
def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1) -> DatabaseObject:
    """
    Enrich a music object with the details of every enabled page it has a source for.

    Objects that are not an instance of ``INDEPENDENT_DB_OBJECTS`` are
    returned untouched. Sources whose page is not registered in this
    instance, or is not in the enabled page set, are skipped.

    :param music_object: the object to enrich; fetched details are merged into it
    :param stop_at_level: forwarded unchanged to each page's ``fetch_details``
    :return: the same ``music_object`` that was passed in
    """
    if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
        return music_object

    for source_page in music_object.source_collection.source_pages:
        # A source may reference a page that was never registered here;
        # skip it instead of failing with a KeyError.
        page_type = self._source_to_page.get(source_page)
        if page_type is None or page_type not in self._pages_set:
            continue

        fetched = self._page_instances[page_type].fetch_details(
            music_object=music_object,
            stop_at_level=stop_at_level,
        )
        music_object.merge(fetched)

    return music_object
|
||||||
|
|
||||||
|
def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False) -> DownloadResult:
    """
    Download a music object through one of the enabled audio pages it has a source for.

    :param music_object: the object to download; must be an instance of
        ``INDEPENDENT_DB_OBJECTS``
    :param genre: forwarded unchanged to the page's ``download``
    :param download_all: forwarded unchanged to the page's ``download``
    :return: the page's ``DownloadResult``, or a ``DownloadResult`` carrying an
        error message when the object can't be downloaded or no enabled
        audio page matches its sources
    """
    if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
        return DownloadResult(error_message=f"{type(music_object).__name__} can't be downloaded.")

    # Only map sources whose page is registered; an unknown source must not
    # abort the download with a KeyError.
    candidate_pages = {
        self._source_to_page[src]
        for src in music_object.source_collection.source_pages
        if src in self._source_to_page
    }
    audio_pages = self._audio_pages_set.intersection(candidate_pages)

    if audio_pages:
        # Set iteration order is arbitrary; pick by class name so the same
        # object is always downloaded from the same page.
        download_page = min(audio_pages, key=lambda page: page.__name__)
        # NOTE(review): music_object is not forwarded to the page here —
        # confirm against the signature of Page.download.
        return self._page_instances[download_page].download(genre=genre, download_all=download_all)

    return DownloadResult(error_message=f"No audio source has been found for {music_object}.")
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import List, Dict, Tuple, Optional
|
from typing import List, Dict, Set, Tuple, Optional
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from ..utils.enums.source import SourcePages, SourceTypes
|
from ..utils.enums.source import SourcePages, SourceTypes
|
||||||
@ -129,6 +129,10 @@ class SourceCollection(Collection):
|
|||||||
|
|
||||||
self._page_to_source_list[source.page_enum].append(source)
|
self._page_to_source_list[source.page_enum].append(source)
|
||||||
|
|
||||||
|
@property
def source_pages(self) -> Set[SourcePages]:
    """
    The distinct pages that the sources of this collection belong to.

    :return: a newly built set holding the ``page_enum`` of every source
        in ``self._data``
    """
    return {entry.page_enum for entry in self._data}
|
||||||
|
|
||||||
def get_sources_from_page(self, source_page: SourcePages) -> List[Source]:
|
def get_sources_from_page(self, source_page: SourcePages) -> List[Source]:
|
||||||
"""
|
"""
|
||||||
getting the sources for a specific page like
|
getting the sources for a specific page like
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from .encyclopaedia_metallum import EncyclopaediaMetallum
|
from .encyclopaedia_metallum import EncyclopaediaMetallum
|
||||||
from .musify import Musify
|
from .musify import Musify
|
||||||
from .abstract import Page
|
from .abstract import Page, INDEPENDENT_DB_OBJECTS
|
||||||
|
Loading…
Reference in New Issue
Block a user