feat: migrated fetch details and from source
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful

This commit is contained in:
Hazel 2024-05-13 18:03:20 +02:00
parent 9769cf4033
commit 0343c11a62
4 changed files with 45 additions and 116 deletions

View File

@ -1,7 +1,7 @@
from typing import Tuple, Type, Dict, Set from typing import Tuple, Type, Dict, Set, Optional
from .results import SearchResults from .results import SearchResults
from ..objects import DatabaseObject, Source from ..objects import DatabaseObject as DataObject, Source
from ..utils.config import youtube_settings from ..utils.config import youtube_settings
from ..utils.enums.source import SourcePages from ..utils.enums.source import SourcePages
@ -34,6 +34,13 @@ SHADY_PAGES: Set[Type[Page]] = {
Musify, Musify,
} }
# Name of the page method used to fetch each kind of data object.
# The values are attribute names (strings), not bound methods; they are
# passed to hasattr()/getattr() on a page instance.
fetch_map = {
    Song: "fetch_song",
    Album: "fetch_album",
    Artist: "fetch_artist",
    Label: "fetch_label",
}
if DEBUG_PAGES: if DEBUG_PAGES:
DEBUGGING_PAGE = Bandcamp DEBUGGING_PAGE = Bandcamp
print(f"Only downloading from page {DEBUGGING_PAGE}.") print(f"Only downloading from page {DEBUGGING_PAGE}.")
@ -68,7 +75,12 @@ class Pages:
for page_type in self.pages: for page_type in self.pages:
self._page_instances[page_type] = page_type() self._page_instances[page_type] = page_type()
self._source_to_page[page_type.SOURCE_TYPE] = page_type self._source_to_page[page_type.SOURCE_TYPE] = page_type
def _get_page_from_enum(self, source_page: SourcePages) -> Optional[Page]:
    """
    Return the page instance registered for the given source enum.

    :param source_page: the source enum to look up in ``self._source_to_page``.
    :return: the matching entry of ``self._page_instances``, or ``None``
        if no page type is registered for ``source_page``.
    """
    # one lookup instead of a membership test followed by indexing
    page_type = self._source_to_page.get(source_page)
    if page_type is None:
        return None

    return self._page_instances[page_type]
def search(self, query: Query) -> SearchResults: def search(self, query: Query) -> SearchResults:
result = SearchResults() result = SearchResults()
@ -80,22 +92,33 @@ class Pages:
return result return result
# NOTE(review): this span is a side-by-side diff rendering; each line appears
# to hold the removed (old) text followed by the added (new) text, and the
# original indentation is lost. The comments below refer to the new side.
#
# New fetch_details: iterates data_object.source_collection.get_sources(),
# calls self.fetch_from_source() for every source and merges each non-None
# result into data_object, which is then returned.
# NOTE(review): the removed Page.fetch_details skipped sources for which
# already_fetched_from(source.hash_url) was true; no such check is visible on
# the new side - confirm that re-fetching is intended.
def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1) -> DatabaseObject: def fetch_details(self, data_object: DataObject, stop_at_level: int = 1) -> DataObject:
if not isinstance(music_object, INDEPENDENT_DB_OBJECTS): if not isinstance(data_object, INDEPENDENT_DB_OBJECTS):
return music_object return data_object
for source_page in music_object.source_collection.source_pages: source: Source
if source_page not in self._source_to_page: for source in data_object.source_collection.get_sources():
continue new_data_object = self.fetch_from_source(source=source, stop_at_level=stop_at_level)
if new_data_object is not None:
data_object.merge(new_data_object)
page_type = self._source_to_page[source_page] return data_object
# New fetch_from_source: resolves the page for source.page_enum, asks it for
# the source's type and dispatches to the method named in fetch_map.
if page_type in self._pages_set: def fetch_from_source(self, source: Source, **kwargs) -> Optional[DataObject]:
music_object.merge(self._page_instances[page_type].fetch_details(music_object=music_object, stop_at_level=stop_at_level)) page: Page = self._get_page_from_enum(source.page_enum)
if page is None:
return None
return music_object source_type = page.get_source_type(source)
# NOTE(review): fetch_map[source_type] is a plain dict index, so a source_type
# that is not a key raises KeyError before hasattr() runs. The removed
# fetch_object_from_source guarded against get_source_type() returning None -
# presumably that can still happen; confirm.
if not hasattr(page, fetch_map[source_type]):
return None
# NOTE(review): on the new side, getattr(...)(source=source, **kwargs) already
# calls the fetch method; its return value is bound to `func` and then called
# again as func(source=source) on the next line. Presumably only one call is
# intended (binding the method, then calling it once with **kwargs) - confirm.
def is_downloadable(self, music_object: DatabaseObject) -> bool: func = getattr(page, fetch_map[source_type])(source=source, **kwargs)
data_object: DataObject = func(source=source)
data_object.mark_as_fetched(source.hash_url)
return data_object
def is_downloadable(self, music_object: DataObject) -> bool:
_page_types = set(self._source_to_page) _page_types = set(self._source_to_page)
for src in music_object.source_collection.source_pages: for src in music_object.source_collection.source_pages:
if src in self._source_to_page: if src in self._source_to_page:
@ -104,7 +127,7 @@ class Pages:
audio_pages = self._audio_pages_set.intersection(_page_types) audio_pages = self._audio_pages_set.intersection(_page_types)
return len(audio_pages) > 0 return len(audio_pages) > 0
def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult: def download(self, music_object: DataObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult:
if not isinstance(music_object, INDEPENDENT_DB_OBJECTS): if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
return DownloadResult(error_message=f"{type(music_object).__name__} can't be downloaded.") return DownloadResult(error_message=f"{type(music_object).__name__} can't be downloaded.")
@ -122,7 +145,7 @@ class Pages:
return DownloadResult(error_message=f"No audio source has been found for {music_object}.") return DownloadResult(error_message=f"No audio source has been found for {music_object}.")
def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DatabaseObject]: def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]:
source = Source.match_url(url, SourcePages.MANUAL) source = Source.match_url(url, SourcePages.MANUAL)
if source is None: if source is None:

View File

@ -24,4 +24,4 @@ from .parents import OuterProxy
from .artwork import Artwork from .artwork import Artwork
DatabaseObject = TypeVar('T', bound=OuterProxy) DatabaseObject = OuterProxy

View File

@ -99,7 +99,9 @@ class OuterProxy:
Wraps the inner data, and provides apis, to naturally access those values. Wraps the inner data, and provides apis, to naturally access those values.
""" """
_default_factories: dict = {} source_collection: SourceCollection
_default_factories: dict = {"source_collection": SourceCollection}
_outer_attribute: Set[str] = {"options", "metadata", "indexing_values", "option_string"} _outer_attribute: Set[str] = {"options", "metadata", "indexing_values", "option_string"}
DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = tuple() DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = tuple()

View File

@ -189,103 +189,7 @@ class Page:
def song_search(self, song: Song) -> List[Song]: def song_search(self, song: Song) -> List[Song]:
return [] return []
# NOTE(review): removed code as shown in the diff; indentation was lost in this
# rendering, so the nesting described in the comments below is inferred from
# the statement order - confirm against the original file.
def fetch_details( # to fetch stuff
self,
music_object: DatabaseObject,
stop_at_level: int = 1,
) -> DatabaseObject:
"""
when a music object with lacking data is passed in, it returns
the SAME object **(no copy)** with more detailed data.
If you for example put in, an album, it fetches the tracklist
:param music_object:
:param stop_at_level:
This says the depth of the level the scraper will recurse to.
If this is for example set to 2, then the levels could be:
1. Level: the album
2. Level: every song of the album + every artist of the album
If no additional requests are needed to get the data one level below the supposed stop level
this gets ignored
:return detailed_music_object: IT MODIFIES THE INPUT OBJ
"""
# creating a new object, of the same type
new_music_object: Optional[DatabaseObject] = None
# hash urls of every source that gets fetched in the loop below
fetched_from_url: List[str] = []
# only certain database objects, have a source list
if isinstance(music_object, INDEPENDENT_DB_OBJECTS):
source: Source
# only sources belonging to this page (self.SOURCE_TYPE) are requested
for source in music_object.source_collection.get_sources(self.SOURCE_TYPE):
# skip sources this object was already fetched from
if music_object.already_fetched_from(source.hash_url):
continue
# the fetched object must have the same type as music_object (enforce_type);
# type_string and entity_string are forwarded for the trace output
tmp = self.fetch_object_from_source(
source=source,
enforce_type=type(music_object),
stop_at_level=stop_at_level,
type_string=type(music_object).__name__,
entity_string=music_object.option_string,
)
# the first result becomes the accumulator, later results are merged into it
# NOTE(review): fetch_object_from_source can return None; tmp is used here
# without a visible None check - confirm merge() tolerates None.
if new_music_object is None:
new_music_object = tmp
else:
new_music_object.merge(tmp)
fetched_from_url.append(source.hash_url)
# fold everything that was fetched back into the object that was passed in
if new_music_object is not None:
music_object.merge(new_music_object)
music_object.mark_as_fetched(*fetched_from_url)
return music_object
# Fetches the object a single source points to and, while stop_at_level is
# above 0, also fetches the details of the elements in its downward collections.
# NOTE(review): removed code as shown in the diff; indentation was lost in this
# rendering, so the nesting described in the comments below is inferred from
# the statement order - confirm against the original file.
def fetch_object_from_source(
self,
source: Source,
stop_at_level: int = 2,
enforce_type: Type[DatabaseObject] = None,
type_string: str = "",
entity_string: str = "",
) -> Optional[DatabaseObject]:
# the page decides what kind of object the source refers to; None means unknown
obj_type = self.get_source_type(source)
if obj_type is None:
return None
# when a type is enforced, a source of any other type is rejected with a warning
if enforce_type != obj_type and enforce_type is not None:
self.LOGGER.warning(f"Object type isn't type to enforce: {enforce_type}, {obj_type}")
return None
music_object: DatabaseObject = None
# dispatch table from object type to the bound fetch method of this page
fetch_map = {
Song: self.fetch_song,
Album: self.fetch_album,
Artist: self.fetch_artist,
Label: self.fetch_label
}
if obj_type in fetch_map:
music_object = fetch_map[obj_type](source, stop_at_level=stop_at_level)
else:
self.LOGGER.warning(f"Can't fetch details of type: {obj_type}")
return None
# recurse one level down: each element of every collection named in
# DOWNWARDS_COLLECTION_STRING_ATTRIBUTES is passed to fetch_details with
# stop_at_level - 1 and the result is merged into that element
if stop_at_level > 0:
trace(f"fetching {type_string} [{entity_string}] [stop_at_level={stop_at_level}]")
collection: Collection
for collection_str in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
collection = music_object.__getattribute__(collection_str)
for sub_element in collection:
sub_element.merge(
self.fetch_details(sub_element, stop_at_level=stop_at_level - 1))
return music_object
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
return Song() return Song()