feat: migrated fetch_details and fetch_from_source

Hazel 2024-05-13 18:03:20 +02:00
parent 9769cf4033
commit 0343c11a62
4 changed files with 45 additions and 116 deletions

View File

@@ -1,7 +1,7 @@
-from typing import Tuple, Type, Dict, Set
+from typing import Tuple, Type, Dict, Set, Optional
 
 from .results import SearchResults
 
-from ..objects import DatabaseObject, Source
+from ..objects import DatabaseObject as DataObject, Source
 from ..utils.config import youtube_settings
 from ..utils.enums.source import SourcePages
@@ -34,6 +34,13 @@ SHADY_PAGES: Set[Type[Page]] = {
     Musify,
 }
 
+fetch_map = {
+    Song: "fetch_song",
+    Album: "fetch_album",
+    Artist: "fetch_artist",
+    Label: "fetch_label",
+}
+
 if DEBUG_PAGES:
     DEBUGGING_PAGE = Bandcamp
     print(f"Only downloading from page {DEBUGGING_PAGE}.")
@@ -69,6 +76,11 @@ class Pages:
             self._page_instances[page_type] = page_type()
             self._source_to_page[page_type.SOURCE_TYPE] = page_type
 
+    def _get_page_from_enum(self, source_page: SourcePages) -> Page:
+        if source_page not in self._source_to_page:
+            return None
+        return self._page_instances[self._source_to_page[source_page]]
+
     def search(self, query: Query) -> SearchResults:
         result = SearchResults()
@@ -80,22 +92,33 @@ class Pages:
 
         return result
 
-    def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1) -> DatabaseObject:
-        if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
-            return music_object
+    def fetch_details(self, data_object: DataObject, stop_at_level: int = 1) -> DataObject:
+        if not isinstance(data_object, INDEPENDENT_DB_OBJECTS):
+            return data_object
 
-        for source_page in music_object.source_collection.source_pages:
-            if source_page not in self._source_to_page:
-                continue
+        source: Source
+        for source in data_object.source_collection.get_sources():
+            new_data_object = self.fetch_from_source(source=source, stop_at_level=stop_at_level)
+            if new_data_object is not None:
+                data_object.merge(new_data_object)
 
-            page_type = self._source_to_page[source_page]
+        return data_object
 
-            if page_type in self._pages_set:
-                music_object.merge(self._page_instances[page_type].fetch_details(music_object=music_object, stop_at_level=stop_at_level))
+    def fetch_from_source(self, source: Source, **kwargs) -> Optional[DataObject]:
+        page: Page = self._get_page_from_enum(source.page_enum)
+        if page is None:
+            return None
 
-        return music_object
+        source_type = page.get_source_type(source)
+        if not hasattr(page, fetch_map[source_type]):
+            return None
 
-    def is_downloadable(self, music_object: DatabaseObject) -> bool:
+        # look up the bound fetch method once, then call it a single time
+        func = getattr(page, fetch_map[source_type])
+        data_object: DataObject = func(source=source, **kwargs)
+        data_object.mark_as_fetched(source.hash_url)
+        return data_object
+
+    def is_downloadable(self, music_object: DataObject) -> bool:
         _page_types = set(self._source_to_page)
         for src in music_object.source_collection.source_pages:
             if src in self._source_to_page:
@@ -104,7 +127,7 @@ class Pages:
         audio_pages = self._audio_pages_set.intersection(_page_types)
 
         return len(audio_pages) > 0
 
-    def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult:
+    def download(self, music_object: DataObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult:
         if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
             return DownloadResult(error_message=f"{type(music_object).__name__} can't be downloaded.")
@@ -122,7 +145,7 @@ class Pages:
 
         return DownloadResult(error_message=f"No audio source has been found for {music_object}.")
 
-    def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DatabaseObject]:
+    def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]:
         source = Source.match_url(url, SourcePages.MANUAL)
 
         if source is None:
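
For orientation, a rough usage sketch of the reworked flow in this file: fetch_details now walks every Source attached to the object and delegates to fetch_from_source, which resolves the owning Page via _get_page_from_enum and dispatches through the module-level fetch_map. The import path, the URL, and the assumption that fetch_url resolves cleanly are illustrative only, not taken from this commit.

    from music_kraken.download.page_attributes import Pages  # assumed import path

    pages = Pages()

    # fetch_url matches the URL to a Source and returns (page class, data object)
    page_cls, album = pages.fetch_url("https://bandcamp.com/some/album", stop_at_level=2)

    # fetch_details iterates album.source_collection.get_sources() and merges
    # whatever fetch_from_source() returns for each source back into the object
    album = pages.fetch_details(album, stop_at_level=1)
    print(album.option_string)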

View File

@@ -24,4 +24,4 @@ from .parents import OuterProxy
 from .artwork import Artwork
 
-DatabaseObject = TypeVar('T', bound=OuterProxy)
+DatabaseObject = OuterProxy
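
Context for the hunk above: a TypeVar only exists for the type checker and cannot be used at runtime, whereas a plain alias to OuterProxy can, for example in isinstance checks. A minimal, self-contained illustration (the empty class is a stand-in, not the real OuterProxy):

    from typing import TypeVar

    class OuterProxy: ...  # stand-in for the real class

    T_DatabaseObject = TypeVar('T', bound=OuterProxy)  # old definition: type-checker only
    DatabaseObject = OuterProxy                        # new definition: usable at runtime

    obj = OuterProxy()
    print(isinstance(obj, DatabaseObject))   # True
    # isinstance(obj, T_DatabaseObject)      # raises TypeError: TypeVars cannot be used with isinstance()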

View File

@@ -99,7 +99,9 @@ class OuterProxy:
     Wraps the inner data, and provides apis, to naturally access those values.
     """
 
-    _default_factories: dict = {}
+    source_collection: SourceCollection
+
+    _default_factories: dict = {"source_collection": SourceCollection}
 
     _outer_attribute: Set[str] = {"options", "metadata", "indexing_values", "option_string"}
 
     DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = tuple()
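
The hunk above declares source_collection on OuterProxy and registers SourceCollection as its default factory. Presumably _default_factories is consulted when an instance is built without that attribute; a generic sketch of that default-factory pattern follows (using list in place of SourceCollection so it runs standalone; this is not the project's actual OuterProxy implementation):

    class ProxySketch:
        _default_factories = {"source_collection": list}  # attribute name -> factory

        def __init__(self, **kwargs):
            # build a fresh default for every attribute the caller did not pass
            for name, factory in self._default_factories.items():
                if kwargs.get(name) is None:
                    kwargs[name] = factory()
            for name, value in kwargs.items():
                setattr(self, name, value)

    p = ProxySketch()
    p.source_collection.append("https://example.com/source")  # starts as an empty list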

View File

@@ -189,103 +189,7 @@ class Page:
     def song_search(self, song: Song) -> List[Song]:
         return []
 
-    def fetch_details(
-        self,
-        music_object: DatabaseObject,
-        stop_at_level: int = 1,
-    ) -> DatabaseObject:
-        """
-        when a music object with lacking data is passed in, it returns
-        the SAME object **(no copy)** with more detailed data.
-        If you for example put in, an album, it fetches the tracklist
-
-        :param music_object:
-        :param stop_at_level:
-        This says the depth of the level the scraper will recurse to.
-        If this is for example set to 2, then the levels could be:
-        1. Level: the album
-        2. Level: every song of the album + every artist of the album
-        If no additional requests are needed to get the data one level below the supposed stop level
-        this gets ignored
-
-        :return detailed_music_object: IT MODIFIES THE INPUT OBJ
-        """
-        # creating a new object, of the same type
-        new_music_object: Optional[DatabaseObject] = None
-        fetched_from_url: List[str] = []
-
-        # only certain database objects, have a source list
-        if isinstance(music_object, INDEPENDENT_DB_OBJECTS):
-            source: Source
-            for source in music_object.source_collection.get_sources(self.SOURCE_TYPE):
-                if music_object.already_fetched_from(source.hash_url):
-                    continue
-
-                tmp = self.fetch_object_from_source(
-                    source=source,
-                    enforce_type=type(music_object),
-                    stop_at_level=stop_at_level,
-                    type_string=type(music_object).__name__,
-                    entity_string=music_object.option_string,
-                )
-
-                if new_music_object is None:
-                    new_music_object = tmp
-                else:
-                    new_music_object.merge(tmp)
-
-                fetched_from_url.append(source.hash_url)
-
-        if new_music_object is not None:
-            music_object.merge(new_music_object)
-
-        music_object.mark_as_fetched(*fetched_from_url)
-        return music_object
-
-    def fetch_object_from_source(
-        self,
-        source: Source,
-        stop_at_level: int = 2,
-        enforce_type: Type[DatabaseObject] = None,
-        type_string: str = "",
-        entity_string: str = "",
-    ) -> Optional[DatabaseObject]:
-        obj_type = self.get_source_type(source)
-
-        if obj_type is None:
-            return None
-
-        if enforce_type != obj_type and enforce_type is not None:
-            self.LOGGER.warning(f"Object type isn't type to enforce: {enforce_type}, {obj_type}")
-            return None
-
-        music_object: DatabaseObject = None
-
-        fetch_map = {
-            Song: self.fetch_song,
-            Album: self.fetch_album,
-            Artist: self.fetch_artist,
-            Label: self.fetch_label
-        }
-
-        if obj_type in fetch_map:
-            music_object = fetch_map[obj_type](source, stop_at_level=stop_at_level)
-        else:
-            self.LOGGER.warning(f"Can't fetch details of type: {obj_type}")
-            return None
-
-        if stop_at_level > 0:
-            trace(f"fetching {type_string} [{entity_string}] [stop_at_level={stop_at_level}]")
-
-            collection: Collection
-            for collection_str in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
-                collection = music_object.__getattribute__(collection_str)
-
-                for sub_element in collection:
-                    sub_element.merge(
-                        self.fetch_details(sub_element, stop_at_level=stop_at_level - 1))
-
-        return music_object
-
     # to fetch stuff
     def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
         return Song()
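
The removed Page.fetch_object_from_source dispatched through a dict of bound methods; the new Pages.fetch_from_source keeps the same idea but goes through the module-level, string-valued fetch_map and getattr. A minimal standalone sketch of that dispatch (class and map names mirror the diff, everything else is illustrative):

    class Song: ...
    class Album: ...

    fetch_map = {Song: "fetch_song", Album: "fetch_album"}

    class DummyPage:
        def fetch_album(self, source):
            return f"album fetched from {source}"

    def fetch_from_source(page, obj_type, source):
        # look up the method name for this object type and call it if the page provides it
        if not hasattr(page, fetch_map[obj_type]):
            return None
        return getattr(page, fetch_map[obj_type])(source=source)

    print(fetch_from_source(DummyPage(), Album, "https://example.com/release"))  # album fetched from ...
    print(fetch_from_source(DummyPage(), Song, "https://example.com/track"))     # None, DummyPage has no fetch_song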