feat: migrated fetch details and from source
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
This commit is contained in:
parent
9769cf4033
commit
0343c11a62
@ -1,7 +1,7 @@
|
||||
from typing import Tuple, Type, Dict, Set
|
||||
from typing import Tuple, Type, Dict, Set, Optional
|
||||
|
||||
from .results import SearchResults
|
||||
from ..objects import DatabaseObject, Source
|
||||
from ..objects import DatabaseObject as DataObject, Source
|
||||
|
||||
from ..utils.config import youtube_settings
|
||||
from ..utils.enums.source import SourcePages
|
||||
@ -34,6 +34,13 @@ SHADY_PAGES: Set[Type[Page]] = {
|
||||
Musify,
|
||||
}
|
||||
|
||||
fetch_map = {
|
||||
Song: "fetch_song",
|
||||
Album: "fetch_album",
|
||||
Artist: "fetch_artist",
|
||||
Label: "fetch_label",
|
||||
}
|
||||
|
||||
if DEBUG_PAGES:
|
||||
DEBUGGING_PAGE = Bandcamp
|
||||
print(f"Only downloading from page {DEBUGGING_PAGE}.")
|
||||
@ -69,6 +76,11 @@ class Pages:
|
||||
self._page_instances[page_type] = page_type()
|
||||
self._source_to_page[page_type.SOURCE_TYPE] = page_type
|
||||
|
||||
def _get_page_from_enum(self, source_page: SourcePages) -> Page:
|
||||
if source_page not in self._source_to_page:
|
||||
return None
|
||||
return self._page_instances[self._source_to_page[source_page]]
|
||||
|
||||
def search(self, query: Query) -> SearchResults:
|
||||
result = SearchResults()
|
||||
|
||||
@ -80,22 +92,33 @@ class Pages:
|
||||
|
||||
return result
|
||||
|
||||
def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1) -> DatabaseObject:
|
||||
if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
|
||||
return music_object
|
||||
def fetch_details(self, data_object: DataObject, stop_at_level: int = 1) -> DataObject:
|
||||
if not isinstance(data_object, INDEPENDENT_DB_OBJECTS):
|
||||
return data_object
|
||||
|
||||
for source_page in music_object.source_collection.source_pages:
|
||||
if source_page not in self._source_to_page:
|
||||
continue
|
||||
source: Source
|
||||
for source in data_object.source_collection.get_sources():
|
||||
new_data_object = self.fetch_from_source(source=source, stop_at_level=stop_at_level)
|
||||
if new_data_object is not None:
|
||||
data_object.merge(new_data_object)
|
||||
|
||||
page_type = self._source_to_page[source_page]
|
||||
return data_object
|
||||
|
||||
if page_type in self._pages_set:
|
||||
music_object.merge(self._page_instances[page_type].fetch_details(music_object=music_object, stop_at_level=stop_at_level))
|
||||
def fetch_from_source(self, source: Source, **kwargs) -> Optional[DataObject]:
|
||||
page: Page = self._get_page_from_enum(source.page_enum)
|
||||
if page is None:
|
||||
return None
|
||||
|
||||
return music_object
|
||||
source_type = page.get_source_type(source)
|
||||
if not hasattr(page, fetch_map[source_type]):
|
||||
return None
|
||||
|
||||
def is_downloadable(self, music_object: DatabaseObject) -> bool:
|
||||
func = getattr(page, fetch_map[source_type])(source=source, **kwargs)
|
||||
data_object: DataObject = func(source=source)
|
||||
data_object.mark_as_fetched(source.hash_url)
|
||||
return data_object
|
||||
|
||||
def is_downloadable(self, music_object: DataObject) -> bool:
|
||||
_page_types = set(self._source_to_page)
|
||||
for src in music_object.source_collection.source_pages:
|
||||
if src in self._source_to_page:
|
||||
@ -104,7 +127,7 @@ class Pages:
|
||||
audio_pages = self._audio_pages_set.intersection(_page_types)
|
||||
return len(audio_pages) > 0
|
||||
|
||||
def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult:
|
||||
def download(self, music_object: DataObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult:
|
||||
if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
|
||||
return DownloadResult(error_message=f"{type(music_object).__name__} can't be downloaded.")
|
||||
|
||||
@ -122,7 +145,7 @@ class Pages:
|
||||
|
||||
return DownloadResult(error_message=f"No audio source has been found for {music_object}.")
|
||||
|
||||
def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DatabaseObject]:
|
||||
def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]:
|
||||
source = Source.match_url(url, SourcePages.MANUAL)
|
||||
|
||||
if source is None:
|
||||
|
@ -24,4 +24,4 @@ from .parents import OuterProxy
|
||||
|
||||
from .artwork import Artwork
|
||||
|
||||
DatabaseObject = TypeVar('T', bound=OuterProxy)
|
||||
DatabaseObject = OuterProxy
|
||||
|
@ -99,7 +99,9 @@ class OuterProxy:
|
||||
Wraps the inner data, and provides apis, to naturally access those values.
|
||||
"""
|
||||
|
||||
_default_factories: dict = {}
|
||||
source_collection: SourceCollection
|
||||
|
||||
_default_factories: dict = {"source_collection": SourceCollection}
|
||||
_outer_attribute: Set[str] = {"options", "metadata", "indexing_values", "option_string"}
|
||||
|
||||
DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = tuple()
|
||||
|
@ -189,103 +189,7 @@ class Page:
|
||||
def song_search(self, song: Song) -> List[Song]:
|
||||
return []
|
||||
|
||||
def fetch_details(
|
||||
self,
|
||||
music_object: DatabaseObject,
|
||||
stop_at_level: int = 1,
|
||||
) -> DatabaseObject:
|
||||
"""
|
||||
when a music object with lacking data is passed in, it returns
|
||||
the SAME object **(no copy)** with more detailed data.
|
||||
If you for example put in, an album, it fetches the tracklist
|
||||
|
||||
:param music_object:
|
||||
:param stop_at_level:
|
||||
This says the depth of the level the scraper will recurse to.
|
||||
If this is for example set to 2, then the levels could be:
|
||||
1. Level: the album
|
||||
2. Level: every song of the album + every artist of the album
|
||||
If no additional requests are needed to get the data one level below the supposed stop level
|
||||
this gets ignored
|
||||
:return detailed_music_object: IT MODIFIES THE INPUT OBJ
|
||||
"""
|
||||
# creating a new object, of the same type
|
||||
new_music_object: Optional[DatabaseObject] = None
|
||||
fetched_from_url: List[str] = []
|
||||
|
||||
# only certain database objects, have a source list
|
||||
if isinstance(music_object, INDEPENDENT_DB_OBJECTS):
|
||||
source: Source
|
||||
for source in music_object.source_collection.get_sources(self.SOURCE_TYPE):
|
||||
if music_object.already_fetched_from(source.hash_url):
|
||||
continue
|
||||
|
||||
tmp = self.fetch_object_from_source(
|
||||
source=source,
|
||||
enforce_type=type(music_object),
|
||||
stop_at_level=stop_at_level,
|
||||
type_string=type(music_object).__name__,
|
||||
entity_string=music_object.option_string,
|
||||
)
|
||||
|
||||
if new_music_object is None:
|
||||
new_music_object = tmp
|
||||
else:
|
||||
new_music_object.merge(tmp)
|
||||
fetched_from_url.append(source.hash_url)
|
||||
|
||||
if new_music_object is not None:
|
||||
music_object.merge(new_music_object)
|
||||
|
||||
music_object.mark_as_fetched(*fetched_from_url)
|
||||
return music_object
|
||||
|
||||
def fetch_object_from_source(
|
||||
self,
|
||||
source: Source,
|
||||
stop_at_level: int = 2,
|
||||
enforce_type: Type[DatabaseObject] = None,
|
||||
type_string: str = "",
|
||||
entity_string: str = "",
|
||||
) -> Optional[DatabaseObject]:
|
||||
|
||||
obj_type = self.get_source_type(source)
|
||||
|
||||
if obj_type is None:
|
||||
return None
|
||||
|
||||
if enforce_type != obj_type and enforce_type is not None:
|
||||
self.LOGGER.warning(f"Object type isn't type to enforce: {enforce_type}, {obj_type}")
|
||||
return None
|
||||
|
||||
music_object: DatabaseObject = None
|
||||
|
||||
fetch_map = {
|
||||
Song: self.fetch_song,
|
||||
Album: self.fetch_album,
|
||||
Artist: self.fetch_artist,
|
||||
Label: self.fetch_label
|
||||
}
|
||||
|
||||
if obj_type in fetch_map:
|
||||
music_object = fetch_map[obj_type](source, stop_at_level=stop_at_level)
|
||||
else:
|
||||
self.LOGGER.warning(f"Can't fetch details of type: {obj_type}")
|
||||
return None
|
||||
|
||||
if stop_at_level > 0:
|
||||
trace(f"fetching {type_string} [{entity_string}] [stop_at_level={stop_at_level}]")
|
||||
|
||||
collection: Collection
|
||||
for collection_str in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
|
||||
collection = music_object.__getattribute__(collection_str)
|
||||
|
||||
for sub_element in collection:
|
||||
sub_element.merge(
|
||||
self.fetch_details(sub_element, stop_at_level=stop_at_level - 1))
|
||||
|
||||
return music_object
|
||||
|
||||
# to fetch stuff
|
||||
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
|
||||
return Song()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user