feat: migrated fetch details and from source
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
This commit is contained in:
parent
9769cf4033
commit
0343c11a62
@ -1,7 +1,7 @@
|
|||||||
from typing import Tuple, Type, Dict, Set
|
from typing import Tuple, Type, Dict, Set, Optional
|
||||||
|
|
||||||
from .results import SearchResults
|
from .results import SearchResults
|
||||||
from ..objects import DatabaseObject, Source
|
from ..objects import DatabaseObject as DataObject, Source
|
||||||
|
|
||||||
from ..utils.config import youtube_settings
|
from ..utils.config import youtube_settings
|
||||||
from ..utils.enums.source import SourcePages
|
from ..utils.enums.source import SourcePages
|
||||||
@ -34,6 +34,13 @@ SHADY_PAGES: Set[Type[Page]] = {
|
|||||||
Musify,
|
Musify,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Maps a data-object type to the NAME of the page method that fetches it.
# The name is resolved on the page instance with hasattr()/getattr() at
# fetch time, so a page only needs to define the methods it supports.
fetch_map = {
    Song: "fetch_song",
    Album: "fetch_album",
    Artist: "fetch_artist",
    Label: "fetch_label",
}
|
||||||
|
|
||||||
if DEBUG_PAGES:
|
if DEBUG_PAGES:
|
||||||
DEBUGGING_PAGE = Bandcamp
|
DEBUGGING_PAGE = Bandcamp
|
||||||
print(f"Only downloading from page {DEBUGGING_PAGE}.")
|
print(f"Only downloading from page {DEBUGGING_PAGE}.")
|
||||||
@ -68,7 +75,12 @@ class Pages:
|
|||||||
for page_type in self.pages:
|
for page_type in self.pages:
|
||||||
self._page_instances[page_type] = page_type()
|
self._page_instances[page_type] = page_type()
|
||||||
self._source_to_page[page_type.SOURCE_TYPE] = page_type
|
self._source_to_page[page_type.SOURCE_TYPE] = page_type
|
||||||
|
|
||||||
|
def _get_page_from_enum(self, source_page: SourcePages) -> Optional[Page]:
    """
    Look up the page instance that is registered for a source page enum.

    :param source_page: the enum value identifying where a source comes from.
    :return: the instantiated page handling ``source_page``,
        or ``None`` if no page is registered for it.
    """
    # One lookup instead of a membership test followed by indexing.
    page_type = self._source_to_page.get(source_page)
    if page_type is None:
        return None

    return self._page_instances[page_type]
|
||||||
|
|
||||||
def search(self, query: Query) -> SearchResults:
|
def search(self, query: Query) -> SearchResults:
|
||||||
result = SearchResults()
|
result = SearchResults()
|
||||||
|
|
||||||
@ -80,22 +92,33 @@ class Pages:
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def fetch_details(self, data_object: DataObject, stop_at_level: int = 1) -> DataObject:
    """
    Enrich ``data_object`` in place with the data fetched from each of its sources.

    :param data_object: the object to enrich; objects that are not
        instances of ``INDEPENDENT_DB_OBJECTS`` are returned untouched.
    :param stop_at_level: forwarded to ``fetch_from_source`` for every source.
    :return: the same ``data_object`` that was passed in (no copy).
    """
    if isinstance(data_object, INDEPENDENT_DB_OBJECTS):
        for source in data_object.source_collection.get_sources():
            fetched = self.fetch_from_source(source=source, stop_at_level=stop_at_level)
            # Sources that yield nothing are skipped.
            if fetched is None:
                continue
            data_object.merge(fetched)

    return data_object
|
||||||
|
|
||||||
if page_type in self._pages_set:
|
def fetch_from_source(self, source: Source, **kwargs) -> Optional[DataObject]:
    """
    Fetch the data object a source points to from the page responsible for it.

    :param source: the source to fetch; its ``page_enum`` selects the page.
    :param kwargs: forwarded unchanged to the page's fetch method
        (``fetch_details`` passes ``stop_at_level`` this way).
    :return: the fetched object, marked as fetched from ``source.hash_url``,
        or ``None`` if no page is registered for the source, the source type
        is unknown or unmapped, or the page lacks the matching fetch method.
    """
    page = self._get_page_from_enum(source.page_enum)
    if page is None:
        return None

    # get_source_type may return None or a type without an entry in fetch_map;
    # .get() turns both cases into "nothing to fetch" instead of a KeyError.
    fetch_name = fetch_map.get(page.get_source_type(source))
    if fetch_name is None:
        return None

    fetch_function = getattr(page, fetch_name, None)
    if fetch_function is None:
        return None

    # Call the fetch method exactly once: the previous code invoked it and then
    # tried to call its return value again, losing the kwargs on the way.
    data_object: DataObject = fetch_function(source=source, **kwargs)
    data_object.mark_as_fetched(source.hash_url)
    return data_object
|
||||||
|
|
||||||
|
def is_downloadable(self, music_object: DataObject) -> bool:
|
||||||
_page_types = set(self._source_to_page)
|
_page_types = set(self._source_to_page)
|
||||||
for src in music_object.source_collection.source_pages:
|
for src in music_object.source_collection.source_pages:
|
||||||
if src in self._source_to_page:
|
if src in self._source_to_page:
|
||||||
@ -104,7 +127,7 @@ class Pages:
|
|||||||
audio_pages = self._audio_pages_set.intersection(_page_types)
|
audio_pages = self._audio_pages_set.intersection(_page_types)
|
||||||
return len(audio_pages) > 0
|
return len(audio_pages) > 0
|
||||||
|
|
||||||
def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult:
|
def download(self, music_object: DataObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult:
|
||||||
if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
|
if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
|
||||||
return DownloadResult(error_message=f"{type(music_object).__name__} can't be downloaded.")
|
return DownloadResult(error_message=f"{type(music_object).__name__} can't be downloaded.")
|
||||||
|
|
||||||
@ -122,7 +145,7 @@ class Pages:
|
|||||||
|
|
||||||
return DownloadResult(error_message=f"No audio source has been found for {music_object}.")
|
return DownloadResult(error_message=f"No audio source has been found for {music_object}.")
|
||||||
|
|
||||||
def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DatabaseObject]:
|
def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]:
|
||||||
source = Source.match_url(url, SourcePages.MANUAL)
|
source = Source.match_url(url, SourcePages.MANUAL)
|
||||||
|
|
||||||
if source is None:
|
if source is None:
|
||||||
|
@ -24,4 +24,4 @@ from .parents import OuterProxy
|
|||||||
|
|
||||||
from .artwork import Artwork
|
from .artwork import Artwork
|
||||||
|
|
||||||
DatabaseObject = TypeVar('T', bound=OuterProxy)
|
DatabaseObject = OuterProxy
|
||||||
|
@ -99,7 +99,9 @@ class OuterProxy:
|
|||||||
Wraps the inner data, and provides apis, to naturally access those values.
|
Wraps the inner data, and provides apis, to naturally access those values.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_default_factories: dict = {}
|
source_collection: SourceCollection
|
||||||
|
|
||||||
|
_default_factories: dict = {"source_collection": SourceCollection}
|
||||||
_outer_attribute: Set[str] = {"options", "metadata", "indexing_values", "option_string"}
|
_outer_attribute: Set[str] = {"options", "metadata", "indexing_values", "option_string"}
|
||||||
|
|
||||||
DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = tuple()
|
DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = tuple()
|
||||||
|
@ -189,103 +189,7 @@ class Page:
|
|||||||
def song_search(self, song: Song) -> List[Song]:
    """
    Search for ``song``; this implementation finds nothing.

    :param song: the song to search for (unused here).
    :return: always an empty list.
    """
    return []
|
||||||
|
|
||||||
def fetch_details(
|
# to fetch stuff
|
||||||
self,
|
|
||||||
music_object: DatabaseObject,
|
|
||||||
stop_at_level: int = 1,
|
|
||||||
) -> DatabaseObject:
|
|
||||||
"""
|
|
||||||
when a music object with lacking data is passed in, it returns
|
|
||||||
the SAME object **(no copy)** with more detailed data.
|
|
||||||
If you for example put in, an album, it fetches the tracklist
|
|
||||||
|
|
||||||
:param music_object:
|
|
||||||
:param stop_at_level:
|
|
||||||
This says the depth of the level the scraper will recurse to.
|
|
||||||
If this is for example set to 2, then the levels could be:
|
|
||||||
1. Level: the album
|
|
||||||
2. Level: every song of the album + every artist of the album
|
|
||||||
If no additional requests are needed to get the data one level below the supposed stop level
|
|
||||||
this gets ignored
|
|
||||||
:return detailed_music_object: IT MODIFIES THE INPUT OBJ
|
|
||||||
"""
|
|
||||||
# creating a new object, of the same type
|
|
||||||
new_music_object: Optional[DatabaseObject] = None
|
|
||||||
fetched_from_url: List[str] = []
|
|
||||||
|
|
||||||
# only certain database objects, have a source list
|
|
||||||
if isinstance(music_object, INDEPENDENT_DB_OBJECTS):
|
|
||||||
source: Source
|
|
||||||
for source in music_object.source_collection.get_sources(self.SOURCE_TYPE):
|
|
||||||
if music_object.already_fetched_from(source.hash_url):
|
|
||||||
continue
|
|
||||||
|
|
||||||
tmp = self.fetch_object_from_source(
|
|
||||||
source=source,
|
|
||||||
enforce_type=type(music_object),
|
|
||||||
stop_at_level=stop_at_level,
|
|
||||||
type_string=type(music_object).__name__,
|
|
||||||
entity_string=music_object.option_string,
|
|
||||||
)
|
|
||||||
|
|
||||||
if new_music_object is None:
|
|
||||||
new_music_object = tmp
|
|
||||||
else:
|
|
||||||
new_music_object.merge(tmp)
|
|
||||||
fetched_from_url.append(source.hash_url)
|
|
||||||
|
|
||||||
if new_music_object is not None:
|
|
||||||
music_object.merge(new_music_object)
|
|
||||||
|
|
||||||
music_object.mark_as_fetched(*fetched_from_url)
|
|
||||||
return music_object
|
|
||||||
|
|
||||||
def fetch_object_from_source(
|
|
||||||
self,
|
|
||||||
source: Source,
|
|
||||||
stop_at_level: int = 2,
|
|
||||||
enforce_type: Type[DatabaseObject] = None,
|
|
||||||
type_string: str = "",
|
|
||||||
entity_string: str = "",
|
|
||||||
) -> Optional[DatabaseObject]:
|
|
||||||
|
|
||||||
obj_type = self.get_source_type(source)
|
|
||||||
|
|
||||||
if obj_type is None:
|
|
||||||
return None
|
|
||||||
|
|
||||||
if enforce_type != obj_type and enforce_type is not None:
|
|
||||||
self.LOGGER.warning(f"Object type isn't type to enforce: {enforce_type}, {obj_type}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
music_object: DatabaseObject = None
|
|
||||||
|
|
||||||
fetch_map = {
|
|
||||||
Song: self.fetch_song,
|
|
||||||
Album: self.fetch_album,
|
|
||||||
Artist: self.fetch_artist,
|
|
||||||
Label: self.fetch_label
|
|
||||||
}
|
|
||||||
|
|
||||||
if obj_type in fetch_map:
|
|
||||||
music_object = fetch_map[obj_type](source, stop_at_level=stop_at_level)
|
|
||||||
else:
|
|
||||||
self.LOGGER.warning(f"Can't fetch details of type: {obj_type}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
if stop_at_level > 0:
|
|
||||||
trace(f"fetching {type_string} [{entity_string}] [stop_at_level={stop_at_level}]")
|
|
||||||
|
|
||||||
collection: Collection
|
|
||||||
for collection_str in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
|
|
||||||
collection = music_object.__getattribute__(collection_str)
|
|
||||||
|
|
||||||
for sub_element in collection:
|
|
||||||
sub_element.merge(
|
|
||||||
self.fetch_details(sub_element, stop_at_level=stop_at_level - 1))
|
|
||||||
|
|
||||||
return music_object
|
|
||||||
|
|
||||||
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
    """
    Fetch the song behind ``source``; this implementation fetches nothing.

    :param source: the source to fetch from (unused here).
    :param stop_at_level: recursion depth limit (unused here).
    :return: a ``Song`` constructed without arguments.
    """
    return Song()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user