feat: migrated fetch_details and fetch_from_source

2024-05-13 18:03:20 +02:00
parent 9769cf4033
commit 0343c11a62
4 changed files with 45 additions and 116 deletions
--- a/music_kraken/download/page_attributes.py
+++ b/music_kraken/download/page_attributes.py
@@ -1,7 +1,7 @@
-from typing import Tuple, Type, Dict, Set
+from typing import Tuple, Type, Dict, Set, Optional
 from .results import SearchResults
-from ..objects import DatabaseObject, Source
+from ..objects import DatabaseObject as DataObject, Source
 from ..utils.config import youtube_settings
 from ..utils.enums.source import SourcePages
@@ -34,6 +34,13 @@ SHADY_PAGES: Set[Type[Page]] = {
    Musify,
 }
 fetch_map = {
    Song: "fetch_song",
    Album: "fetch_album",
    Artist: "fetch_artist",
    Label: "fetch_label",
 }
 if DEBUG_PAGES:
    DEBUGGING_PAGE = Bandcamp
    print(f"Only downloading from page {DEBUGGING_PAGE}.")
@@ -68,7 +75,12 @@ class Pages:
        for page_type in self.pages:
            self._page_instances[page_type] = page_type()
            self._source_to_page[page_type.SOURCE_TYPE] = page_type
-            
+
    def _get_page_from_enum(self, source_page: SourcePages) -> Optional[Page]:
        """
        Look up the page instance registered for ``source_page``.

        :param source_page: key into ``self._source_to_page``
        :return: the matching entry of ``self._page_instances``, or ``None``
            when ``source_page`` is not a key of ``self._source_to_page``
        """
        if source_page not in self._source_to_page:
            return None
        return self._page_instances[self._source_to_page[source_page]]
    def search(self, query: Query) -> SearchResults:
        result = SearchResults()
@@ -80,22 +92,33 @@ class Pages:
        return result
-    def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1) -> DatabaseObject:
+    def fetch_details(self, data_object: DataObject, stop_at_level: int = 1) -> DataObject:
-        if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
+        if not isinstance(data_object, INDEPENDENT_DB_OBJECTS):
-            return music_object
+            return data_object
-        for source_page in music_object.source_collection.source_pages:
+        source: Source
-            if source_page not in self._source_to_page:
+        for source in data_object.source_collection.get_sources():
-                continue
+            new_data_object = self.fetch_from_source(source=source, stop_at_level=stop_at_level)
            if new_data_object is not None:
                data_object.merge(new_data_object)
-            page_type = self._source_to_page[source_page]
+        return data_object
-            
+
-            if page_type in self._pages_set:
+    def fetch_from_source(self, source: Source, **kwargs) -> Optional[DataObject]:
-                music_object.merge(self._page_instances[page_type].fetch_details(music_object=music_object, stop_at_level=stop_at_level))
+        """
+        Fetch the data object behind ``source`` from the page registered for it.
+
+        :param source: the source to fetch; ``source.page_enum`` selects the page
+        :param kwargs: forwarded unchanged to the page's fetch method
+            (``fetch_details`` passes ``stop_at_level`` this way)
+        :return: the fetched object, marked as fetched from ``source.hash_url``,
+            or ``None`` when no page is registered for the source, the source
+            type has no entry in ``fetch_map``, or the page lacks that method
+        """
+        page: Page = self._get_page_from_enum(source.page_enum)
        if page is None:
            return None
-        return music_object
+        source_type = page.get_source_type(source)
+        # get_source_type can yield None or a type without a fetcher; use .get()
+        # so that case returns None instead of raising KeyError.
+        fetch_name = fetch_map.get(source_type)
+        if fetch_name is None or not hasattr(page, fetch_name):
            return None
-    def is_downloadable(self, music_object: DatabaseObject) -> bool:
+        # Resolve the bound method first, then call it exactly once. The previous
+        # version called it during lookup and then called its return value again.
+        func = getattr(page, fetch_name)
+        data_object: DataObject = func(source=source, **kwargs)
        data_object.mark_as_fetched(source.hash_url)
        return data_object
    def is_downloadable(self, music_object: DataObject) -> bool:
        _page_types = set(self._source_to_page)
        for src in music_object.source_collection.source_pages:
            if src in self._source_to_page:
@@ -104,7 +127,7 @@ class Pages:
        audio_pages = self._audio_pages_set.intersection(_page_types)
        return len(audio_pages) > 0
-    def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult:
+    def download(self, music_object: DataObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult:
        if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
            return DownloadResult(error_message=f"{type(music_object).__name__} can't be downloaded.")
@@ -122,7 +145,7 @@ class Pages:
        return DownloadResult(error_message=f"No audio source has been found for {music_object}.")
-    def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DatabaseObject]:
+    def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]:
        source = Source.match_url(url, SourcePages.MANUAL)
        if source is None:
--- a/music_kraken/objects/__init__.py
+++ b/music_kraken/objects/__init__.py
@@ -24,4 +24,4 @@ from .parents import OuterProxy
 from .artwork import Artwork
-DatabaseObject = TypeVar('T', bound=OuterProxy)
+DatabaseObject = OuterProxy
--- a/music_kraken/objects/parents.py
+++ b/music_kraken/objects/parents.py
@@ -99,7 +99,9 @@ class OuterProxy:
    Wraps the inner data, and provides apis, to naturally access those values.
    """
-    _default_factories: dict = {}
+    source_collection: SourceCollection
    _default_factories: dict = {"source_collection": SourceCollection}
    _outer_attribute: Set[str] = {"options", "metadata", "indexing_values", "option_string"}
    DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = tuple()
--- a/music_kraken/pages/abstract.py
+++ b/music_kraken/pages/abstract.py
@@ -189,103 +189,7 @@ class Page:
    def song_search(self, song: Song) -> List[Song]:
        """
        Return search results for ``song``.

        This implementation does not use ``song`` and always returns an
        empty list.

        :param song: the song to search for
        :return: an empty list
        """
        return []
-    def fetch_details(
+    # to fetch stuff
        self, 
        music_object: DatabaseObject, 
        stop_at_level: int = 1,
    ) -> DatabaseObject:
        """
        when a music object with lacking data is passed in, it returns
        the SAME object **(no copy)** with more detailed data.
        If you for example put in, an album, it fetches the tracklist
        :param music_object:
        :param stop_at_level: 
        This says the depth of the level the scraper will recurse to.
        If this is for example set to 2, then the levels could be:
        1. Level: the album
        2. Level: every song of the album + every artist of the album
        If no additional requests are needed to get the data one level below the supposed stop level
        this gets ignored
        :return detailed_music_object: IT MODIFIES THE INPUT OBJ
        """
        # creating a new object, of the same type
        new_music_object: Optional[DatabaseObject] = None
        fetched_from_url: List[str] = []
        # only certain database objects, have a source list
        if isinstance(music_object, INDEPENDENT_DB_OBJECTS):
            source: Source
            for source in music_object.source_collection.get_sources(self.SOURCE_TYPE):
                if music_object.already_fetched_from(source.hash_url):
                    continue
                tmp = self.fetch_object_from_source(
                    source=source,
                    enforce_type=type(music_object),
                    stop_at_level=stop_at_level,
                    type_string=type(music_object).__name__,
                    entity_string=music_object.option_string,
                )
                if new_music_object is None:
                    new_music_object = tmp
                else:
                    new_music_object.merge(tmp)
                fetched_from_url.append(source.hash_url)
        if new_music_object is not None:
            music_object.merge(new_music_object)
        music_object.mark_as_fetched(*fetched_from_url)
        return music_object
    def fetch_object_from_source(
        self, 
        source: Source, 
        stop_at_level: int = 2,
        enforce_type: Type[DatabaseObject] = None, 
        type_string: str = "",
        entity_string: str = "",
    ) -> Optional[DatabaseObject]:
        obj_type = self.get_source_type(source)
        if obj_type is None:
            return None
        if enforce_type != obj_type and enforce_type is not None:
            self.LOGGER.warning(f"Object type isn't type to enforce: {enforce_type}, {obj_type}")
            return None
        music_object: DatabaseObject = None
        fetch_map = {
            Song: self.fetch_song,
            Album: self.fetch_album,
            Artist: self.fetch_artist,
            Label: self.fetch_label
        }
        if obj_type in fetch_map:
            music_object = fetch_map[obj_type](source, stop_at_level=stop_at_level)
        else:
            self.LOGGER.warning(f"Can't fetch details of type: {obj_type}")
            return None
        if stop_at_level > 0:
            trace(f"fetching {type_string} [{entity_string}] [stop_at_level={stop_at_level}]")
            collection: Collection
            for collection_str in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
                collection = music_object.__getattribute__(collection_str)
                for sub_element in collection:
                    sub_element.merge(
                        self.fetch_details(sub_element, stop_at_level=stop_at_level - 1))
        return music_object
    def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
        """
        Return the song for ``source``.

        This implementation ignores both arguments and returns a new, empty
        ``Song()``.

        :param source: the source to fetch the song from (unused here)
        :param stop_at_level: unused here
        :return: a freshly constructed ``Song``
        """
        return Song()
@@ -24,4 +24,4 @@ from .parents import OuterProxy
 from .artwork import Artwork
-DatabaseObject = TypeVar('T', bound=OuterProxy)
+DatabaseObject = OuterProxy