From f009bf7bb86c4bab831745850ca6ee65010e425c Mon Sep 17 00:00:00 2001
From: Lars Noack
Date: Wed, 10 Apr 2024 11:20:49 +0200
Subject: [PATCH] feat: massive improvements to the fetch and download order

---
 music_kraken/objects/parents.py               | 21 +++++-
 music_kraken/objects/source.py                | 14 ++++
 music_kraken/pages/abstract.py                | 67 ++++++++++++++-----
 .../pages/youtube_music/super_youtube.py      |  2 +-
 music_kraken/utils/__init__.py                |  5 ++
 .../utils/config/config_files/main_config.py  |  2 +
 6 files changed, 92 insertions(+), 19 deletions(-)

diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py
index 6f28a43..13ae41a 100644
--- a/music_kraken/objects/parents.py
+++ b/music_kraken/objects/parents.py
@@ -7,7 +7,8 @@ from functools import lru_cache
 from typing import Optional, Dict, Tuple, List, Type, Generic, Any, TypeVar, Set
 
 from .metadata import Metadata
-from ..utils.config import logging_settings
+from ..utils import get_unix_time
+from ..utils.config import logging_settings, main_settings
 from ..utils.shared import HIGHEST_ID
 from ..utils.hacking import MetaClass
 
@@ -96,6 +97,7 @@ class OuterProxy:
 
                 del kwargs[name]
 
+        self._fetched_from: dict = {}
         self._inner: InnerData = InnerData(**kwargs)
         self.__init_collections__()
 
@@ -176,6 +178,23 @@ class OuterProxy:
         self._inner.__merge__(__other._inner, override=override)
         __other._inner = self._inner
 
+    def mark_as_fetched(self, *url_hash_list: str):
+        """Record the current unix time as the fetch time for every given url hash."""
+        for url_hash in url_hash_list:
+            self._fetched_from[url_hash] = {
+                "time": get_unix_time(),
+                "url": url_hash,
+            }
+
+    def already_fetched_from(self, url_hash: str) -> bool:
+        """Return True if url_hash was marked as fetched less than `refresh_after` seconds ago."""
+        res = self._fetched_from.get(url_hash, None)
+
+        if res is None:
+            return False
+
+        return get_unix_time() - res["time"] < main_settings["refresh_after"]
+
     @property
     def metadata(self) -> Metadata:
         """
diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py
index 8f39cb0..a60ce6d 100644
--- a/music_kraken/objects/source.py
+++ b/music_kraken/objects/source.py
@@ -86,6 +86,20 @@ class Source(OuterProxy):
             Mapping.ARTIST_WEBPAGE_URL: [self.url]
         })
 
+    @property
+    def hash_url(self) -> str:
+        """
+        The url normalized for use as a lookup key: stripped, lowercased
+        and without a leading "https://" or "http://" scheme.
+        """
+        # str.lstrip removes a *set of characters*, not a prefix, so
+        # lstrip("https://") would turn "https://spotify.com" into "otify.com"
+        url = self.url.strip().lower()
+        for scheme in ("https://", "http://"):
+            if url.startswith(scheme):
+                return url[len(scheme):]
+        return url
+
     @property
     def metadata(self) -> Metadata:
         return self.get_song_metadata()
diff --git a/music_kraken/pages/abstract.py b/music_kraken/pages/abstract.py
index 0261d07..bee8b67 100644
--- a/music_kraken/pages/abstract.py
+++ b/music_kraken/pages/abstract.py
@@ -218,8 +218,12 @@ class Page:
     def song_search(self, song: Song) -> List[Song]:
         return []
 
-    def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1,
-                      post_process: bool = True) -> DatabaseObject:
+    def fetch_details(
+        self,
+        music_object: DatabaseObject,
+        stop_at_level: int = 1,
+        post_process: bool = True
+    ) -> DatabaseObject:
         """
         when a music object with lacking data is passed in, it returns
         the SAME object **(no copy)** with more detailed data.
@@ -235,36 +239,52 @@ class Page:
             this gets ignored
         :return detailed_music_object: IT MODIFIES THE INPUT OBJ
         """
-
-        trace(f"fetching {type(music_object).__name__} ({music_object.title_string})")
-
         # creating a new object, of the same type
         new_music_object: Optional[DatabaseObject] = None
+        fetched_from_url: List[str] = []
 
         # only certain database objects, have a source list
         if isinstance(music_object, INDEPENDENT_DB_OBJECTS):
             source: Source
             for source in music_object.source_collection.get_sources_from_page(self.SOURCE_TYPE):
+                if music_object.already_fetched_from(source.hash_url):
+                    continue
+
                 tmp = self.fetch_object_from_source(
                     source=source,
                     enforce_type=type(music_object),
                     stop_at_level=stop_at_level,
-                    post_process=False
+                    post_process=False,
+                    type_string=type(music_object).__name__,
+                    title_string=music_object.title_string,
                 )
 
+                # nothing was fetched: don't merge None and don't mark the source as fetched
+                if tmp is None:
+                    continue
+
                 if new_music_object is None:
                     new_music_object = tmp
                 else:
                     new_music_object.merge(tmp)
+                fetched_from_url.append(source.hash_url)
 
         if new_music_object is not None:
             music_object.merge(new_music_object)
 
+        music_object.mark_as_fetched(*fetched_from_url)
         return music_object
 
-    def fetch_object_from_source(self, source: Source, stop_at_level: int = 2,
-                                 enforce_type: Type[DatabaseObject] = None, post_process: bool = True) -> Optional[
-        DatabaseObject]:
+    def fetch_object_from_source(
+        self,
+        source: Source,
+        stop_at_level: int = 2,
+        enforce_type: Type[DatabaseObject] = None,
+        post_process: bool = True,
+        type_string: str = "",
+        title_string: str = "",
+    ) -> Optional[DatabaseObject]:
+
         obj_type = self.get_source_type(source)
 
         if obj_type is None:
@@ -289,7 +309,9 @@ class Page:
             self.LOGGER.warning(f"Can't fetch details of type: {obj_type}")
             return None
 
-        if stop_at_level > 1:
+        if stop_at_level > 0:
+            trace(f"fetching {type_string} [{title_string}] [stop_at_level={stop_at_level}]")
+
             collection: Collection
             for collection_str in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
                 collection = music_object.__getattribute__(collection_str)
@@ -312,8 +334,13 @@ class Page:
     def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:
         return Label()
 
-    def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False,
-                 process_metadata_anyway: bool = False) -> DownloadResult:
+    def download(
+        self,
+        music_object: DatabaseObject,
+        genre: str,
+        download_all: bool = False,
+        process_metadata_anyway: bool = False
+    ) -> DownloadResult:
         naming_dict: NamingDict = NamingDict({"genre": genre})
 
         def fill_naming_objects(naming_music_object: DatabaseObject):
@@ -333,9 +360,15 @@ class Page:
 
         return self._download(music_object, naming_dict, download_all, process_metadata_anyway=process_metadata_anyway)
 
-    def _download(self, music_object: DatabaseObject, naming_dict: NamingDict, download_all: bool = False,
-                  skip_details: bool = False, process_metadata_anyway: bool = False) -> DownloadResult:
-        trace(f"downloading {type(music_object).__name__} ({music_object.title_string})")
+    def _download(
+        self,
+        music_object: DatabaseObject,
+        naming_dict: NamingDict,
+        download_all: bool = False,
+        skip_details: bool = False,
+        process_metadata_anyway: bool = False
+    ) -> DownloadResult:
+        trace(f"downloading {type(music_object).__name__} [{music_object.title_string}]")
         skip_next_details = skip_details
 
         # Skips all releases, that are defined in shared.ALBUM_TYPE_BLACKLIST, if download_all is False
@@ -346,8 +379,8 @@ class Page:
             if not download_all and music_object.album_type.value in main_settings["album_type_blacklist"]:
                 return DownloadResult()
 
-        if not isinstance(music_object, Song) or not self.NO_ADDITIONAL_DATA_FROM_SONG:
-            self.fetch_details(music_object=music_object, stop_at_level=2)
+        if not (isinstance(music_object, Song) and self.NO_ADDITIONAL_DATA_FROM_SONG):
+            self.fetch_details(music_object=music_object, stop_at_level=1)
 
         naming_dict.add_object(music_object)
 
diff --git a/music_kraken/pages/youtube_music/super_youtube.py b/music_kraken/pages/youtube_music/super_youtube.py
index a169eb4..d391370 100644
--- a/music_kraken/pages/youtube_music/super_youtube.py
+++ b/music_kraken/pages/youtube_music/super_youtube.py
@@ -128,7 +128,7 @@ class SuperYouTube(Page):
     SOURCE_TYPE = SourcePages.YOUTUBE
     LOGGER = logging_settings["youtube_logger"]
 
-    NO_ADDITIONAL_DATA_FROM_SONG = True
+    NO_ADDITIONAL_DATA_FROM_SONG = False
 
     def __init__(self, *args, **kwargs):
         self.download_connection: Connection = Connection(
diff --git a/music_kraken/utils/__init__.py b/music_kraken/utils/__init__.py
index 66e5479..fcf8aad 100644
--- a/music_kraken/utils/__init__.py
+++ b/music_kraken/utils/__init__.py
@@ -60,3 +60,8 @@ misc functions
 def get_current_millis() -> int:
     dt = datetime.now()
     return int(dt.microsecond / 1_000)
+
+
+def get_unix_time() -> int:
+    """Return the current unix timestamp, truncated to whole seconds."""
+    return int(datetime.now().timestamp())
diff --git a/music_kraken/utils/config/config_files/main_config.py b/music_kraken/utils/config/config_files/main_config.py
index 395e271..8de212b 100644
--- a/music_kraken/utils/config/config_files/main_config.py
+++ b/music_kraken/utils/config/config_files/main_config.py
@@ -46,6 +46,7 @@ The folder music kraken should put the songs into."""),
         "Mixtape"
     ], options=("Studio Album", "EP (Extended Play)", "Single", "Live Album", "Compilation Album", "Mixtape", "Demo", "Other"), description="""Music Kraken ignores all albums of those types.
 Following album types exist in the programm:"""),
+    Attribute(name="refresh_after", default_value=161, description="The time in seconds, after which a song/album/artist/label is newly fetched."),
 
     EmptyLine(),
 
@@ -124,6 +125,7 @@ class SettingsStructure(TypedDict):
     happy_messages: List[str]
     modify_gc: bool
     id_bits: int
+    refresh_after: int
 
     # audio
     bitrate: int