feat: massive improvements to the fetch and download order

This commit is contained in:
Hazel 2024-04-10 11:20:49 +02:00
parent 4e52c0478a
commit f009bf7bb8
6 changed files with 75 additions and 19 deletions

View File

@ -7,7 +7,8 @@ from functools import lru_cache
from typing import Optional, Dict, Tuple, List, Type, Generic, Any, TypeVar, Set from typing import Optional, Dict, Tuple, List, Type, Generic, Any, TypeVar, Set
from .metadata import Metadata from .metadata import Metadata
from ..utils.config import logging_settings from ..utils import get_unix_time
from ..utils.config import logging_settings, main_settings
from ..utils.shared import HIGHEST_ID from ..utils.shared import HIGHEST_ID
from ..utils.hacking import MetaClass from ..utils.hacking import MetaClass
@ -96,6 +97,7 @@ class OuterProxy:
del kwargs[name] del kwargs[name]
self._fetched_from: dict = {}
self._inner: InnerData = InnerData(**kwargs) self._inner: InnerData = InnerData(**kwargs)
self.__init_collections__() self.__init_collections__()
@ -176,6 +178,21 @@ class OuterProxy:
self._inner.__merge__(__other._inner, override=override) self._inner.__merge__(__other._inner, override=override)
__other._inner = self._inner __other._inner = self._inner
def mark_as_fetched(self, *url_hash_list: str):
    """
    Record that this object has just been fetched from the given sources.

    :param url_hash_list: any number of url hashes (one string per positional
        argument); each becomes a key in ``self._fetched_from``
    """
    if not url_hash_list:
        return

    # one timestamp for the whole batch, so every entry written by a
    # single call carries the same fetch time
    fetched_at = get_unix_time()
    for url_hash in url_hash_list:
        self._fetched_from[url_hash] = {
            "time": fetched_at,
            "url": url_hash,
        }
def already_fetched_from(self, url_hash: str) -> bool:
    """
    Tell whether a fetch recorded for ``url_hash`` is still fresh.

    :param url_hash: the key to look up in ``self._fetched_from``
    :return: ``False`` if nothing is recorded for ``url_hash``; otherwise
        ``True`` exactly when fewer than ``main_settings["refresh_after"]``
        seconds have passed since the recorded time
    """
    record = self._fetched_from.get(url_hash)
    if record is not None:
        elapsed = get_unix_time() - record["time"]
        return elapsed < main_settings["refresh_after"]

    return False
@property @property
def metadata(self) -> Metadata: def metadata(self) -> Metadata:
""" """

View File

@ -86,6 +86,10 @@ class Source(OuterProxy):
Mapping.ARTIST_WEBPAGE_URL: [self.url] Mapping.ARTIST_WEBPAGE_URL: [self.url]
}) })
@property
def hash_url(self) -> str:
    """
    The url of this source, normalized so it can be used as a lookup key.

    Surrounding whitespace is removed, the url is lowercased and one leading
    ``https://`` or ``http://`` scheme is dropped.

    :return: the normalized url string
    """
    normalized = self.url.strip().lower()

    # str.lstrip() treats its argument as a SET of characters, so
    # lstrip("https://") would also eat leading host characters such as
    # the "sp" of "spotify.com"; only an exact scheme prefix is cut off here
    for scheme in ("https://", "http://"):
        if normalized.startswith(scheme):
            return normalized[len(scheme):]

    return normalized
@property @property
def metadata(self) -> Metadata: def metadata(self) -> Metadata:
return self.get_song_metadata() return self.get_song_metadata()

View File

@ -218,8 +218,12 @@ class Page:
def song_search(self, song: Song) -> List[Song]: def song_search(self, song: Song) -> List[Song]:
return [] return []
def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1, def fetch_details(
post_process: bool = True) -> DatabaseObject: self,
music_object: DatabaseObject,
stop_at_level: int = 1,
post_process: bool = True
) -> DatabaseObject:
""" """
when a music object with lacking data is passed in, it returns when a music object with lacking data is passed in, it returns
the SAME object **(no copy)** with more detailed data. the SAME object **(no copy)** with more detailed data.
@ -235,36 +239,48 @@ class Page:
this gets ignored this gets ignored
:return detailed_music_object: IT MODIFIES THE INPUT OBJ :return detailed_music_object: IT MODIFIES THE INPUT OBJ
""" """
trace(f"fetching {type(music_object).__name__} ({music_object.title_string})")
# creating a new object, of the same type # creating a new object, of the same type
new_music_object: Optional[DatabaseObject] = None new_music_object: Optional[DatabaseObject] = None
fetched_from_url: List[str] = []
# only certain database objects, have a source list # only certain database objects, have a source list
if isinstance(music_object, INDEPENDENT_DB_OBJECTS): if isinstance(music_object, INDEPENDENT_DB_OBJECTS):
source: Source source: Source
for source in music_object.source_collection.get_sources_from_page(self.SOURCE_TYPE): for source in music_object.source_collection.get_sources_from_page(self.SOURCE_TYPE):
if music_object.already_fetched_from(source.hash_url):
continue
tmp = self.fetch_object_from_source( tmp = self.fetch_object_from_source(
source=source, source=source,
enforce_type=type(music_object), enforce_type=type(music_object),
stop_at_level=stop_at_level, stop_at_level=stop_at_level,
post_process=False post_process=False,
type_string=type(music_object).__name__,
title_string=music_object.title_string,
) )
if new_music_object is None: if new_music_object is None:
new_music_object = tmp new_music_object = tmp
else: else:
new_music_object.merge(tmp) new_music_object.merge(tmp)
fetched_from_url.append(source.hash_url)
if new_music_object is not None: if new_music_object is not None:
music_object.merge(new_music_object) music_object.merge(new_music_object)
music_object.mark_as_fetched(*fetched_from_url)
return music_object return music_object
def fetch_object_from_source(self, source: Source, stop_at_level: int = 2, def fetch_object_from_source(
enforce_type: Type[DatabaseObject] = None, post_process: bool = True) -> Optional[ self,
DatabaseObject]: source: Source,
stop_at_level: int = 2,
enforce_type: Type[DatabaseObject] = None,
post_process: bool = True,
type_string: str = "",
title_string: str = "",
) -> Optional[DatabaseObject]:
obj_type = self.get_source_type(source) obj_type = self.get_source_type(source)
if obj_type is None: if obj_type is None:
@ -289,7 +305,9 @@ class Page:
self.LOGGER.warning(f"Can't fetch details of type: {obj_type}") self.LOGGER.warning(f"Can't fetch details of type: {obj_type}")
return None return None
if stop_at_level > 1: if stop_at_level > 0:
trace(f"fetching {type_string} [{title_string}] [stop_at_level={stop_at_level}]")
collection: Collection collection: Collection
for collection_str in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES: for collection_str in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
collection = music_object.__getattribute__(collection_str) collection = music_object.__getattribute__(collection_str)
@ -312,8 +330,13 @@ class Page:
def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:
return Label() return Label()
def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False, def download(
process_metadata_anyway: bool = False) -> DownloadResult: self,
music_object: DatabaseObject,
genre: str,
download_all: bool = False,
process_metadata_anyway: bool = False
) -> DownloadResult:
naming_dict: NamingDict = NamingDict({"genre": genre}) naming_dict: NamingDict = NamingDict({"genre": genre})
def fill_naming_objects(naming_music_object: DatabaseObject): def fill_naming_objects(naming_music_object: DatabaseObject):
@ -333,9 +356,15 @@ class Page:
return self._download(music_object, naming_dict, download_all, process_metadata_anyway=process_metadata_anyway) return self._download(music_object, naming_dict, download_all, process_metadata_anyway=process_metadata_anyway)
def _download(self, music_object: DatabaseObject, naming_dict: NamingDict, download_all: bool = False, def _download(
skip_details: bool = False, process_metadata_anyway: bool = False) -> DownloadResult: self,
trace(f"downloading {type(music_object).__name__} ({music_object.title_string})") music_object: DatabaseObject,
naming_dict: NamingDict,
download_all: bool = False,
skip_details: bool = False,
process_metadata_anyway: bool = False
) -> DownloadResult:
trace(f"downloading {type(music_object).__name__} [{music_object.title_string}]")
skip_next_details = skip_details skip_next_details = skip_details
# Skips all releases, that are defined in shared.ALBUM_TYPE_BLACKLIST, if download_all is False # Skips all releases, that are defined in shared.ALBUM_TYPE_BLACKLIST, if download_all is False
@ -346,8 +375,8 @@ class Page:
if not download_all and music_object.album_type.value in main_settings["album_type_blacklist"]: if not download_all and music_object.album_type.value in main_settings["album_type_blacklist"]:
return DownloadResult() return DownloadResult()
if not isinstance(music_object, Song) or not self.NO_ADDITIONAL_DATA_FROM_SONG: if not (isinstance(music_object, Song) and self.NO_ADDITIONAL_DATA_FROM_SONG):
self.fetch_details(music_object=music_object, stop_at_level=2) self.fetch_details(music_object=music_object, stop_at_level=1)
naming_dict.add_object(music_object) naming_dict.add_object(music_object)

View File

@ -128,7 +128,7 @@ class SuperYouTube(Page):
SOURCE_TYPE = SourcePages.YOUTUBE SOURCE_TYPE = SourcePages.YOUTUBE
LOGGER = logging_settings["youtube_logger"] LOGGER = logging_settings["youtube_logger"]
NO_ADDITIONAL_DATA_FROM_SONG = True NO_ADDITIONAL_DATA_FROM_SONG = False
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
self.download_connection: Connection = Connection( self.download_connection: Connection = Connection(

View File

@ -60,3 +60,7 @@ misc functions
def get_current_millis() -> int: def get_current_millis() -> int:
dt = datetime.now() dt = datetime.now()
return int(dt.microsecond / 1_000) return int(dt.microsecond / 1_000)
def get_unix_time() -> int:
    """
    Return the current unix time in whole seconds.

    :return: seconds since the epoch, truncated to an int
    """
    # time.time() reads the epoch clock directly; going through a naive
    # datetime.now() round-trips via local time, which is ambiguous during
    # the repeated hour when daylight saving time ends
    import time

    return int(time.time())

View File

@ -46,6 +46,7 @@ The folder music kraken should put the songs into."""),
"Mixtape" "Mixtape"
], options=("Studio Album", "EP (Extended Play)", "Single", "Live Album", "Compilation Album", "Mixtape", "Demo", "Other"), description="""Music Kraken ignores all albums of those types. ], options=("Studio Album", "EP (Extended Play)", "Single", "Live Album", "Compilation Album", "Mixtape", "Demo", "Other"), description="""Music Kraken ignores all albums of those types.
Following album types exist in the programm:"""), Following album types exist in the programm:"""),
Attribute(name="refresh_after", default_value=161, description="The time in seconds, after which a song/album/artist/label is newly fetched."),
EmptyLine(), EmptyLine(),
@ -124,6 +125,7 @@ class SettingsStructure(TypedDict):
happy_messages: List[str] happy_messages: List[str]
modify_gc: bool modify_gc: bool
id_bits: int id_bits: int
refresh_after: int
# audio # audio
bitrate: int bitrate: int