draft: rewriting downloading

Hellow 2024-05-13 21:45:12 +02:00
parent 0e6fe8187a
commit b09d6f2691
6 changed files with 114 additions and 24 deletions

View File

@@ -0,0 +1,19 @@
from typing import Set
from dataclasses import dataclass, field

from ..utils.config import main_settings
from ..utils.enums.album import AlbumType


@dataclass
class FetchOptions:
    download_all: bool = False
    album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"]))


@dataclass
class DownloadOptions:
    download_all: bool = False
    album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"]))

    process_audio_if_found: bool = False
    process_metadata_if_found: bool = True
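
The new FetchOptions/DownloadOptions dataclasses replace the loose download_all/process_metadata_anyway flags that were previously threaded through Pages.download(). A minimal sketch of how they could be wired together; the import path and the some_artist object are placeholders, while the Pages constructor signature comes from the diff further down:

from music_kraken.download import Pages, DownloadOptions, FetchOptions  # hypothetical import path

options = DownloadOptions(
    download_all=False,               # keep the album_type_blacklist from main_settings active
    process_metadata_if_found=True,
)

pages = Pages(download_options=options, fetch_options=FetchOptions())
result = pages.download(some_artist, genre="metal")  # some_artist: a previously fetched DataObject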

View File

@@ -1,12 +1,15 @@
from typing import Tuple, Type, Dict, Set, Optional, List
from collections import defaultdict

from . import FetchOptions, DownloadOptions
from .results import SearchResults
from ..objects import DatabaseObject as DataObject, Source, Album, Song, Artist, Label
from ..utils.config import youtube_settings, main_settings
from ..utils.enums.source import SourcePages
from ..utils.support_classes.download_result import DownloadResult
from ..utils.support_classes.query import Query
from ..utils.exception.download import UrlNotFoundException
from ..utils.shared import DEBUG_PAGES
@@ -50,7 +53,10 @@ if DEBUG_PAGES:

class Pages:
    def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
        self.download_options: DownloadOptions = download_options or DownloadOptions()
        self.fetch_options: FetchOptions = fetch_options or FetchOptions()

        # initialize all page instances
        self._page_instances: Dict[Type[Page], Page] = dict()
        self._source_to_page: Dict[SourcePages, Type[Page]] = dict()
@@ -73,7 +79,7 @@ class Pages:
        self.audio_pages: Tuple[Type[Page], ...] = _set_to_tuple(self._audio_pages_set)

        for page_type in self.pages:
            self._page_instances[page_type] = page_type(fetch_options=self.fetch_options, download_options=self.download_options)
            self._source_to_page[page_type.SOURCE_TYPE] = page_type

    def _get_page_from_enum(self, source_page: SourcePages) -> Page:
@@ -92,7 +98,7 @@ class Pages:
        return result

    def fetch_details(self, data_object: DataObject, stop_at_level: int = 1, **kwargs) -> DataObject:
        if not isinstance(data_object, INDEPENDENT_DB_OBJECTS):
            return data_object
@@ -136,23 +142,77 @@ class Pages:
        audio_pages = self._audio_pages_set.intersection(_page_types)
        return len(audio_pages) > 0

    def _skip_object(self, data_object: DataObject) -> bool:
        if isinstance(data_object, Album):
            if not self.download_options.download_all and data_object.album_type in self.download_options.album_type_blacklist:
                return True

        return False

    def download(self, data_object: DataObject, genre: str, **kwargs) -> DownloadResult:
        # fetch the given object
        self.fetch_details(data_object)

        # fetch all parent objects (e.g. if you only download a song)
        if not kwargs.get("fetched_upwards", False):
            to_fetch: List[DataObject] = [data_object]

            while len(to_fetch) > 0:
                new_to_fetch = []

                for d in to_fetch:
                    if self._skip_object(d):
                        continue

                    self.fetch_details(d)

                    for c in d.get_parent_collections():
                        new_to_fetch.extend(c)

                to_fetch = new_to_fetch

            kwargs["fetched_upwards"] = True

        # download all children
        download_result: DownloadResult = DownloadResult()
        for c in data_object.get_child_collections():
            for d in c:
                if self._skip_object(d):
                    continue

                download_result.merge(self.download(d, genre, **kwargs))

        # actually download if the object is a song
        if isinstance(data_object, Song):
            """
            TODO
            add the traced artist and album to the naming.
            I am able to do that, because duplicate values are removed later on.
            """
            self._download_song(data_object, naming={
                "genre": [genre],
                "audio_format": [main_settings["audio_format"]],
            })

        return download_result
    def _download_song(self, song: Song, naming: dict) -> DownloadResult:
        # manage the naming
        naming: Dict[str, List[str]] = defaultdict(list, naming)
        naming["song"].append(song.title_string)
        naming["isrc"].append(song.isrc)
        naming["album"].extend(a.title_string for a in song.album_collection)
        naming["album_type"].extend(a.album_type.value for a in song.album_collection)
        naming["artist"].extend(a.name for a in song.main_artist_collection)
        naming["artist"].extend(a.name for a in song.feature_artist_collection)
        for a in song.album_collection:
            naming["label"].extend([l.title_string for l in a.label_collection])

        # removing duplicates from the naming
        for key, value in naming.items():
            # deduplicate while preserving order, see https://stackoverflow.com/a/17016257
            naming[key] = list(dict.fromkeys(value))

        return DownloadResult()
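
The naming dict is a defaultdict of lists so that every name component can carry several candidate values, and the dict.fromkeys trick removes duplicates while keeping insertion order. A hypothetical illustration of how such a dict could later be rendered into a file path; the template below is made up for the example and is not this project's actual config:

from collections import defaultdict

naming = defaultdict(list, {"genre": ["metal"], "audio_format": ["mp3"]})
naming["artist"].extend(["Some Artist", "Some Artist"])  # duplicate values are expected here
naming["song"].append("Some Song")

for key, value in naming.items():
    naming[key] = list(dict.fromkeys(value))  # order-preserving deduplication

# hypothetical template, only to show how the collected lists could be consumed
path = "{genre}/{artist}/{song}.{audio_format}".format(**{k: v[0] for k, v in naming.items()})
# -> "metal/Some Artist/Some Song.mp3"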
    def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]:
        source = Source.match_url(url, SourcePages.MANUAL)

View File

@@ -205,6 +205,7 @@ class OuterProxy:
        if __other is None:
            return

        a_id = self.id

        a = self
        b = __other

@@ -227,6 +228,8 @@ class OuterProxy:
        a._inner.__merge__(old_inner, **kwargs)
        del old_inner

        self.id = a_id

    def __merge__(self, __other: Optional[OuterProxy], **kwargs):
        self.merge(__other, **kwargs)

@@ -337,3 +340,11 @@ class OuterProxy:
    def __repr__(self):
        return f"{type(self).__name__}({self.title_string})"

    def get_child_collections(self):
        for collection_string_attribute in self.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
            yield self.__getattribute__(collection_string_attribute)

    def get_parent_collections(self):
        for collection_string_attribute in self.UPWARDS_COLLECTION_STRING_ATTRIBUTES:
            yield self.__getattribute__(collection_string_attribute)

View File

@@ -107,7 +107,7 @@ class Page:
    """
    This is an abstract class, laying out the
    functionality for every other class fetching something
    """

    DOWNLOAD_PRIORITY: int = 0

    SOURCE_TYPE: SourcePages
    LOGGER = logging.getLogger("this shouldn't be used")

View File

@@ -49,7 +49,7 @@ class BandcampTypes(Enum):

class Bandcamp(Page):
    DOWNLOAD_PRIORITY = 10

    SOURCE_TYPE = SourcePages.BANDCAMP
    LOGGER = logging_settings["bandcamp_logger"]

View File

@@ -111,7 +111,7 @@ def parse_url(url: str) -> MusifyUrl:

class Musify(Page):
    DOWNLOAD_PRIORITY = 9

    SOURCE_TYPE = SourcePages.MUSIFY
    LOGGER = logging_settings["musify_logger"]
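
The commit only introduces the DOWNLOAD_PRIORITY constants (Bandcamp 10, Musify 9, placeholder 0 on the abstract Page); the diff does not show how they are consumed. A plausible sketch, assuming the intent is to prefer the highest-priority page when several audio pages carry a source for the same object; the helper name is made up:

def pick_download_page(audio_pages):
    # assumed helper, not part of this commit: highest DOWNLOAD_PRIORITY wins
    return max(audio_pages, key=lambda page: page.DOWNLOAD_PRIORITY)

# e.g. pick_download_page([Musify, Bandcamp]) would return Bandcamp (10 > 9)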