From 0343c11a626696ac24b41af8b63dd2943759b195 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 13 May 2024 18:03:20 +0200 Subject: [PATCH 01/18] feat: migrated fetch details and from source --- music_kraken/download/page_attributes.py | 57 ++++++++++---- music_kraken/objects/__init__.py | 2 +- music_kraken/objects/parents.py | 4 +- music_kraken/pages/abstract.py | 98 +----------------------- 4 files changed, 45 insertions(+), 116 deletions(-) diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index f0b678c..f37517e 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -1,7 +1,7 @@ -from typing import Tuple, Type, Dict, Set +from typing import Tuple, Type, Dict, Set, Optional from .results import SearchResults -from ..objects import DatabaseObject, Source +from ..objects import DatabaseObject as DataObject, Source from ..utils.config import youtube_settings from ..utils.enums.source import SourcePages @@ -34,6 +34,13 @@ SHADY_PAGES: Set[Type[Page]] = { Musify, } +fetch_map = { + Song: "fetch_song", + Album: "fetch_album", + Artist: "fetch_artist", + Label: "fetch_label", +} + if DEBUG_PAGES: DEBUGGING_PAGE = Bandcamp print(f"Only downloading from page {DEBUGGING_PAGE}.") @@ -68,7 +75,12 @@ class Pages: for page_type in self.pages: self._page_instances[page_type] = page_type() self._source_to_page[page_type.SOURCE_TYPE] = page_type - + + def _get_page_from_enum(self, source_page: SourcePages) -> Page: + if source_page not in self._source_to_page: + return None + return self._page_instances[self._source_to_page[source_page]] + def search(self, query: Query) -> SearchResults: result = SearchResults() @@ -80,22 +92,33 @@ class Pages: return result - def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1) -> DatabaseObject: - if not isinstance(music_object, INDEPENDENT_DB_OBJECTS): - return music_object + def fetch_details(self, data_object: DataObject, stop_at_level: int = 1) -> DataObject: + if not isinstance(data_object, INDEPENDENT_DB_OBJECTS): + return data_object - for source_page in music_object.source_collection.source_pages: - if source_page not in self._source_to_page: - continue + source: Source + for source in data_object.source_collection.get_sources(): + new_data_object = self.fetch_from_source(source=source, stop_at_level=stop_at_level) + if new_data_object is not None: + data_object.merge(new_data_object) - page_type = self._source_to_page[source_page] - - if page_type in self._pages_set: - music_object.merge(self._page_instances[page_type].fetch_details(music_object=music_object, stop_at_level=stop_at_level)) + return data_object + + def fetch_from_source(self, source: Source, **kwargs) -> Optional[DataObject]: + page: Page = self._get_page_from_enum(source.page_enum) + if page is None: + return None - return music_object + source_type = page.get_source_type(source) + if not hasattr(page, fetch_map[source_type]): + return None - def is_downloadable(self, music_object: DatabaseObject) -> bool: + func = getattr(page, fetch_map[source_type])(source=source, **kwargs) + data_object: DataObject = func(source=source) + data_object.mark_as_fetched(source.hash_url) + return data_object + + def is_downloadable(self, music_object: DataObject) -> bool: _page_types = set(self._source_to_page) for src in music_object.source_collection.source_pages: if src in self._source_to_page: @@ -104,7 +127,7 @@ class Pages: audio_pages = self._audio_pages_set.intersection(_page_types) return len(audio_pages) > 0 - def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult: + def download(self, music_object: DataObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult: if not isinstance(music_object, INDEPENDENT_DB_OBJECTS): return DownloadResult(error_message=f"{type(music_object).__name__} can't be downloaded.") @@ -122,7 +145,7 @@ class Pages: return DownloadResult(error_message=f"No audio source has been found for {music_object}.") - def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DatabaseObject]: + def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]: source = Source.match_url(url, SourcePages.MANUAL) if source is None: diff --git a/music_kraken/objects/__init__.py b/music_kraken/objects/__init__.py index da5b9aa..0504353 100644 --- a/music_kraken/objects/__init__.py +++ b/music_kraken/objects/__init__.py @@ -24,4 +24,4 @@ from .parents import OuterProxy from .artwork import Artwork -DatabaseObject = TypeVar('T', bound=OuterProxy) +DatabaseObject = OuterProxy diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index 3ef6ab3..0b8504e 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -99,7 +99,9 @@ class OuterProxy: Wraps the inner data, and provides apis, to naturally access those values. """ - _default_factories: dict = {} + source_collection: SourceCollection + + _default_factories: dict = {"source_collection": SourceCollection} _outer_attribute: Set[str] = {"options", "metadata", "indexing_values", "option_string"} DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = tuple() diff --git a/music_kraken/pages/abstract.py b/music_kraken/pages/abstract.py index a202b32..965f836 100644 --- a/music_kraken/pages/abstract.py +++ b/music_kraken/pages/abstract.py @@ -189,103 +189,7 @@ class Page: def song_search(self, song: Song) -> List[Song]: return [] - def fetch_details( - self, - music_object: DatabaseObject, - stop_at_level: int = 1, - ) -> DatabaseObject: - """ - when a music object with lacking data is passed in, it returns - the SAME object **(no copy)** with more detailed data. - If you for example put in, an album, it fetches the tracklist - - :param music_object: - :param stop_at_level: - This says the depth of the level the scraper will recurse to. - If this is for example set to 2, then the levels could be: - 1. Level: the album - 2. Level: every song of the album + every artist of the album - If no additional requests are needed to get the data one level below the supposed stop level - this gets ignored - :return detailed_music_object: IT MODIFIES THE INPUT OBJ - """ - # creating a new object, of the same type - new_music_object: Optional[DatabaseObject] = None - fetched_from_url: List[str] = [] - - # only certain database objects, have a source list - if isinstance(music_object, INDEPENDENT_DB_OBJECTS): - source: Source - for source in music_object.source_collection.get_sources(self.SOURCE_TYPE): - if music_object.already_fetched_from(source.hash_url): - continue - - tmp = self.fetch_object_from_source( - source=source, - enforce_type=type(music_object), - stop_at_level=stop_at_level, - type_string=type(music_object).__name__, - entity_string=music_object.option_string, - ) - - if new_music_object is None: - new_music_object = tmp - else: - new_music_object.merge(tmp) - fetched_from_url.append(source.hash_url) - - if new_music_object is not None: - music_object.merge(new_music_object) - - music_object.mark_as_fetched(*fetched_from_url) - return music_object - - def fetch_object_from_source( - self, - source: Source, - stop_at_level: int = 2, - enforce_type: Type[DatabaseObject] = None, - type_string: str = "", - entity_string: str = "", - ) -> Optional[DatabaseObject]: - - obj_type = self.get_source_type(source) - - if obj_type is None: - return None - - if enforce_type != obj_type and enforce_type is not None: - self.LOGGER.warning(f"Object type isn't type to enforce: {enforce_type}, {obj_type}") - return None - - music_object: DatabaseObject = None - - fetch_map = { - Song: self.fetch_song, - Album: self.fetch_album, - Artist: self.fetch_artist, - Label: self.fetch_label - } - - if obj_type in fetch_map: - music_object = fetch_map[obj_type](source, stop_at_level=stop_at_level) - else: - self.LOGGER.warning(f"Can't fetch details of type: {obj_type}") - return None - - if stop_at_level > 0: - trace(f"fetching {type_string} [{entity_string}] [stop_at_level={stop_at_level}]") - - collection: Collection - for collection_str in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES: - collection = music_object.__getattribute__(collection_str) - - for sub_element in collection: - sub_element.merge( - self.fetch_details(sub_element, stop_at_level=stop_at_level - 1)) - - return music_object - + # to fetch stuff def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: return Song() From 0e6fe8187ab508adeda06361a3ee792ca963a77c Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 13 May 2024 18:09:11 +0200 Subject: [PATCH 02/18] feat: fetch_from_url --- music_kraken/download/page_attributes.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index f37517e..8b433b1 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -109,15 +109,24 @@ class Pages: if page is None: return None + # getting the appropriate function for the page and the object type source_type = page.get_source_type(source) if not hasattr(page, fetch_map[source_type]): return None - func = getattr(page, fetch_map[source_type])(source=source, **kwargs) + + # fetching the data object and marking it as fetched data_object: DataObject = func(source=source) data_object.mark_as_fetched(source.hash_url) return data_object + def fetch_from_url(self, url: str) -> Optional[DataObject]: + source = Source.match_url(url, SourcePages.MANUAL) + if source is None: + return None + + return self.fetch_from_source(source=source) + def is_downloadable(self, music_object: DataObject) -> bool: _page_types = set(self._source_to_page) for src in music_object.source_collection.source_pages: From b09d6f2691ef09ca2db9b5f8f414565528c287dd Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Mon, 13 May 2024 21:45:12 +0200 Subject: [PATCH 03/18] draft: rewriting downloading --- music_kraken/download/__init__.py | 19 +++++ music_kraken/download/page_attributes.py | 102 ++++++++++++++++++----- music_kraken/objects/parents.py | 11 +++ music_kraken/pages/abstract.py | 2 +- music_kraken/pages/bandcamp.py | 2 +- music_kraken/pages/musify.py | 2 +- 6 files changed, 114 insertions(+), 24 deletions(-) diff --git a/music_kraken/download/__init__.py b/music_kraken/download/__init__.py index e69de29..05fb122 100644 --- a/music_kraken/download/__init__.py +++ b/music_kraken/download/__init__.py @@ -0,0 +1,19 @@ +from dataclasses import dataclass, field + +from ..utils.config import main_settings +from ..utils.enums.album import AlbumType + + +@dataclass +class FetchOptions: + download_all: bool = False + album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"])) + + +@dataclass +class DownloadOptions: + download_all: bool = False + album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"])) + + process_audio_if_found: bool = False + process_metadata_if_found: bool = True diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index 8b433b1..ea8f0bf 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -1,12 +1,15 @@ -from typing import Tuple, Type, Dict, Set, Optional +from typing import Tuple, Type, Dict, Set, Optional, List +from collections import defaultdict +from . import FetchOptions, DownloadOptions from .results import SearchResults -from ..objects import DatabaseObject as DataObject, Source +from ..objects import DatabaseObject as DataObject, Source, Album, Song, Artist, Label from ..utils.config import youtube_settings from ..utils.enums.source import SourcePages from ..utils.support_classes.download_result import DownloadResult from ..utils.support_classes.query import Query +from ..utils.support_classes.download_result import DownloadResult from ..utils.exception.download import UrlNotFoundException from ..utils.shared import DEBUG_PAGES @@ -50,7 +53,10 @@ if DEBUG_PAGES: class Pages: - def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False) -> None: + def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False, download_options: DownloadOptions = None, fetch_options: FetchOptions = None): + self.download_options: DownloadOptions = download_options or DownloadOptions() + self.fetch_options: FetchOptions = fetch_options or FetchOptions() + # initialize all page instances self._page_instances: Dict[Type[Page], Page] = dict() self._source_to_page: Dict[SourcePages, Type[Page]] = dict() @@ -68,12 +74,12 @@ class Pages: self._pages_set: Set[Type[Page]] = ALL_PAGES.difference(exclude_pages) self.pages: Tuple[Type[Page], ...] = _set_to_tuple(self._pages_set) - + self._audio_pages_set: Set[Type[Page]] = self._pages_set.intersection(AUDIO_PAGES) self.audio_pages: Tuple[Type[Page], ...] = _set_to_tuple(self._audio_pages_set) for page_type in self.pages: - self._page_instances[page_type] = page_type() + self._page_instances[page_type] = page_type(fetch_options=self.fetch_options, download_options=self.download_options) self._source_to_page[page_type.SOURCE_TYPE] = page_type def _get_page_from_enum(self, source_page: SourcePages) -> Page: @@ -92,7 +98,7 @@ class Pages: return result - def fetch_details(self, data_object: DataObject, stop_at_level: int = 1) -> DataObject: + def fetch_details(self, data_object: DataObject, stop_at_level: int = 1, **kwargs) -> DataObject: if not isinstance(data_object, INDEPENDENT_DB_OBJECTS): return data_object @@ -136,23 +142,77 @@ class Pages: audio_pages = self._audio_pages_set.intersection(_page_types) return len(audio_pages) > 0 - def download(self, music_object: DataObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult: - if not isinstance(music_object, INDEPENDENT_DB_OBJECTS): - return DownloadResult(error_message=f"{type(music_object).__name__} can't be downloaded.") - - self.fetch_details(music_object) - - _page_types = set(self._source_to_page) - for src in music_object.source_collection.source_pages: - if src in self._source_to_page: - _page_types.add(self._source_to_page[src]) - - audio_pages = self._audio_pages_set.intersection(_page_types) + def _skip_object(self, data_object: DataObject) -> bool: + if isinstance(data_object, Album): + if not self.download_options.download_all and data_object.album_type in self.download_options.album_type_blacklist: + return True - for download_page in audio_pages: - return self._page_instances[download_page].download(music_object=music_object, genre=genre) + return False + + def download(self, data_object: DataObject, genre: str, **kwargs) -> DownloadResult: + # fetch the given object + self.fetch_details(data_object) - return DownloadResult(error_message=f"No audio source has been found for {music_object}.") + # fetching all parent objects (e.g. if you only download a song) + if not kwargs.get("fetched_upwards", False): + to_fetch: List[DataObject] = [data_object] + + while len(to_fetch) > 0: + new_to_fetch = [] + for d in to_fetch: + if self._skip_object(d): + continue + + self.fetch_details(d) + + for c in d.get_parent_collections(): + new_to_fetch.extend(c) + + to_fetch = new_to_fetch + + kwargs["fetched_upwards"] = True + + # download all children + download_result: DownloadResult = DownloadResult() + for c in data_object.get_children(): + for d in c: + if self._skip_object(d): + continue + + download_result.merge(self.download(d, genre, **kwargs)) + + # actually download if the object is a song + if isinstance(data_object, Song): + """ + TODO + add the traced artist and album to the naming. + I am able to do that, because duplicate values are removed later on. + """ + + self._download_song(data_object, naming={ + "genre": [genre], + "audio_format": main_settings["audio_format"], + }) + + return download_result + + def _download_song(self, song: Song, naming: dict) -> DownloadOptions: + # manage the naming + naming: Dict[str, List[str]] = defaultdict(list, naming) + naming["song"].append(song.title_string) + naming["isrc"].append(song.isrc) + naming["album"].extend(a.title_string for a in song.album_collection) + naming["album_type"].extend(a.album_type.value for a in song.album_collection) + naming["artist"].extend(a.name for a in song.main_artist_collection) + naming["artist"].extend(a.name for a in song.feature_artist_collection) + for a in song.album_collection: + naming["label"].extend([l.title_string for l in a.label_collection]) + # removing duplicates from the naming + for key, value in naming.items(): + # https://stackoverflow.com/a/17016257 + naming[key] = list(dict.fromkeys(items)) + + return DownloadOptions() def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]: source = Source.match_url(url, SourcePages.MANUAL) diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index 0b8504e..51eb8e6 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -205,6 +205,7 @@ class OuterProxy: if __other is None: return + a_id = self.id a = self b = __other @@ -227,6 +228,8 @@ class OuterProxy: a._inner.__merge__(old_inner, **kwargs) del old_inner + self.id = a_id + def __merge__(self, __other: Optional[OuterProxy], **kwargs): self.merge(__other, **kwargs) @@ -337,3 +340,11 @@ class OuterProxy: def __repr__(self): return f"{type(self).__name__}({self.title_string})" + + def get_child_collections(self): + for collection_string_attribute in self.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES: + yield self.__getattribute__(collection_string_attribute) + + def get_parent_collections(self): + for collection_string_attribute in self.UPWARDS_COLLECTION_STRING_ATTRIBUTES: + yield self.__getattribute__(collection_string_attribute) diff --git a/music_kraken/pages/abstract.py b/music_kraken/pages/abstract.py index 965f836..22e0a7d 100644 --- a/music_kraken/pages/abstract.py +++ b/music_kraken/pages/abstract.py @@ -107,7 +107,7 @@ class Page: This is an abstract class, laying out the functionality for every other class fetching something """ - + DOWNLOAD_PRIORITY: int = 0 SOURCE_TYPE: SourcePages LOGGER = logging.getLogger("this shouldn't be used") diff --git a/music_kraken/pages/bandcamp.py b/music_kraken/pages/bandcamp.py index fb446d0..4a8fd6c 100644 --- a/music_kraken/pages/bandcamp.py +++ b/music_kraken/pages/bandcamp.py @@ -49,7 +49,7 @@ class BandcampTypes(Enum): class Bandcamp(Page): - # CHANGE + DOWNLOAD_PRIORITY = 10 SOURCE_TYPE = SourcePages.BANDCAMP LOGGER = logging_settings["bandcamp_logger"] diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index 59d01b8..454425d 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -111,7 +111,7 @@ def parse_url(url: str) -> MusifyUrl: class Musify(Page): - # CHANGE + DOWNLOAD_PRIORITY = 9 SOURCE_TYPE = SourcePages.MUSIFY LOGGER = logging_settings["musify_logger"] From 8c369d79e4d7a3d4fbf4c0b79ea30c29d9dacba5 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Mon, 13 May 2024 21:51:32 +0200 Subject: [PATCH 04/18] draft: rewriting downloading --- music_kraken/download/page_attributes.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index ea8f0bf..a658723 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -5,6 +5,7 @@ from . import FetchOptions, DownloadOptions from .results import SearchResults from ..objects import DatabaseObject as DataObject, Source, Album, Song, Artist, Label +from ..utils.string_processing import fit_to_file_system from ..utils.config import youtube_settings from ..utils.enums.source import SourcePages from ..utils.support_classes.download_result import DownloadResult @@ -197,6 +198,9 @@ class Pages: return download_result def _download_song(self, song: Song, naming: dict) -> DownloadOptions: + # pre process the data recursively + song.compile() + # manage the naming naming: Dict[str, List[str]] = defaultdict(list, naming) naming["song"].append(song.title_string) @@ -207,10 +211,16 @@ class Pages: naming["artist"].extend(a.name for a in song.feature_artist_collection) for a in song.album_collection: naming["label"].extend([l.title_string for l in a.label_collection]) - # removing duplicates from the naming + # removing duplicates from the naming, and process the strings for key, value in naming.items(): # https://stackoverflow.com/a/17016257 naming[key] = list(dict.fromkeys(items)) + naming[key] = [fit_to_file_system(i) for i in naming[key] if i is not None] + + # get every possible path + path_format = [*main_settings["download_path"].split("/"), main_settings["download_file"]] + every_possible_path: Set[str] = set() + return DownloadOptions() From bb32fc7647ce3f9b83c894782051ff048946cda7 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Tue, 14 May 2024 00:28:05 +0200 Subject: [PATCH 05/18] draft: rewriting downloading --- music_kraken/download/page_attributes.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index a658723..13d6ee8 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -1,5 +1,6 @@ from typing import Tuple, Type, Dict, Set, Optional, List from collections import defaultdict +from pathlib import Path from . import FetchOptions, DownloadOptions from .results import SearchResults @@ -218,9 +219,9 @@ class Pages: naming[key] = [fit_to_file_system(i) for i in naming[key] if i is not None] # get every possible path - path_format = [*main_settings["download_path"].split("/"), main_settings["download_file"]] - every_possible_path: Set[str] = set() - + path_format = Path(main_settings["download_path"], main_settings["download_file"]) + for part in path_format.parts: + pass return DownloadOptions() From da8887b279f77b8846fc0cdce355413c52cc95e5 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 14 May 2024 15:18:17 +0200 Subject: [PATCH 06/18] draft: rewriting soure --- .vscode/settings.json | 2 + music_kraken/download/__init__.py | 1 + music_kraken/download/page_attributes.py | 90 +++++++++++++++---- music_kraken/download/results.py | 2 +- music_kraken/objects/__init__.py | 2 +- music_kraken/objects/source.py | 44 ++++----- music_kraken/pages/abstract.py | 73 ++------------- music_kraken/pages/bandcamp.py | 6 +- music_kraken/pages/encyclopaedia_metallum.py | 12 +-- music_kraken/pages/musify.py | 8 +- music_kraken/pages/preset.py | 65 -------------- music_kraken/pages/youtube.py | 5 +- .../pages/youtube_music/_list_render.py | 1 - .../youtube_music/_music_object_render.py | 5 +- .../pages/youtube_music/super_youtube.py | 10 +-- .../pages/youtube_music/youtube_music.py | 8 +- music_kraken/utils/enums/__init__.py | 54 ++++++++++- music_kraken/utils/enums/source.py | 40 --------- music_kraken/utils/exception/__init__.py | 12 +++ 19 files changed, 198 insertions(+), 242 deletions(-) delete mode 100644 music_kraken/pages/preset.py delete mode 100644 music_kraken/utils/enums/source.py diff --git a/.vscode/settings.json b/.vscode/settings.json index 2108a96..a8c503e 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -20,6 +20,7 @@ "APIC", "Bandcamp", "bitrate", + "DEEZER", "dotenv", "encyclopaedia", "ENDC", @@ -27,6 +28,7 @@ "isrc", "levenshtein", "metallum", + "MUSICBRAINZ", "musify", "OKBLUE", "OKGREEN", diff --git a/music_kraken/download/__init__.py b/music_kraken/download/__init__.py index 05fb122..a52bd87 100644 --- a/music_kraken/download/__init__.py +++ b/music_kraken/download/__init__.py @@ -15,5 +15,6 @@ class DownloadOptions: download_all: bool = False album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"])) + download_again_if_found: bool = False process_audio_if_found: bool = False process_metadata_if_found: bool = True diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index 13d6ee8..8d9bb42 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -1,17 +1,29 @@ from typing import Tuple, Type, Dict, Set, Optional, List from collections import defaultdict from pathlib import Path +import re from . import FetchOptions, DownloadOptions from .results import SearchResults -from ..objects import DatabaseObject as DataObject, Source, Album, Song, Artist, Label - +from ..objects import ( + DatabaseObject as DataObject, + Collection, + Target, + Source, + Options, + Song, + Album, + Artist, + Label, +) from ..utils.string_processing import fit_to_file_system -from ..utils.config import youtube_settings -from ..utils.enums.source import SourcePages +from ..utils.config import youtube_settings, main_settings +from ..utils.path_manager import LOCATIONS +from ..utils.enums import SourceType from ..utils.support_classes.download_result import DownloadResult from ..utils.support_classes.query import Query from ..utils.support_classes.download_result import DownloadResult +from ..utils.exception import MKMissingNameException from ..utils.exception.download import UrlNotFoundException from ..utils.shared import DEBUG_PAGES @@ -61,7 +73,7 @@ class Pages: # initialize all page instances self._page_instances: Dict[Type[Page], Page] = dict() - self._source_to_page: Dict[SourcePages, Type[Page]] = dict() + self._source_to_page: Dict[SourceType, Type[Page]] = dict() exclude_pages = exclude_pages if exclude_pages is not None else set() @@ -84,7 +96,7 @@ class Pages: self._page_instances[page_type] = page_type(fetch_options=self.fetch_options, download_options=self.download_options) self._source_to_page[page_type.SOURCE_TYPE] = page_type - def _get_page_from_enum(self, source_page: SourcePages) -> Page: + def _get_page_from_enum(self, source_page: SourceType) -> Page: if source_page not in self._source_to_page: return None return self._page_instances[self._source_to_page[source_page]] @@ -113,7 +125,7 @@ class Pages: return data_object def fetch_from_source(self, source: Source, **kwargs) -> Optional[DataObject]: - page: Page = self._get_page_from_enum(source.page_enum) + page: Page = self._get_page_from_enum(source.source_type) if page is None: return None @@ -129,7 +141,7 @@ class Pages: return data_object def fetch_from_url(self, url: str) -> Optional[DataObject]: - source = Source.match_url(url, SourcePages.MANUAL) + source = Source.match_url(url, SourceType.MANUAL) if source is None: return None @@ -198,13 +210,39 @@ class Pages: return download_result + def _extract_fields_from_template(self, path_template: str) -> Set[str]: + return set(re.findall(r"{([^}]+)}", path_template)) + + def _parse_path_template(self, path_template: str, naming: Dict[str, List[str]]) -> str: + field_names: Set[str] = self._extract_fields_from_template(path_template) + + for field in field_names: + if len(naming[field]) == 0: + raise MKMissingNameException(f"Missing field for {field}.") + + path_template = path_template.replace(f"{{{field}}}", naming[field][0]) + + return possible_parts + + def _get_pages_with_source(self, data_object: DataObject, sort_by_attribute: str = "DOWNLOAD_PRIORITY") -> List[Page]: + pages = [self._get_page_from_enum(s.source_type) for s in data_object.source_collection.get_sources()] + pages.sort(key=lambda p: getattr(p, sort_by_attribute), reverse=True) + return list(pages) + def _download_song(self, song: Song, naming: dict) -> DownloadOptions: + """ + TODO + Search the song in the file system. + """ + r = DownloadResult(total=1) + # pre process the data recursively song.compile() # manage the naming naming: Dict[str, List[str]] = defaultdict(list, naming) naming["song"].append(song.title_string) + naming["genre"].append(song.genre) naming["isrc"].append(song.isrc) naming["album"].extend(a.title_string for a in song.album_collection) naming["album_type"].extend(a.album_type.value for a in song.album_collection) @@ -216,21 +254,41 @@ class Pages: for key, value in naming.items(): # https://stackoverflow.com/a/17016257 naming[key] = list(dict.fromkeys(items)) - naming[key] = [fit_to_file_system(i) for i in naming[key] if i is not None] - # get every possible path - path_format = Path(main_settings["download_path"], main_settings["download_file"]) - for part in path_format.parts: - pass + # manage the targets + tmp: Target = Target.temp(file_extension=main_settings["audio_format"]) + found_on_disc = False - return DownloadOptions() + song.target_collection.append(Target( + relative_to_music_dir=True, + file_path=Path( + self._parse_path_template(main_settings["download_path"], naming=naming), + self._parse_path_template(main_settings["download_file"], naming=naming), + ) + )) + for target in song.target_collection: + if target.exists(): + output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY) + + found_on_disc = True + r.found_on_disk += 1 + target.copy_content(tmp) + else: + target.create_parent_directories() + output(f'- {target.file_path}', color=BColors.GREY) + + # actually download + for page in self._get_pages_with_source(song, sort_by_attribute="DOWNLOAD_PRIORITY"): + r = page.download_song_to_target(song, tmp, r) + + return r def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]: - source = Source.match_url(url, SourcePages.MANUAL) + source = Source.match_url(url, SourceType.MANUAL) if source is None: raise UrlNotFoundException(url=url) - _actual_page = self._source_to_page[source.page_enum] + _actual_page = self._source_to_page[source.source_type] return _actual_page, self._page_instances[_actual_page].fetch_object_from_source(source=source, stop_at_level=stop_at_level) \ No newline at end of file diff --git a/music_kraken/download/results.py b/music_kraken/download/results.py index a8fead7..00afea9 100644 --- a/music_kraken/download/results.py +++ b/music_kraken/download/results.py @@ -2,7 +2,7 @@ from typing import Tuple, Type, Dict, List, Generator, Union from dataclasses import dataclass from ..objects import DatabaseObject -from ..utils.enums.source import SourcePages +from ..utils.enums.source import SourceType from ..pages import Page, EncyclopaediaMetallum, Musify diff --git a/music_kraken/objects/__init__.py b/music_kraken/objects/__init__.py index 0504353..7c7515d 100644 --- a/music_kraken/objects/__init__.py +++ b/music_kraken/objects/__init__.py @@ -3,7 +3,7 @@ from .option import Options from .metadata import Metadata, Mapping as ID3Mapping, ID3Timestamp -from .source import Source, SourcePages, SourceTypes +from .source import Source, SourceType from .song import ( Song, diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py index ff68d6a..0385bca 100644 --- a/music_kraken/objects/source.py +++ b/music_kraken/objects/source.py @@ -8,7 +8,7 @@ from dataclasses import dataclass, field from functools import cached_property from ..utils import generate_id -from ..utils.enums.source import SourcePages, SourceTypes +from ..utils.enums import SourceType from ..utils.config import youtube_settings from ..utils.string_processing import hash_url, shorten_display_url @@ -20,22 +20,22 @@ from .collection import Collection @dataclass class Source: - page_enum: SourcePages + source_type: SourceType url: str - referrer_page: SourcePages = None + referrer_page: SourceType = None audio_url: Optional[str] = None additional_data: dict = field(default_factory=dict) def __post_init__(self): - self.referrer_page = self.referrer_page or self.page_enum + self.referrer_page = self.referrer_page or self.source_type @property def parsed_url(self) -> ParseResult: return urlparse(self.url) @classmethod - def match_url(cls, url: str, referrer_page: SourcePages) -> Optional[Source]: + def match_url(cls, url: str, referrer_page: SourceType) -> Optional[Source]: """ this shouldn't be used, unless you are not certain what the source is for the reason is that it is more inefficient @@ -44,38 +44,38 @@ class Source: url = parsed_url.geturl() if "musify" in parsed_url.netloc: - return cls(SourcePages.MUSIFY, url, referrer_page=referrer_page) + return cls(SourceType.MUSIFY, url, referrer_page=referrer_page) if parsed_url.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]: - return cls(SourcePages.YOUTUBE, url, referrer_page=referrer_page) + return cls(SourceType.YOUTUBE, url, referrer_page=referrer_page) if url.startswith("https://www.deezer"): - return cls(SourcePages.DEEZER, url, referrer_page=referrer_page) + return cls(SourceType.DEEZER, url, referrer_page=referrer_page) if url.startswith("https://open.spotify.com"): - return cls(SourcePages.SPOTIFY, url, referrer_page=referrer_page) + return cls(SourceType.SPOTIFY, url, referrer_page=referrer_page) if "bandcamp" in url: - return cls(SourcePages.BANDCAMP, url, referrer_page=referrer_page) + return cls(SourceType.BANDCAMP, url, referrer_page=referrer_page) if "wikipedia" in parsed_url.netloc: - return cls(SourcePages.WIKIPEDIA, url, referrer_page=referrer_page) + return cls(SourceType.WIKIPEDIA, url, referrer_page=referrer_page) if url.startswith("https://www.metal-archives.com/"): - return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page) + return cls(SourceType.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page) # the less important once if url.startswith("https://www.facebook"): - return cls(SourcePages.FACEBOOK, url, referrer_page=referrer_page) + return cls(SourceType.FACEBOOK, url, referrer_page=referrer_page) if url.startswith("https://www.instagram"): - return cls(SourcePages.INSTAGRAM, url, referrer_page=referrer_page) + return cls(SourceType.INSTAGRAM, url, referrer_page=referrer_page) if url.startswith("https://twitter"): - return cls(SourcePages.TWITTER, url, referrer_page=referrer_page) + return cls(SourceType.TWITTER, url, referrer_page=referrer_page) if url.startswith("https://myspace.com"): - return cls(SourcePages.MYSPACE, url, referrer_page=referrer_page) + return cls(SourceType.MYSPACE, url, referrer_page=referrer_page) @property def hash_url(self) -> str: @@ -89,21 +89,21 @@ class Source: return r def __repr__(self) -> str: - return f"Src({self.page_enum.value}: {shorten_display_url(self.url)})" + return f"Src({self.source_type.value}: {shorten_display_url(self.url)})" def __merge__(self, other: Source, **kwargs): if self.audio_url is None: self.audio_url = other.audio_url self.additional_data.update(other.additional_data) - page_str = property(fget=lambda self: self.page_enum.value) + page_str = property(fget=lambda self: self.source_type.value) class SourceCollection: __change_version__ = generate_id() _indexed_sources: Dict[str, Source] - _page_to_source_list: Dict[SourcePages, List[Source]] + _page_to_source_list: Dict[SourceType, List[Source]] def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs): self._page_to_source_list = defaultdict(list) @@ -111,7 +111,7 @@ class SourceCollection: self.extend(data or []) - def has_source_page(self, *source_pages: SourcePages) -> bool: + def has_source_page(self, *source_pages: SourceType) -> bool: return any(source_page in self._page_to_source_list for source_page in source_pages) def get_sources(self, *source_pages: List[Source]) -> Generator[Source]: @@ -135,7 +135,7 @@ class SourceCollection: existing_source.__merge__(source) source = existing_source else: - self._page_to_source_list[source.page_enum].append(source) + self._page_to_source_list[source.source_type].append(source) changed = False for key in source.indexing_values: @@ -157,7 +157,7 @@ class SourceCollection: self.extend(other) @property - def source_pages(self) -> Iterable[SourcePages]: + def source_pages(self) -> Iterable[SourceType]: return sorted(self._page_to_source_list.keys(), key=lambda page: page.value) @property diff --git a/music_kraken/pages/abstract.py b/music_kraken/pages/abstract.py index 22e0a7d..2e0ae49 100644 --- a/music_kraken/pages/abstract.py +++ b/music_kraken/pages/abstract.py @@ -22,7 +22,7 @@ from ..objects import ( Collection, Label, ) -from ..utils.enums.source import SourcePages +from ..utils.enums import SourceType from ..utils.enums.album import AlbumType from ..audio import write_metadata_to_target, correct_codec from ..utils.config import main_settings @@ -47,72 +47,15 @@ class DownloadOptions: process_audio_if_found: bool = False process_metadata_if_found: bool = True -class NamingDict(dict): - CUSTOM_KEYS: Dict[str, str] = { - "label": "label.name", - "artist": "artist.name", - "song": "song.title", - "isrc": "song.isrc", - "album": "album.title", - "album_type": "album.album_type_string" - } - - def __init__(self, values: dict, object_mappings: Dict[str, DatabaseObject] = None): - self.object_mappings: Dict[str, DatabaseObject] = object_mappings or dict() - - super().__init__(values) - self["audio_format"] = main_settings["audio_format"] - - def add_object(self, music_object: DatabaseObject): - self.object_mappings[type(music_object).__name__.lower()] = music_object - - def copy(self) -> dict: - return type(self)(super().copy(), self.object_mappings.copy()) - - def __getitem__(self, key: str) -> str: - return fit_to_file_system(super().__getitem__(key)) - - def default_value_for_name(self, name: str) -> str: - return f'Various {name.replace("_", " ").title()}' - - def __missing__(self, key: str) -> str: - if "." not in key: - if key not in self.CUSTOM_KEYS: - return self.default_value_for_name(key) - - key = self.CUSTOM_KEYS[key] - - frag_list = key.split(".") - - object_name = frag_list[0].strip().lower() - attribute_name = frag_list[-1].strip().lower() - - if object_name not in self.object_mappings: - return self.default_value_for_name(attribute_name) - - music_object = self.object_mappings[object_name] - try: - value = getattr(music_object, attribute_name) - if value is None: - return self.default_value_for_name(attribute_name) - - return str(value) - - except AttributeError: - return self.default_value_for_name(attribute_name) - - class Page: - """ - This is an abstract class, laying out the - functionality for every other class fetching something - """ - DOWNLOAD_PRIORITY: int = 0 - SOURCE_TYPE: SourcePages - LOGGER = logging.getLogger("this shouldn't be used") + SOURCE_TYPE: SourceType + LOGGER: LOGGER - # set this to true, if all song details can also be fetched by fetching album details - NO_ADDITIONAL_DATA_FROM_SONG = False + def __new__(cls, *args, **kwargs): + cls.SOURCE_TYPE.register_page(cls) + cls.LOGGER = logging.getLogger(cls.__name__) + + return super().__new__(cls) def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None): self.download_options: DownloadOptions = download_options or DownloadOptions() diff --git a/music_kraken/pages/bandcamp.py b/music_kraken/pages/bandcamp.py index 4a8fd6c..44bc6a1 100644 --- a/music_kraken/pages/bandcamp.py +++ b/music_kraken/pages/bandcamp.py @@ -10,7 +10,7 @@ from .abstract import Page from ..objects import ( Artist, Source, - SourcePages, + SourceType, Song, Album, Label, @@ -23,6 +23,7 @@ from ..objects import ( ) from ..connection import Connection from ..utils import dump_to_file +from ..utils.enums import SourceType, ALL_SOURCE_TYPES from ..utils.support_classes.download_result import DownloadResult from ..utils.string_processing import clean_song_title from ..utils.config import main_settings, logging_settings @@ -49,8 +50,7 @@ class BandcampTypes(Enum): class Bandcamp(Page): - DOWNLOAD_PRIORITY = 10 - SOURCE_TYPE = SourcePages.BANDCAMP + SOURCE_TYPE = ALL_SOURCE_TYPES.BANDCAMP LOGGER = logging_settings["bandcamp_logger"] def __init__(self, *args, **kwargs): diff --git a/music_kraken/pages/encyclopaedia_metallum.py b/music_kraken/pages/encyclopaedia_metallum.py index dba4527..6ebd1d7 100644 --- a/music_kraken/pages/encyclopaedia_metallum.py +++ b/music_kraken/pages/encyclopaedia_metallum.py @@ -7,7 +7,7 @@ from urllib.parse import urlparse, urlencode from ..connection import Connection from ..utils.config import logging_settings from .abstract import Page -from ..utils.enums.source import SourcePages +from ..utils.enums.source import SourceType from ..utils.enums.album import AlbumType from ..utils.support_classes.query import Query from ..objects import ( @@ -59,7 +59,7 @@ def _song_from_json(artist_html=None, album_html=None, release_type=None, title= _album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html) ], source_list=[ - Source(SourcePages.ENCYCLOPAEDIA_METALLUM, song_id) + Source(SourceType.ENCYCLOPAEDIA_METALLUM, song_id) ] ) @@ -85,7 +85,7 @@ def _artist_from_json(artist_html=None, genre=None, country=None) -> Artist: return Artist( name=artist_name, source_list=[ - Source(SourcePages.ENCYCLOPAEDIA_METALLUM, artist_url) + Source(SourceType.ENCYCLOPAEDIA_METALLUM, artist_url) ] ) @@ -105,7 +105,7 @@ def _album_from_json(album_html=None, release_type=None, artist_html=None) -> Al title=album_name, album_type=album_type, source_list=[ - Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url) + Source(SourceType.ENCYCLOPAEDIA_METALLUM, album_url) ], artist_list=[ _artist_from_json(artist_html=artist_html) @@ -207,7 +207,7 @@ def create_grid( class EncyclopaediaMetallum(Page): - SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM + SOURCE_TYPE = SourceType.ENCYCLOPAEDIA_METALLUM LOGGER = logging_settings["metal_archives_logger"] def __init__(self, **kwargs): @@ -832,7 +832,7 @@ class EncyclopaediaMetallum(Page): ) def get_source_type(self, source: Source): - if self.SOURCE_TYPE != source.page_enum: + if self.SOURCE_TYPE != source.source_type: return None url = source.url diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index 454425d..cf612af 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -9,7 +9,7 @@ from bs4 import BeautifulSoup from ..connection import Connection from .abstract import Page -from ..utils.enums.source import SourcePages +from ..utils.enums import SourceType, ALL_SOURCE_TYPES from ..utils.enums.album import AlbumType, AlbumStatus from ..objects import ( Artist, @@ -111,9 +111,7 @@ def parse_url(url: str) -> MusifyUrl: class Musify(Page): - DOWNLOAD_PRIORITY = 9 - SOURCE_TYPE = SourcePages.MUSIFY - LOGGER = logging_settings["musify_logger"] + SOURCE_TYPE = ALL_SOURCE_TYPES.MUSIFY HOST = "https://musify.club" @@ -505,7 +503,7 @@ class Musify(Page): iframe_list: List[BeautifulSoup] = video_container.findAll("iframe") for iframe in iframe_list: source_list.append(Source( - SourcePages.YOUTUBE, + SourceType.YOUTUBE, iframe["src"], referrer_page=self.SOURCE_TYPE )) diff --git a/music_kraken/pages/preset.py b/music_kraken/pages/preset.py deleted file mode 100644 index 0755089..0000000 --- a/music_kraken/pages/preset.py +++ /dev/null @@ -1,65 +0,0 @@ -from typing import List, Optional, Type -from urllib.parse import urlparse -import logging - - -from ..objects import Source, DatabaseObject -from .abstract import Page -from ..objects import ( - Artist, - Source, - SourcePages, - Song, - Album, - Label, - Target -) -from ..connection import Connection -from ..utils.support_classes.query import Query -from ..utils.support_classes.download_result import DownloadResult - -class Preset(Page): - # CHANGE - SOURCE_TYPE = SourcePages.PRESET - LOGGER = logging.getLogger("preset") - - def __init__(self, *args, **kwargs): - self.connection: Connection = Connection( - host="https://www.preset.cum/", - logger=self.LOGGER - ) - - super().__init__(*args, **kwargs) - - def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: - return super().get_source_type(source) - - def general_search(self, search_query: str) -> List[DatabaseObject]: - return [] - - def label_search(self, label: Label) -> List[Label]: - return [] - - def artist_search(self, artist: Artist) -> List[Artist]: - return [] - - def album_search(self, album: Album) -> List[Album]: - return [] - - def song_search(self, song: Song) -> List[Song]: - return [] - - def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: - return Song() - - def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: - return Album() - - def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: - return Artist() - - def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: - return Label() - - def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: - return DownloadResult() diff --git a/music_kraken/pages/youtube.py b/music_kraken/pages/youtube.py index afc5501..5dda132 100644 --- a/music_kraken/pages/youtube.py +++ b/music_kraken/pages/youtube.py @@ -9,7 +9,6 @@ from .abstract import Page from ..objects import ( Artist, Source, - SourcePages, Song, Album, Label, @@ -19,6 +18,7 @@ from ..objects import ( ) from ..connection import Connection from ..utils.string_processing import clean_song_title +from ..utils.enums import SourceType, ALL_SOURCE_TYPES from ..utils.support_classes.download_result import DownloadResult from ..utils.config import youtube_settings, main_settings, logging_settings @@ -39,8 +39,7 @@ def get_piped_url(path: str = "", params: str = "", query: str = "", fragment: s class YouTube(SuperYouTube): # CHANGE - SOURCE_TYPE = SourcePages.YOUTUBE - LOGGER = logging_settings["youtube_logger"] + SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE NO_ADDITIONAL_DATA_FROM_SONG = False diff --git a/music_kraken/pages/youtube_music/_list_render.py b/music_kraken/pages/youtube_music/_list_render.py index bb6f40b..2158385 100644 --- a/music_kraken/pages/youtube_music/_list_render.py +++ b/music_kraken/pages/youtube_music/_list_render.py @@ -7,7 +7,6 @@ from ..abstract import Page from ...objects import ( Artist, Source, - SourcePages, Song, Album, Label, diff --git a/music_kraken/pages/youtube_music/_music_object_render.py b/music_kraken/pages/youtube_music/_music_object_render.py index 831d50d..43aee3e 100644 --- a/music_kraken/pages/youtube_music/_music_object_render.py +++ b/music_kraken/pages/youtube_music/_music_object_render.py @@ -3,12 +3,13 @@ from enum import Enum from ...utils.config import youtube_settings, logging_settings from ...utils.string_processing import clean_song_title +from ...utils.enums import SourceType, ALL_SOURCE_TYPES + from ...objects import Source, DatabaseObject from ..abstract import Page from ...objects import ( Artist, Source, - SourcePages, Song, Album, Label, @@ -18,7 +19,7 @@ from ...objects import ( LOGGER = logging_settings["youtube_music_logger"] -SOURCE_PAGE = SourcePages.YOUTUBE_MUSIC +SOURCE_PAGE = ALL_SOURCE_TYPES.YOUTUBE class PageType(Enum): diff --git a/music_kraken/pages/youtube_music/super_youtube.py b/music_kraken/pages/youtube_music/super_youtube.py index 3ba1cee..df900a1 100644 --- a/music_kraken/pages/youtube_music/super_youtube.py +++ b/music_kraken/pages/youtube_music/super_youtube.py @@ -10,7 +10,6 @@ from ..abstract import Page from ...objects import ( Artist, Source, - SourcePages, Song, Album, Label, @@ -21,6 +20,7 @@ from ...objects import ( from ...connection import Connection from ...utils.support_classes.download_result import DownloadResult from ...utils.config import youtube_settings, logging_settings, main_settings +from ...utils.enums import SourceType, ALL_SOURCE_TYPES def get_invidious_url(path: str = "", params: str = "", query: str = "", fragment: str = "") -> str: @@ -50,7 +50,7 @@ class YouTubeUrl: """ def __init__(self, url: str) -> None: - self.SOURCE_TYPE = SourcePages.YOUTUBE + self.SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE """ Raises Index exception for wrong url, and value error for not found enum type @@ -58,9 +58,6 @@ class YouTubeUrl: self.id = "" parsed = urlparse(url=url) - if parsed.netloc == "music.youtube.com": - self.SOURCE_TYPE = SourcePages.YOUTUBE_MUSIC - self.url_type: YouTubeUrlType type_frag_list = parsed.path.split("/") @@ -124,8 +121,7 @@ class YouTubeUrl: class SuperYouTube(Page): # CHANGE - SOURCE_TYPE = SourcePages.YOUTUBE - LOGGER = logging_settings["youtube_logger"] + SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE NO_ADDITIONAL_DATA_FROM_SONG = False diff --git a/music_kraken/pages/youtube_music/youtube_music.py b/music_kraken/pages/youtube_music/youtube_music.py index 9339c9f..d62df42 100644 --- a/music_kraken/pages/youtube_music/youtube_music.py +++ b/music_kraken/pages/youtube_music/youtube_music.py @@ -27,7 +27,7 @@ from ..abstract import Page from ...objects import ( Artist, Source, - SourcePages, + SourceType, Song, Album, Label, @@ -176,7 +176,7 @@ ALBUM_TYPE_MAP = { class YoutubeMusic(SuperYouTube): # CHANGE - SOURCE_TYPE = SourcePages.YOUTUBE_MUSIC + SOURCE_TYPE = SourceType.YOUTUBE_MUSIC LOGGER = logging_settings["youtube_music_logger"] def __init__(self, *args, ydl_opts: dict = None, **kwargs): @@ -619,7 +619,7 @@ class YoutubeMusic(SuperYouTube): Artist( name=name, source_list=[Source( - SourcePages.YOUTUBE_MUSIC, + SourceType.YOUTUBE_MUSIC, f"https://music.youtube.com/channel/{ydl_res.get('channel_id', ydl_res.get('uploader_id', ''))}" )] ) for name in artist_names] @@ -640,7 +640,7 @@ class YoutubeMusic(SuperYouTube): artwork=Artwork(*ydl_res.get("thumbnails", [])), main_artist_list=artist_list, source_list=[Source( - SourcePages.YOUTUBE_MUSIC, + SourceType.YOUTUBE_MUSIC, f"https://music.youtube.com/watch?v={ydl_res.get('id')}" ), source], ) diff --git a/music_kraken/utils/enums/__init__.py b/music_kraken/utils/enums/__init__.py index b90f9aa..e460a03 100644 --- a/music_kraken/utils/enums/__init__.py +++ b/music_kraken/utils/enums/__init__.py @@ -1 +1,53 @@ -from .source import SourcePages \ No newline at end of file +from __future__ import annotations + +from dataclasses import dataclass +from typing import Optional, TYPE_CHECKING, Type +if TYPE_CHECKING: + from ...pages.abstract import Page + +@dataclass +class SourceType: + name: str + homepage: Optional[str] = None + download_priority: int = 0 + + page_type: Type[Page] = None + page: Page = None + + + def register_page(self, page_type: Type[Page]): + self.page_type = page + self.page = page_type() + + @property + def has_page(self) -> bool: + return self.page is not None + + # for backwards compatibility + @property + def value(self) -> str: + return self.name + + +class ALL_SOURCE_TYPES: + YOUTUBE = SourceType(name="youtube", homepage="https://music.youtube.com/") + BANDCAMP = SourceType(name="bandcamp", homepage="https://bandcamp.com/", download_priority=10) + MUSIFY = SourceType(name="musify", homepage="https://musify.club/", download_priority=7) + + GENIUS = SourceType(name="genius", homepage="https://genius.com/") + MUSICBRAINZ = SourceType(name="musicbrainz", homepage="https://musicbrainz.org/") + ENCYCLOPAEDIA_METALLUM = SourceType(name="encyclopaedia metallum") + DEEZER = SourceType(name="deezer", homepage="https://www.deezer.com/") + SPOTIFY = SourceType(name="spotify", homepage="https://open.spotify.com/") + + # This has nothing to do with audio, but bands can be here + WIKIPEDIA = SourceType(name="wikipedia", homepage="https://en.wikipedia.org/wiki/Main_Page") + INSTAGRAM = SourceType(name="instagram", homepage="https://www.instagram.com/") + FACEBOOK = SourceType(name="facebook", homepage="https://www.facebook.com/") + TWITTER = SourceType(name="twitter", homepage="https://twitter.com/") + # Yes somehow this ancient site is linked EVERYWHERE + MYSPACE = SourceType(name="myspace", homepage="https://myspace.com/") + + MANUAL = SourceType(name="manual") + + PRESET = SourceType(name="preset") diff --git a/music_kraken/utils/enums/source.py b/music_kraken/utils/enums/source.py deleted file mode 100644 index be3171f..0000000 --- a/music_kraken/utils/enums/source.py +++ /dev/null @@ -1,40 +0,0 @@ -from enum import Enum - - -class SourceTypes(Enum): - SONG = "song" - ALBUM = "album" - ARTIST = "artist" - LYRICS = "lyrics" - - -class SourcePages(Enum): - YOUTUBE = "youtube", "https://www.youtube.com/" - MUSIFY = "musify", "https://musify.club/" - YOUTUBE_MUSIC = "youtube music", "https://music.youtube.com/" - GENIUS = "genius", "https://genius.com/" - MUSICBRAINZ = "musicbrainz", "https://musicbrainz.org/" - ENCYCLOPAEDIA_METALLUM = "encyclopaedia metallum" - BANDCAMP = "bandcamp", "https://bandcamp.com/" - DEEZER = "deezer", "https://www.deezer.com/" - SPOTIFY = "spotify", "https://open.spotify.com/" - - # This has nothing to do with audio, but bands can be here - WIKIPEDIA = "wikipedia", "https://en.wikipedia.org/wiki/Main_Page" - INSTAGRAM = "instagram", "https://www.instagram.com/" - FACEBOOK = "facebook", "https://www.facebook.com/" - TWITTER = "twitter", "https://twitter.com/" - MYSPACE = "myspace", "https://myspace.com/" # Yes somehow this ancient site is linked EVERYWHERE - - MANUAL = "manual", "" - - PRESET = "preset", "" - - def __new__(cls, value, homepage = None): - member = object.__new__(cls) - - member._value_ = value - member.homepage = homepage - - return member - \ No newline at end of file diff --git a/music_kraken/utils/exception/__init__.py b/music_kraken/utils/exception/__init__.py index 746fe78..8f139fb 100644 --- a/music_kraken/utils/exception/__init__.py +++ b/music_kraken/utils/exception/__init__.py @@ -4,8 +4,20 @@ class MKBaseException(Exception): super().__init__(message, **kwargs) +# Downloading +class MKDownloadException(MKBaseException): + pass + + +class MKMissingNameException(MKDownloadException): + pass + + +# Frontend class MKFrontendException(MKBaseException): pass class MKInvalidInputException(MKFrontendException): pass + + From 14f986a497afef4d8f52ede1882fce7642822732 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 15 May 2024 11:44:39 +0200 Subject: [PATCH 07/18] draft: rewrote sources --- music_kraken/cli/main_downloader.py | 2 - music_kraken/download/page_attributes.py | 9 --- music_kraken/objects/source.py | 85 +++++++++++++++++++----- music_kraken/pages/abstract.py | 2 - music_kraken/pages/youtube.py | 2 - 5 files changed, 67 insertions(+), 33 deletions(-) diff --git a/music_kraken/cli/main_downloader.py b/music_kraken/cli/main_downloader.py index 7140ff3..ac91cab 100644 --- a/music_kraken/cli/main_downloader.py +++ b/music_kraken/cli/main_downloader.py @@ -178,8 +178,6 @@ class Downloader: page_count = 0 for option in self.current_results.formatted_generator(): if isinstance(option, Option): - _downloadable = self.pages.is_downloadable(option.music_object) - r = f"{BColors.GREY.value}{option.index:0{self.option_digits}}{BColors.ENDC.value} {option.music_object.option_string}" print(r) else: diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index 8d9bb42..6cef729 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -146,15 +146,6 @@ class Pages: return None return self.fetch_from_source(source=source) - - def is_downloadable(self, music_object: DataObject) -> bool: - _page_types = set(self._source_to_page) - for src in music_object.source_collection.source_pages: - if src in self._source_to_page: - _page_types.add(self._source_to_page[src]) - - audio_pages = self._audio_pages_set.intersection(_page_types) - return len(audio_pages) > 0 def _skip_object(self, data_object: DataObject) -> bool: if isinstance(data_object, Album): diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py index 0385bca..6258468 100644 --- a/music_kraken/objects/source.py +++ b/music_kraken/objects/source.py @@ -2,7 +2,7 @@ from __future__ import annotations from collections import defaultdict from enum import Enum -from typing import List, Dict, Set, Tuple, Optional, Iterable, Generator +from typing import List, Dict, Set, Tuple, Optional, Iterable, Generator, TypedDict, Callable, Any from urllib.parse import urlparse, ParseResult from dataclasses import dataclass, field from functools import cached_property @@ -29,10 +29,6 @@ class Source: def __post_init__(self): self.referrer_page = self.referrer_page or self.source_type - - @property - def parsed_url(self) -> ParseResult: - return urlparse(self.url) @classmethod def match_url(cls, url: str, referrer_page: SourceType) -> Optional[Source]: @@ -77,6 +73,18 @@ class Source: if url.startswith("https://myspace.com"): return cls(SourceType.MYSPACE, url, referrer_page=referrer_page) + @property + def has_page(self) -> bool: + return self.source_type.page is not None + + @property + def page(self) -> OuterProxy: + return self.source_type.page + + @property + def parsed_url(self) -> ParseResult: + return urlparse(self.url) + @property def hash_url(self) -> str: return hash_url(self.url) @@ -99,11 +107,17 @@ class Source: page_str = property(fget=lambda self: self.source_type.value) +class SourceTypeSorting(TypedDict): + sort_key: Callable[[SourceType], Any] + reverse: bool + only_with_page: bool + + class SourceCollection: __change_version__ = generate_id() _indexed_sources: Dict[str, Source] - _page_to_source_list: Dict[SourceType, List[Source]] + _sources_by_type: Dict[SourceType, List[Source]] def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs): self._page_to_source_list = defaultdict(list) @@ -111,15 +125,54 @@ class SourceCollection: self.extend(data or []) - def has_source_page(self, *source_pages: SourceType) -> bool: - return any(source_page in self._page_to_source_list for source_page in source_pages) + def source_types( + self, + only_with_page: bool = False, + sort_key = lambda page: page.name, + reverse: bool = False + ) -> Iterable[SourceType]: + """ + Returns a list of all source types contained in this source collection. - def get_sources(self, *source_pages: List[Source]) -> Generator[Source]: - if not len(source_pages): - source_pages = self.source_pages + Args: + only_with_page (bool, optional): If True, only returns source types that have a page, meaning you can download from them. + sort_key (function, optional): A function that defines the sorting key for the source types. Defaults to lambda page: page.name. + reverse (bool, optional): If True, sorts the source types in reverse order. Defaults to False. - for page in source_pages: - yield from self._page_to_source_list[page] + Returns: + Iterable[SourceType]: A list of source types. + """ + + source_types: List[SourceType] = self._page_to_source_list.keys() + if only_with_page: + source_types = filter(lambda st: st.has_page, source_types) + + return sorted( + source_types, + key=sort_key, + reverse=reverse + ) + + def get_sources(self, *source_types: List[SourceType], source_type_sorting: SourceTypeSorting = None) -> Generator[Source]: + """ + Retrieves sources based on the provided source types and source type sorting. + + Args: + *source_types (List[Source]): Variable number of source types to filter the sources. + source_type_sorting (SourceTypeSorting): Sorting criteria for the source types. This is only relevant if no source types are provided. + + Yields: + Generator[Source]: A generator that yields the sources based on the provided filters. + + Returns: + None + """ + if not len(source_types): + source_type_sorting = source_type_sorting or {} + source_types = self.source_types(**source_type_sorting) + + for source_type in source_types: + yield from self._page_to_source_list[source_type] def append(self, source: Source): if source is None: @@ -156,10 +209,6 @@ class SourceCollection: def __merge__(self, other: SourceCollection, **kwargs): self.extend(other) - @property - def source_pages(self) -> Iterable[SourceType]: - return sorted(self._page_to_source_list.keys(), key=lambda page: page.value) - @property def hash_url_list(self) -> List[str]: return [hash_url(source.url) for source in self.get_sources()] @@ -170,7 +219,7 @@ class SourceCollection: @property def homepage_list(self) -> List[str]: - return [source.homepage for source in self.source_pages] + return [source_type.homepage for source_type in self._sources_by_type.keys()] def indexing_values(self) -> Generator[Tuple[str, str], None, None]: for index in self._indexed_sources: diff --git a/music_kraken/pages/abstract.py b/music_kraken/pages/abstract.py index 2e0ae49..763c110 100644 --- a/music_kraken/pages/abstract.py +++ b/music_kraken/pages/abstract.py @@ -246,8 +246,6 @@ class Page: else: output(f'- {target.file_path}', color=BColors.GREY) - if not song.source_collection.has_source_page(self.SOURCE_TYPE): - return DownloadResult(error_message=f"No {self.__class__.__name__} source found for {song.option_string}.") sources = song.source_collection.get_sources(self.SOURCE_TYPE) diff --git a/music_kraken/pages/youtube.py b/music_kraken/pages/youtube.py index 5dda132..5f65631 100644 --- a/music_kraken/pages/youtube.py +++ b/music_kraken/pages/youtube.py @@ -41,8 +41,6 @@ class YouTube(SuperYouTube): # CHANGE SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE - NO_ADDITIONAL_DATA_FROM_SONG = False - def __init__(self, *args, **kwargs): self.connection: Connection = Connection( host=get_invidious_url(), From cc142532394a64d8556074a2bcf506b77628167d Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 15 May 2024 12:18:08 +0200 Subject: [PATCH 08/18] draft: streaming the audio --- music_kraken/download/page_attributes.py | 50 +++++++++++++++--------- music_kraken/objects/source.py | 18 ++++++++- music_kraken/pages/bandcamp.py | 1 - 3 files changed, 47 insertions(+), 22 deletions(-) diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index 6cef729..2a4297d 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -16,6 +16,7 @@ from ..objects import ( Artist, Label, ) +from ..audio import write_metadata_to_target, correct_codec from ..utils.string_processing import fit_to_file_system from ..utils.config import youtube_settings, main_settings from ..utils.path_manager import LOCATIONS @@ -125,15 +126,10 @@ class Pages: return data_object def fetch_from_source(self, source: Source, **kwargs) -> Optional[DataObject]: - page: Page = self._get_page_from_enum(source.source_type) - if page is None: + if not source.has_page: return None - # getting the appropriate function for the page and the object type - source_type = page.get_source_type(source) - if not hasattr(page, fetch_map[source_type]): - return None - func = getattr(page, fetch_map[source_type])(source=source, **kwargs) + func = getattr(source.page, fetch_map[source_type])(source=source, **kwargs) # fetching the data object and marking it as fetched data_object: DataObject = func(source=source) @@ -215,11 +211,6 @@ class Pages: return possible_parts - def _get_pages_with_source(self, data_object: DataObject, sort_by_attribute: str = "DOWNLOAD_PRIORITY") -> List[Page]: - pages = [self._get_page_from_enum(s.source_type) for s in data_object.source_collection.get_sources()] - pages.sort(key=lambda p: getattr(p, sort_by_attribute), reverse=True) - return list(pages) - def _download_song(self, song: Song, naming: dict) -> DownloadOptions: """ TODO @@ -248,7 +239,6 @@ class Pages: # manage the targets tmp: Target = Target.temp(file_extension=main_settings["audio_format"]) - found_on_disc = False song.target_collection.append(Target( relative_to_music_dir=True, @@ -260,17 +250,39 @@ class Pages: for target in song.target_collection: if target.exists(): output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY) - - found_on_disc = True r.found_on_disk += 1 - target.copy_content(tmp) + + if self.download_options.download_again_if_found: + target.copy_content(tmp) else: target.create_parent_directories() output(f'- {target.file_path}', color=BColors.GREY) - # actually download - for page in self._get_pages_with_source(song, sort_by_attribute="DOWNLOAD_PRIORITY"): - r = page.download_song_to_target(song, tmp, r) + # this streams from every available source until something succeeds, setting the skip intervals to the values of the according source + used_source: Optional[Source] = None + skip_intervals: List[Tuple[float, float]] = [] + for source in song.source_collection.get_sources(source_type_sorting={ + "only_with_page": True, + "sort_key": lambda page: page.download_priority, + "reverse": True, + }): + if tmp.exists: + break + + used_source = source + streaming_results = source.page.download_song_to_target(source=source, target=tmp, desc="download") + skip_intervals = source.page.get_skip_intervals(song=song, source=source) + + # if something has been downloaded but it somehow failed, delete the file + if streaming_results.is_fatal_error and tmp.exists: + tmp.delete() + + # if everything went right, the file should exist now + if not tmp.exists: + r.error_message = f"The song {song.option_string} couldn't be downloaded." + return r + + # post process the audio return r diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py index 6258468..3d0b492 100644 --- a/music_kraken/objects/source.py +++ b/music_kraken/objects/source.py @@ -2,7 +2,19 @@ from __future__ import annotations from collections import defaultdict from enum import Enum -from typing import List, Dict, Set, Tuple, Optional, Iterable, Generator, TypedDict, Callable, Any +from typing import ( + List, + Dict, + Set, + Tuple, + Optional, + Iterable, + Generator, + TypedDict, + Callable, + Any, + TYPE_CHECKING +) from urllib.parse import urlparse, ParseResult from dataclasses import dataclass, field from functools import cached_property @@ -15,6 +27,8 @@ from ..utils.string_processing import hash_url, shorten_display_url from .metadata import Mapping, Metadata from .parents import OuterProxy from .collection import Collection +if TYPE_CHECKING: + from ..pages.abstract import Page @@ -78,7 +92,7 @@ class Source: return self.source_type.page is not None @property - def page(self) -> OuterProxy: + def page(self) -> Page: return self.source_type.page @property diff --git a/music_kraken/pages/bandcamp.py b/music_kraken/pages/bandcamp.py index 44bc6a1..30dbbb0 100644 --- a/music_kraken/pages/bandcamp.py +++ b/music_kraken/pages/bandcamp.py @@ -51,7 +51,6 @@ class BandcampTypes(Enum): class Bandcamp(Page): SOURCE_TYPE = ALL_SOURCE_TYPES.BANDCAMP - LOGGER = logging_settings["bandcamp_logger"] def __init__(self, *args, **kwargs): self.connection: Connection = Connection( From ac6c513d5671c9ae286ee00bce931d37672a27a9 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 15 May 2024 12:30:54 +0200 Subject: [PATCH 09/18] draft: post process song --- music_kraken/audio/codec.py | 6 +++--- music_kraken/download/page_attributes.py | 16 +++++++++++++++- music_kraken/pages/abstract.py | 8 ++++---- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/music_kraken/audio/codec.py b/music_kraken/audio/codec.py index ccb7bb1..531bc1f 100644 --- a/music_kraken/audio/codec.py +++ b/music_kraken/audio/codec.py @@ -10,12 +10,12 @@ from ..objects import Target LOGGER = logging_settings["codex_logger"] -def correct_codec(target: Target, bitrate_kb: int = main_settings["bitrate"], audio_format: str = main_settings["audio_format"], interval_list: List[Tuple[float, float]] = None): +def correct_codec(target: Target, bitrate_kb: int = main_settings["bitrate"], audio_format: str = main_settings["audio_format"], skip_intervals: List[Tuple[float, float]] = None): if not target.exists: LOGGER.warning(f"Target doesn't exist: {target.file_path}") return - interval_list = interval_list or [] + skip_intervals = skip_intervals or [] bitrate_b = int(bitrate_kb / 1024) @@ -29,7 +29,7 @@ def correct_codec(target: Target, bitrate_kb: int = main_settings["bitrate"], au start = 0 next_start = 0 - for end, next_start in interval_list: + for end, next_start in skip_intervals: aselect_list.append(f"between(t,{start},{end})") start = next_start aselect_list.append(f"gte(t,{next_start})") diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index 2a4297d..6b52704 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -279,11 +279,25 @@ class Pages: # if everything went right, the file should exist now if not tmp.exists: - r.error_message = f"The song {song.option_string} couldn't be downloaded." + if used_source is None: + r.error_message = f"No source found for {song.option_string}." + else: + r.error_message = f"Something went wrong downloading {song.option_string}." return r # post process the audio + found_on_disk = used_source is None + if not found_on_disk or self.download_options.process_audio_if_found: + correct_codec(target=tmp, skip_intervals=skip_intervals) + r.sponsor_segments = len(skip_intervals) + if used_source is not None: + used_source.page.post_process_hook(song=song, temp_target=tmp) + + if not found_on_disc or self.download_options.process_metadata_if_found: + write_metadata_to_target(metadata=song.metadata, target=tmp, song=song) + + tmp.delete() return r def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]: diff --git a/music_kraken/pages/abstract.py b/music_kraken/pages/abstract.py index 763c110..f542671 100644 --- a/music_kraken/pages/abstract.py +++ b/music_kraken/pages/abstract.py @@ -262,15 +262,15 @@ class Page: r.merge(self._post_process_targets( song=song, temp_target=temp_target, - interval_list=skip_intervals, + skip_intervals=skip_intervals, found_on_disc=found_on_disc, )) return r - def _post_process_targets(self, song: Song, temp_target: Target, interval_list: List, found_on_disc: bool) -> DownloadResult: + def _post_process_targets(self, song: Song, temp_target: Target, skip_intervals: List, found_on_disc: bool) -> DownloadResult: if not found_on_disc or self.download_options.process_audio_if_found: - correct_codec(temp_target, interval_list=interval_list) + correct_codec(temp_target, skip_intervals=skip_intervals) self.post_process_hook(song, temp_target) @@ -286,7 +286,7 @@ class Page: r.add_target(target) temp_target.delete() - r.sponsor_segments += len(interval_list) + r.sponsor_segments += len(skip_intervals) return r From bedd0fe819c068161794d2518567618233b90357 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 15 May 2024 13:16:11 +0200 Subject: [PATCH 10/18] fix: runtime errors --- development/actual_donwload.py | 5 +- music_kraken/cli/main_downloader.py | 2 +- music_kraken/download/__init__.py | 1 + music_kraken/download/page_attributes.py | 29 ++-- music_kraken/download/results.py | 1 - music_kraken/objects/parents.py | 1 + music_kraken/objects/source.py | 34 ++-- music_kraken/pages/abstract.py | 151 +----------------- music_kraken/pages/bandcamp.py | 3 +- music_kraken/pages/encyclopaedia_metallum.py | 10 +- .../pages/youtube_music/youtube_music.py | 17 +- music_kraken/utils/enums/__init__.py | 7 +- 12 files changed, 65 insertions(+), 196 deletions(-) diff --git a/development/actual_donwload.py b/development/actual_donwload.py index a8eb732..d91876e 100644 --- a/development/actual_donwload.py +++ b/development/actual_donwload.py @@ -6,8 +6,9 @@ logging.getLogger().setLevel(logging.DEBUG) if __name__ == "__main__": commands = [ - "s: #a Crystal F", - "d: 20", + "s: #a I'm in a coffin", + "0", + "d: 0", ] diff --git a/music_kraken/cli/main_downloader.py b/music_kraken/cli/main_downloader.py index ac91cab..e3fe2cb 100644 --- a/music_kraken/cli/main_downloader.py +++ b/music_kraken/cli/main_downloader.py @@ -317,7 +317,7 @@ class Downloader: for database_object in data_objects: r = self.pages.download( - music_object=database_object, + data_object=database_object, genre=self.genre, **kwargs ) diff --git a/music_kraken/download/__init__.py b/music_kraken/download/__init__.py index a52bd87..7ca0086 100644 --- a/music_kraken/download/__init__.py +++ b/music_kraken/download/__init__.py @@ -1,4 +1,5 @@ from dataclasses import dataclass, field +from typing import Set from ..utils.config import main_settings from ..utils.enums.album import AlbumType diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index 6b52704..f6f7786 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -2,6 +2,7 @@ from typing import Tuple, Type, Dict, Set, Optional, List from collections import defaultdict from pathlib import Path import re +import logging from . import FetchOptions, DownloadOptions from .results import SearchResults @@ -17,6 +18,7 @@ from ..objects import ( Label, ) from ..audio import write_metadata_to_target, correct_codec +from ..utils import output, BColors from ..utils.string_processing import fit_to_file_system from ..utils.config import youtube_settings, main_settings from ..utils.path_manager import LOCATIONS @@ -69,6 +71,8 @@ if DEBUG_PAGES: class Pages: def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False, download_options: DownloadOptions = None, fetch_options: FetchOptions = None): + self.LOGGER = logging.getLogger("download") + self.download_options: DownloadOptions = download_options or DownloadOptions() self.fetch_options: FetchOptions = fetch_options or FetchOptions() @@ -118,7 +122,9 @@ class Pages: return data_object source: Source - for source in data_object.source_collection.get_sources(): + for source in data_object.source_collection.get_sources(source_type_sorting={ + "only_with_page": True, + }): new_data_object = self.fetch_from_source(source=source, stop_at_level=stop_at_level) if new_data_object is not None: data_object.merge(new_data_object) @@ -129,10 +135,15 @@ class Pages: if not source.has_page: return None - func = getattr(source.page, fetch_map[source_type])(source=source, **kwargs) + source_type = source.page.get_source_type(source=source) + if source_type is None: + self.LOGGER.debug(f"Could not determine source type for {source}.") + return None + + func = getattr(source.page, fetch_map[source_type]) # fetching the data object and marking it as fetched - data_object: DataObject = func(source=source) + data_object: DataObject = func(source=source, **kwargs) data_object.mark_as_fetched(source.hash_url) return data_object @@ -175,7 +186,7 @@ class Pages: # download all children download_result: DownloadResult = DownloadResult() - for c in data_object.get_children(): + for c in data_object.get_child_collections(): for d in c: if self._skip_object(d): continue @@ -209,7 +220,7 @@ class Pages: path_template = path_template.replace(f"{{{field}}}", naming[field][0]) - return possible_parts + return path_template def _download_song(self, song: Song, naming: dict) -> DownloadOptions: """ @@ -235,7 +246,7 @@ class Pages: # removing duplicates from the naming, and process the strings for key, value in naming.items(): # https://stackoverflow.com/a/17016257 - naming[key] = list(dict.fromkeys(items)) + naming[key] = list(dict.fromkeys(value)) # manage the targets tmp: Target = Target.temp(file_extension=main_settings["audio_format"]) @@ -248,14 +259,14 @@ class Pages: ) )) for target in song.target_collection: - if target.exists(): + if target.exists: output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY) r.found_on_disk += 1 if self.download_options.download_again_if_found: target.copy_content(tmp) else: - target.create_parent_directories() + target.create_path() output(f'- {target.file_path}', color=BColors.GREY) # this streams from every available source until something succeeds, setting the skip intervals to the values of the according source @@ -294,7 +305,7 @@ class Pages: if used_source is not None: used_source.page.post_process_hook(song=song, temp_target=tmp) - if not found_on_disc or self.download_options.process_metadata_if_found: + if not found_on_disk or self.download_options.process_metadata_if_found: write_metadata_to_target(metadata=song.metadata, target=tmp, song=song) tmp.delete() diff --git a/music_kraken/download/results.py b/music_kraken/download/results.py index 00afea9..2486c26 100644 --- a/music_kraken/download/results.py +++ b/music_kraken/download/results.py @@ -2,7 +2,6 @@ from typing import Tuple, Type, Dict, List, Generator, Union from dataclasses import dataclass from ..objects import DatabaseObject -from ..utils.enums.source import SourceType from ..pages import Page, EncyclopaediaMetallum, Musify diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index 51eb8e6..16ebe6a 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -8,6 +8,7 @@ from typing import Optional, Dict, Tuple, List, Type, Generic, Any, TypeVar, Set from pathlib import Path import inspect +from .source import SourceCollection from .metadata import Metadata from ..utils import get_unix_time, object_trace, generate_id from ..utils.config import logging_settings, main_settings diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py index 3d0b492..b227cc8 100644 --- a/music_kraken/objects/source.py +++ b/music_kraken/objects/source.py @@ -20,13 +20,11 @@ from dataclasses import dataclass, field from functools import cached_property from ..utils import generate_id -from ..utils.enums import SourceType +from ..utils.enums import SourceType, ALL_SOURCE_TYPES from ..utils.config import youtube_settings from ..utils.string_processing import hash_url, shorten_display_url from .metadata import Mapping, Metadata -from .parents import OuterProxy -from .collection import Collection if TYPE_CHECKING: from ..pages.abstract import Page @@ -54,38 +52,38 @@ class Source: url = parsed_url.geturl() if "musify" in parsed_url.netloc: - return cls(SourceType.MUSIFY, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.MUSIFY, url, referrer_page=referrer_page) if parsed_url.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]: - return cls(SourceType.YOUTUBE, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.YOUTUBE, url, referrer_page=referrer_page) if url.startswith("https://www.deezer"): - return cls(SourceType.DEEZER, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.DEEZER, url, referrer_page=referrer_page) if url.startswith("https://open.spotify.com"): - return cls(SourceType.SPOTIFY, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.SPOTIFY, url, referrer_page=referrer_page) if "bandcamp" in url: - return cls(SourceType.BANDCAMP, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.BANDCAMP, url, referrer_page=referrer_page) if "wikipedia" in parsed_url.netloc: - return cls(SourceType.WIKIPEDIA, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.WIKIPEDIA, url, referrer_page=referrer_page) if url.startswith("https://www.metal-archives.com/"): - return cls(SourceType.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page) # the less important once if url.startswith("https://www.facebook"): - return cls(SourceType.FACEBOOK, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.FACEBOOK, url, referrer_page=referrer_page) if url.startswith("https://www.instagram"): - return cls(SourceType.INSTAGRAM, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.INSTAGRAM, url, referrer_page=referrer_page) if url.startswith("https://twitter"): - return cls(SourceType.TWITTER, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.TWITTER, url, referrer_page=referrer_page) if url.startswith("https://myspace.com"): - return cls(SourceType.MYSPACE, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.MYSPACE, url, referrer_page=referrer_page) @property def has_page(self) -> bool: @@ -134,7 +132,7 @@ class SourceCollection: _sources_by_type: Dict[SourceType, List[Source]] def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs): - self._page_to_source_list = defaultdict(list) + self._sources_by_type = defaultdict(list) self._indexed_sources = {} self.extend(data or []) @@ -157,7 +155,7 @@ class SourceCollection: Iterable[SourceType]: A list of source types. """ - source_types: List[SourceType] = self._page_to_source_list.keys() + source_types: List[SourceType] = self._sources_by_type.keys() if only_with_page: source_types = filter(lambda st: st.has_page, source_types) @@ -186,7 +184,7 @@ class SourceCollection: source_types = self.source_types(**source_type_sorting) for source_type in source_types: - yield from self._page_to_source_list[source_type] + yield from self._sources_by_type[source_type] def append(self, source: Source): if source is None: @@ -202,7 +200,7 @@ class SourceCollection: existing_source.__merge__(source) source = existing_source else: - self._page_to_source_list[source.source_type].append(source) + self._sources_by_type[source.source_type].append(source) changed = False for key in source.indexing_values: diff --git a/music_kraken/pages/abstract.py b/music_kraken/pages/abstract.py index f542671..8783dbb 100644 --- a/music_kraken/pages/abstract.py +++ b/music_kraken/pages/abstract.py @@ -49,15 +49,16 @@ class DownloadOptions: class Page: SOURCE_TYPE: SourceType - LOGGER: LOGGER + LOGGER: logging.Logger def __new__(cls, *args, **kwargs): - cls.SOURCE_TYPE.register_page(cls) cls.LOGGER = logging.getLogger(cls.__name__) return super().__new__(cls) def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None): + self.SOURCE_TYPE.register_page(self) + self.download_options: DownloadOptions = download_options or DownloadOptions() self.fetch_options: FetchOptions = fetch_options or FetchOptions() @@ -145,151 +146,7 @@ class Page: def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: return Label() - def download( - self, - music_object: DatabaseObject, - genre: str, - ) -> DownloadResult: - naming_dict: NamingDict = NamingDict({"genre": genre}) - - def fill_naming_objects(naming_music_object: DatabaseObject): - nonlocal naming_dict - - for collection_name in naming_music_object.UPWARDS_COLLECTION_STRING_ATTRIBUTES: - collection: Collection = getattr(naming_music_object, collection_name) - - if collection.empty: - continue - - dom_ordered_music_object: DatabaseObject = collection[0] - naming_dict.add_object(dom_ordered_music_object) - return fill_naming_objects(dom_ordered_music_object) - - fill_naming_objects(music_object) - - return self._download(music_object, naming_dict) - - def _download( - self, - music_object: DatabaseObject, - naming_dict: NamingDict, - **kwargs - ) -> DownloadResult: - if isinstance(music_object, Song): - output(f"Downloading {music_object.option_string} to:", color=BColors.BOLD) - else: - output(f"Downloading {music_object.option_string}...", color=BColors.BOLD) - - # Skips all releases, that are defined in shared.ALBUM_TYPE_BLACKLIST, if download_all is False - if isinstance(music_object, Album): - if not self.download_options.download_all and music_object.album_type in self.download_options.album_type_blacklist: - return DownloadResult() - - if not (isinstance(music_object, Song) and self.NO_ADDITIONAL_DATA_FROM_SONG): - self.fetch_details(music_object=music_object, stop_at_level=1) - - if isinstance(music_object, Album): - music_object.update_tracksort() - - naming_dict.add_object(music_object) - - if isinstance(music_object, Song): - return self._download_song(music_object, naming_dict) - - download_result: DownloadResult = DownloadResult() - - for collection_name in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES: - collection: Collection = getattr(music_object, collection_name) - - sub_ordered_music_object: DatabaseObject - for sub_ordered_music_object in collection: - download_result.merge(self._download(sub_ordered_music_object, naming_dict.copy())) - - return download_result - - def _download_song(self, song: Song, naming_dict: NamingDict): - song.compile() - if "genre" not in naming_dict and song.genre is not None: - naming_dict["genre"] = song.genre - - if song.genre is None: - song.genre = naming_dict["genre"] - - path_parts = Formatter().parse(main_settings["download_path"]) - file_parts = Formatter().parse(main_settings["download_file"]) - new_target = Target( - relative_to_music_dir=True, - file_path=Path( - main_settings["download_path"].format(**{part[1]: naming_dict[part[1]] for part in path_parts}), - main_settings["download_file"].format(**{part[1]: naming_dict[part[1]] for part in file_parts}) - ) - ) - - if song.target_collection.empty: - song.target_collection.append(new_target) - - r = DownloadResult(1) - temp_target: Target = Target.temp(file_extension=main_settings["audio_format"]) - - found_on_disc = False - target: Target - for target in song.target_collection: - current_exists = target.exists - - if current_exists: - output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY) - target.copy_content(temp_target) - found_on_disc = True - - r.found_on_disk += 1 - r.add_target(target) - else: - output(f'- {target.file_path}', color=BColors.GREY) - - - sources = song.source_collection.get_sources(self.SOURCE_TYPE) - - skip_intervals = [] - if not found_on_disc: - for source in sources: - r = self.download_song_to_target(source=source, target=temp_target, desc="downloading") - - if not r.is_fatal_error: - skip_intervals = self.get_skip_intervals(song, source) - break - - if temp_target.exists: - r.merge(self._post_process_targets( - song=song, - temp_target=temp_target, - skip_intervals=skip_intervals, - found_on_disc=found_on_disc, - )) - - return r - - def _post_process_targets(self, song: Song, temp_target: Target, skip_intervals: List, found_on_disc: bool) -> DownloadResult: - if not found_on_disc or self.download_options.process_audio_if_found: - correct_codec(temp_target, skip_intervals=skip_intervals) - - self.post_process_hook(song, temp_target) - - if not found_on_disc or self.download_options.process_metadata_if_found: - write_metadata_to_target(song.metadata, temp_target, song) - - r = DownloadResult() - - target: Target - for target in song.target_collection: - if temp_target is not target: - temp_target.copy_content(target) - r.add_target(target) - - temp_target.delete() - r.sponsor_segments += len(skip_intervals) - - return r - + # to download stuff def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]: return [] diff --git a/music_kraken/pages/bandcamp.py b/music_kraken/pages/bandcamp.py index 30dbbb0..c938189 100644 --- a/music_kraken/pages/bandcamp.py +++ b/music_kraken/pages/bandcamp.py @@ -62,8 +62,7 @@ class Bandcamp(Page): super().__init__(*args, **kwargs) def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: - parsed_url = urlparse(source.url) - path = parsed_url.path.replace("/", "") + path = source.parsed_url.path.replace("/", "") if path == "" or path.startswith("music"): return Artist diff --git a/music_kraken/pages/encyclopaedia_metallum.py b/music_kraken/pages/encyclopaedia_metallum.py index 6ebd1d7..9c1fefe 100644 --- a/music_kraken/pages/encyclopaedia_metallum.py +++ b/music_kraken/pages/encyclopaedia_metallum.py @@ -7,7 +7,7 @@ from urllib.parse import urlparse, urlencode from ..connection import Connection from ..utils.config import logging_settings from .abstract import Page -from ..utils.enums.source import SourceType +from ..utils.enums import SourceType, ALL_SOURCE_TYPES from ..utils.enums.album import AlbumType from ..utils.support_classes.query import Query from ..objects import ( @@ -59,7 +59,7 @@ def _song_from_json(artist_html=None, album_html=None, release_type=None, title= _album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html) ], source_list=[ - Source(SourceType.ENCYCLOPAEDIA_METALLUM, song_id) + Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, song_id) ] ) @@ -85,7 +85,7 @@ def _artist_from_json(artist_html=None, genre=None, country=None) -> Artist: return Artist( name=artist_name, source_list=[ - Source(SourceType.ENCYCLOPAEDIA_METALLUM, artist_url) + Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, artist_url) ] ) @@ -105,7 +105,7 @@ def _album_from_json(album_html=None, release_type=None, artist_html=None) -> Al title=album_name, album_type=album_type, source_list=[ - Source(SourceType.ENCYCLOPAEDIA_METALLUM, album_url) + Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, album_url) ], artist_list=[ _artist_from_json(artist_html=artist_html) @@ -207,7 +207,7 @@ def create_grid( class EncyclopaediaMetallum(Page): - SOURCE_TYPE = SourceType.ENCYCLOPAEDIA_METALLUM + SOURCE_TYPE = ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM LOGGER = logging_settings["metal_archives_logger"] def __init__(self, **kwargs): diff --git a/music_kraken/pages/youtube_music/youtube_music.py b/music_kraken/pages/youtube_music/youtube_music.py index d62df42..2e01805 100644 --- a/music_kraken/pages/youtube_music/youtube_music.py +++ b/music_kraken/pages/youtube_music/youtube_music.py @@ -22,20 +22,22 @@ from ...utils import get_current_millis, traverse_json_path from ...utils import dump_to_file -from ...objects import Source, DatabaseObject, ID3Timestamp, Artwork from ..abstract import Page from ...objects import ( - Artist, + DatabaseObject as DataObject, Source, - SourceType, + FormattedText, + ID3Timestamp, + Artwork, + Artist, Song, Album, Label, Target, Lyrics, - FormattedText ) from ...connection import Connection +from ...utils.enums import SourceType, ALL_SOURCE_TYPES from ...utils.enums.album import AlbumType from ...utils.support_classes.download_result import DownloadResult @@ -176,8 +178,7 @@ ALBUM_TYPE_MAP = { class YoutubeMusic(SuperYouTube): # CHANGE - SOURCE_TYPE = SourceType.YOUTUBE_MUSIC - LOGGER = logging_settings["youtube_music_logger"] + SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE def __init__(self, *args, ydl_opts: dict = None, **kwargs): self.yt_music_connection: YoutubeMusicConnection = YoutubeMusicConnection( @@ -348,10 +349,10 @@ class YoutubeMusic(SuperYouTube): default='{}' )) or {} - def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: + def get_source_type(self, source: Source) -> Optional[Type[DataObject]]: return super().get_source_type(source) - def general_search(self, search_query: str) -> List[DatabaseObject]: + def general_search(self, search_query: str) -> List[DataObject]: search_query = search_query.strip() urlescaped_query: str = quote(search_query.strip().replace(" ", "+")) diff --git a/music_kraken/utils/enums/__init__.py b/music_kraken/utils/enums/__init__.py index e460a03..28f0b9f 100644 --- a/music_kraken/utils/enums/__init__.py +++ b/music_kraken/utils/enums/__init__.py @@ -14,10 +14,11 @@ class SourceType: page_type: Type[Page] = None page: Page = None + def register_page(self, page: Page): + self.page = page - def register_page(self, page_type: Type[Page]): - self.page_type = page - self.page = page_type() + def __hash__(self): + return hash(self.name) @property def has_page(self) -> bool: From 0096dfe5cb6760dc9b2bb0f2105b6ff760591903 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 15 May 2024 13:17:36 +0200 Subject: [PATCH 11/18] feat: copying the downloaded music into the final locations --- music_kraken/download/page_attributes.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index f6f7786..fd6f234 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -308,6 +308,10 @@ class Pages: if not found_on_disk or self.download_options.process_metadata_if_found: write_metadata_to_target(metadata=song.metadata, target=tmp, song=song) + # copy the tmp target to the final locations + for target in song.target_collection: + tmp.copy_content(target) + tmp.delete() return r From ea4c73158e4d7e988aefd51b6e93ac1e5252cee8 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 15 May 2024 13:58:44 +0200 Subject: [PATCH 12/18] fix: audio format is replaced completely --- music_kraken/download/page_attributes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index fd6f234..878db65 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -203,7 +203,7 @@ class Pages: self._download_song(data_object, naming={ "genre": [genre], - "audio_format": main_settings["audio_format"], + "audio_format": [main_settings["audio_format"]], }) return download_result From 017752c4d02d4286ee71d0e0306776509406f00f Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 15 May 2024 14:10:32 +0200 Subject: [PATCH 13/18] feat: better download output --- music_kraken/download/page_attributes.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index 878db65..c6cf68d 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -164,6 +164,7 @@ class Pages: def download(self, data_object: DataObject, genre: str, **kwargs) -> DownloadResult: # fetch the given object self.fetch_details(data_object) + output(f"\nDownloading {data_object.option_string}...", color=BColors.BOLD) # fetching all parent objects (e.g. if you only download a song) if not kwargs.get("fetched_upwards", False): @@ -260,14 +261,14 @@ class Pages: )) for target in song.target_collection: if target.exists: - output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY) + output(f'{target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY) r.found_on_disk += 1 - if self.download_options.download_again_if_found: + if not self.download_options.download_again_if_found: target.copy_content(tmp) else: target.create_path() - output(f'- {target.file_path}', color=BColors.GREY) + output(f'{target.file_path}', color=BColors.GREY) # this streams from every available source until something succeeds, setting the skip intervals to the values of the according source used_source: Optional[Source] = None From 98afe5047d05d0c6546ad093d56623f62941896b Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 15 May 2024 14:21:15 +0200 Subject: [PATCH 14/18] fix: wrong creation of source types --- .vscode/settings.json | 1 + music_kraken/download/page_attributes.py | 6 +++--- music_kraken/pages/musify.py | 13 +++++++++++-- music_kraken/pages/youtube_music/youtube_music.py | 4 ++-- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index a8c503e..bea0c42 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -25,6 +25,7 @@ "encyclopaedia", "ENDC", "Gitea", + "iframe", "isrc", "levenshtein", "metallum", diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index c6cf68d..cd047f0 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -22,7 +22,7 @@ from ..utils import output, BColors from ..utils.string_processing import fit_to_file_system from ..utils.config import youtube_settings, main_settings from ..utils.path_manager import LOCATIONS -from ..utils.enums import SourceType +from ..utils.enums import SourceType, ALL_SOURCE_TYPES from ..utils.support_classes.download_result import DownloadResult from ..utils.support_classes.query import Query from ..utils.support_classes.download_result import DownloadResult @@ -148,7 +148,7 @@ class Pages: return data_object def fetch_from_url(self, url: str) -> Optional[DataObject]: - source = Source.match_url(url, SourceType.MANUAL) + source = Source.match_url(url, ALL_SOURCE_TYPES.MANUAL) if source is None: return None @@ -317,7 +317,7 @@ class Pages: return r def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]: - source = Source.match_url(url, SourceType.MANUAL) + source = Source.match_url(url, ALL_SOURCE_TYPES.MANUAL) if source is None: raise UrlNotFoundException(url=url) diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index cf612af..54d849a 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -502,9 +502,18 @@ class Musify(Page): for video_container in video_container_list: iframe_list: List[BeautifulSoup] = video_container.findAll("iframe") for iframe in iframe_list: + """ + the url could look like this + https://www.youtube.com/embed/sNObCkhzOYA?si=dNVgnZMBNVlNb0P_ + """ + parsed_url = urlparse(iframe["src"]) + path_parts = parsed_url.path.strip("/").split("/") + if path_parts[0] != "embed" or len(path_parts) < 2: + continue + source_list.append(Source( - SourceType.YOUTUBE, - iframe["src"], + ALL_SOURCE_TYPES.YOUTUBE, + f"https://music.youtube.com/watch?v={path_parts[1]}", referrer_page=self.SOURCE_TYPE )) diff --git a/music_kraken/pages/youtube_music/youtube_music.py b/music_kraken/pages/youtube_music/youtube_music.py index 2e01805..5d9e55c 100644 --- a/music_kraken/pages/youtube_music/youtube_music.py +++ b/music_kraken/pages/youtube_music/youtube_music.py @@ -620,7 +620,7 @@ class YoutubeMusic(SuperYouTube): Artist( name=name, source_list=[Source( - SourceType.YOUTUBE_MUSIC, + self.SOURCE_TYPE, f"https://music.youtube.com/channel/{ydl_res.get('channel_id', ydl_res.get('uploader_id', ''))}" )] ) for name in artist_names] @@ -641,7 +641,7 @@ class YoutubeMusic(SuperYouTube): artwork=Artwork(*ydl_res.get("thumbnails", [])), main_artist_list=artist_list, source_list=[Source( - SourceType.YOUTUBE_MUSIC, + self.SOURCE_TYPE, f"https://music.youtube.com/watch?v={ydl_res.get('id')}" ), source], ) From bab6aeb45d2a96b4c5a0bbf4704629bd66e421d5 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 15 May 2024 14:26:19 +0200 Subject: [PATCH 15/18] fix: removed double linebreaks from formated text, plaintext --- music_kraken/objects/formatted_text.py | 7 ++++++- music_kraken/objects/lyrics.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/music_kraken/objects/formatted_text.py b/music_kraken/objects/formatted_text.py index 8f9fc52..99e9ae2 100644 --- a/music_kraken/objects/formatted_text.py +++ b/music_kraken/objects/formatted_text.py @@ -38,8 +38,13 @@ class FormattedText: def markdown(self) -> str: return md(self.html).strip() + @property + def plain(self) -> str: + md = self.markdown + return md.replace("\n\n", "\n") + def __str__(self) -> str: return self.markdown - plaintext = markdown + plaintext = plain diff --git a/music_kraken/objects/lyrics.py b/music_kraken/objects/lyrics.py index 65d550d..788e793 100644 --- a/music_kraken/objects/lyrics.py +++ b/music_kraken/objects/lyrics.py @@ -34,6 +34,6 @@ class Lyrics(OuterProxy): @property def metadata(self) -> Metadata: return Metadata({ - id3Mapping.UNSYNCED_LYRICS: [self.text.markdown] + id3Mapping.UNSYNCED_LYRICS: [self.text.plaintext] }) From 1bf04439f050fce39c2aae0b9a5b12b00657abfd Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 15 May 2024 14:51:30 +0200 Subject: [PATCH 16/18] fix: setting the genre of the song --- music_kraken/download/page_attributes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index cd047f0..3bd14f2 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -236,7 +236,6 @@ class Pages: # manage the naming naming: Dict[str, List[str]] = defaultdict(list, naming) naming["song"].append(song.title_string) - naming["genre"].append(song.genre) naming["isrc"].append(song.isrc) naming["album"].extend(a.title_string for a in song.album_collection) naming["album_type"].extend(a.album_type.value for a in song.album_collection) @@ -248,6 +247,7 @@ class Pages: for key, value in naming.items(): # https://stackoverflow.com/a/17016257 naming[key] = list(dict.fromkeys(value)) + song.genre = naming["genre"][0] # manage the targets tmp: Target = Target.temp(file_extension=main_settings["audio_format"]) From 19b83ce880fe7714c46a73eedcff5a186d8add64 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 15 May 2024 15:04:00 +0200 Subject: [PATCH 17/18] fix: saving streaming progress on retry --- music_kraken/connection/connection.py | 1 + 1 file changed, 1 insertion(+) diff --git a/music_kraken/connection/connection.py b/music_kraken/connection/connection.py index d15aa32..44fdd5f 100644 --- a/music_kraken/connection/connection.py +++ b/music_kraken/connection/connection.py @@ -366,6 +366,7 @@ class Connection: if retry: self.LOGGER.warning(f"Retrying stream...") accepted_response_codes.add(206) + stream_kwargs["progress"] = progress return Connection.stream_into(**stream_kwargs) return DownloadResult() From 80ad2727de22762641b06f3c39a1cfd6eb5a68f4 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 15 May 2024 17:14:01 +0200 Subject: [PATCH 18/18] fix: stream retry --- music_kraken/connection/connection.py | 2 +- music_kraken/objects/song.py | 14 +------------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/music_kraken/connection/connection.py b/music_kraken/connection/connection.py index 44fdd5f..31e4b8a 100644 --- a/music_kraken/connection/connection.py +++ b/music_kraken/connection/connection.py @@ -317,7 +317,7 @@ class Connection: name = kwargs.pop("description") if progress > 0: - headers = dict() if headers is None else headers + headers = kwargs.get("headers", dict()) headers["Range"] = f"bytes={target.size}-" r = self.request( diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 8b5953b..8e30a9a 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -155,9 +155,6 @@ class Song(Base): self.main_artist_collection.extend_object_to_attribute = { "main_album_collection": self.album_collection } - self.feature_artist_collection.append_object_to_attribute = { - "feature_song_collection": self - } self.feature_artist_collection.push_to = [self.main_artist_collection] self.main_artist_collection.pull_from = [self.feature_artist_collection] @@ -464,7 +461,6 @@ class Artist(Base): source_collection: SourceCollection contact_collection: Collection[Contact] - feature_song_collection: Collection[Song] main_album_collection: Collection[Album] label_collection: Collection[Label] @@ -479,7 +475,6 @@ class Artist(Base): "general_genre": lambda: "", "source_collection": SourceCollection, - "feature_song_collection": Collection, "main_album_collection": Collection, "contact_collection": Collection, "label_collection": Collection, @@ -511,14 +506,10 @@ class Artist(Base): Base.__init__(**real_kwargs) - DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("main_album_collection", "feature_song_collection") + DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("main_album_collection",) UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection",) def __init_collections__(self): - self.feature_song_collection.append_object_to_attribute = { - "feature_artist_collection": self - } - self.main_album_collection.append_object_to_attribute = { "artist_collection": self } @@ -530,7 +521,6 @@ class Artist(Base): def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]): if object_type is Song: # this doesn't really make sense - # self.feature_song_collection.extend(object_list) return if object_type is Artist: @@ -628,8 +618,6 @@ class Artist(Base): if len(self.main_album_collection) > 0: r += f" with {len(self.main_album_collection)} albums" - if len(self.feature_song_collection) > 0: - r += f" featured in {len(self.feature_song_collection)} songs" r += BColors.ENDC.value return r