From bedd0fe819c068161794d2518567618233b90357 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 15 May 2024 13:16:11 +0200 Subject: [PATCH] fix: runtime errors --- development/actual_donwload.py | 5 +- music_kraken/cli/main_downloader.py | 2 +- music_kraken/download/__init__.py | 1 + music_kraken/download/page_attributes.py | 29 ++-- music_kraken/download/results.py | 1 - music_kraken/objects/parents.py | 1 + music_kraken/objects/source.py | 34 ++-- music_kraken/pages/abstract.py | 151 +----------------- music_kraken/pages/bandcamp.py | 3 +- music_kraken/pages/encyclopaedia_metallum.py | 10 +- .../pages/youtube_music/youtube_music.py | 17 +- music_kraken/utils/enums/__init__.py | 7 +- 12 files changed, 65 insertions(+), 196 deletions(-) diff --git a/development/actual_donwload.py b/development/actual_donwload.py index a8eb732..d91876e 100644 --- a/development/actual_donwload.py +++ b/development/actual_donwload.py @@ -6,8 +6,9 @@ logging.getLogger().setLevel(logging.DEBUG) if __name__ == "__main__": commands = [ - "s: #a Crystal F", - "d: 20", + "s: #a I'm in a coffin", + "0", + "d: 0", ] diff --git a/music_kraken/cli/main_downloader.py b/music_kraken/cli/main_downloader.py index ac91cab..e3fe2cb 100644 --- a/music_kraken/cli/main_downloader.py +++ b/music_kraken/cli/main_downloader.py @@ -317,7 +317,7 @@ class Downloader: for database_object in data_objects: r = self.pages.download( - music_object=database_object, + data_object=database_object, genre=self.genre, **kwargs ) diff --git a/music_kraken/download/__init__.py b/music_kraken/download/__init__.py index a52bd87..7ca0086 100644 --- a/music_kraken/download/__init__.py +++ b/music_kraken/download/__init__.py @@ -1,4 +1,5 @@ from dataclasses import dataclass, field +from typing import Set from ..utils.config import main_settings from ..utils.enums.album import AlbumType diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index 6b52704..f6f7786 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -2,6 +2,7 @@ from typing import Tuple, Type, Dict, Set, Optional, List from collections import defaultdict from pathlib import Path import re +import logging from . import FetchOptions, DownloadOptions from .results import SearchResults @@ -17,6 +18,7 @@ from ..objects import ( Label, ) from ..audio import write_metadata_to_target, correct_codec +from ..utils import output, BColors from ..utils.string_processing import fit_to_file_system from ..utils.config import youtube_settings, main_settings from ..utils.path_manager import LOCATIONS @@ -69,6 +71,8 @@ if DEBUG_PAGES: class Pages: def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False, download_options: DownloadOptions = None, fetch_options: FetchOptions = None): + self.LOGGER = logging.getLogger("download") + self.download_options: DownloadOptions = download_options or DownloadOptions() self.fetch_options: FetchOptions = fetch_options or FetchOptions() @@ -118,7 +122,9 @@ class Pages: return data_object source: Source - for source in data_object.source_collection.get_sources(): + for source in data_object.source_collection.get_sources(source_type_sorting={ + "only_with_page": True, + }): new_data_object = self.fetch_from_source(source=source, stop_at_level=stop_at_level) if new_data_object is not None: data_object.merge(new_data_object) @@ -129,10 +135,15 @@ class Pages: if not source.has_page: return None - func = getattr(source.page, fetch_map[source_type])(source=source, **kwargs) + source_type = source.page.get_source_type(source=source) + if source_type is None: + self.LOGGER.debug(f"Could not determine source type for {source}.") + return None + + func = getattr(source.page, fetch_map[source_type]) # fetching the data object and marking it as fetched - data_object: DataObject = func(source=source) + data_object: DataObject = func(source=source, **kwargs) data_object.mark_as_fetched(source.hash_url) return data_object @@ -175,7 +186,7 @@ class Pages: # download all children download_result: DownloadResult = DownloadResult() - for c in data_object.get_children(): + for c in data_object.get_child_collections(): for d in c: if self._skip_object(d): continue @@ -209,7 +220,7 @@ class Pages: path_template = path_template.replace(f"{{{field}}}", naming[field][0]) - return possible_parts + return path_template def _download_song(self, song: Song, naming: dict) -> DownloadOptions: """ @@ -235,7 +246,7 @@ class Pages: # removing duplicates from the naming, and process the strings for key, value in naming.items(): # https://stackoverflow.com/a/17016257 - naming[key] = list(dict.fromkeys(items)) + naming[key] = list(dict.fromkeys(value)) # manage the targets tmp: Target = Target.temp(file_extension=main_settings["audio_format"]) @@ -248,14 +259,14 @@ class Pages: ) )) for target in song.target_collection: - if target.exists(): + if target.exists: output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY) r.found_on_disk += 1 if self.download_options.download_again_if_found: target.copy_content(tmp) else: - target.create_parent_directories() + target.create_path() output(f'- {target.file_path}', color=BColors.GREY) # this streams from every available source until something succeeds, setting the skip intervals to the values of the according source @@ -294,7 +305,7 @@ class Pages: if used_source is not None: used_source.page.post_process_hook(song=song, temp_target=tmp) - if not found_on_disc or self.download_options.process_metadata_if_found: + if not found_on_disk or self.download_options.process_metadata_if_found: write_metadata_to_target(metadata=song.metadata, target=tmp, song=song) tmp.delete() diff --git a/music_kraken/download/results.py b/music_kraken/download/results.py index 00afea9..2486c26 100644 --- a/music_kraken/download/results.py +++ b/music_kraken/download/results.py @@ -2,7 +2,6 @@ from typing import Tuple, Type, Dict, List, Generator, Union from dataclasses import dataclass from ..objects import DatabaseObject -from ..utils.enums.source import SourceType from ..pages import Page, EncyclopaediaMetallum, Musify diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index 51eb8e6..16ebe6a 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -8,6 +8,7 @@ from typing import Optional, Dict, Tuple, List, Type, Generic, Any, TypeVar, Set from pathlib import Path import inspect +from .source import SourceCollection from .metadata import Metadata from ..utils import get_unix_time, object_trace, generate_id from ..utils.config import logging_settings, main_settings diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py index 3d0b492..b227cc8 100644 --- a/music_kraken/objects/source.py +++ b/music_kraken/objects/source.py @@ -20,13 +20,11 @@ from dataclasses import dataclass, field from functools import cached_property from ..utils import generate_id -from ..utils.enums import SourceType +from ..utils.enums import SourceType, ALL_SOURCE_TYPES from ..utils.config import youtube_settings from ..utils.string_processing import hash_url, shorten_display_url from .metadata import Mapping, Metadata -from .parents import OuterProxy -from .collection import Collection if TYPE_CHECKING: from ..pages.abstract import Page @@ -54,38 +52,38 @@ class Source: url = parsed_url.geturl() if "musify" in parsed_url.netloc: - return cls(SourceType.MUSIFY, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.MUSIFY, url, referrer_page=referrer_page) if parsed_url.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]: - return cls(SourceType.YOUTUBE, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.YOUTUBE, url, referrer_page=referrer_page) if url.startswith("https://www.deezer"): - return cls(SourceType.DEEZER, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.DEEZER, url, referrer_page=referrer_page) if url.startswith("https://open.spotify.com"): - return cls(SourceType.SPOTIFY, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.SPOTIFY, url, referrer_page=referrer_page) if "bandcamp" in url: - return cls(SourceType.BANDCAMP, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.BANDCAMP, url, referrer_page=referrer_page) if "wikipedia" in parsed_url.netloc: - return cls(SourceType.WIKIPEDIA, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.WIKIPEDIA, url, referrer_page=referrer_page) if url.startswith("https://www.metal-archives.com/"): - return cls(SourceType.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page) # the less important once if url.startswith("https://www.facebook"): - return cls(SourceType.FACEBOOK, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.FACEBOOK, url, referrer_page=referrer_page) if url.startswith("https://www.instagram"): - return cls(SourceType.INSTAGRAM, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.INSTAGRAM, url, referrer_page=referrer_page) if url.startswith("https://twitter"): - return cls(SourceType.TWITTER, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.TWITTER, url, referrer_page=referrer_page) if url.startswith("https://myspace.com"): - return cls(SourceType.MYSPACE, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.MYSPACE, url, referrer_page=referrer_page) @property def has_page(self) -> bool: @@ -134,7 +132,7 @@ class SourceCollection: _sources_by_type: Dict[SourceType, List[Source]] def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs): - self._page_to_source_list = defaultdict(list) + self._sources_by_type = defaultdict(list) self._indexed_sources = {} self.extend(data or []) @@ -157,7 +155,7 @@ class SourceCollection: Iterable[SourceType]: A list of source types. """ - source_types: List[SourceType] = self._page_to_source_list.keys() + source_types: List[SourceType] = self._sources_by_type.keys() if only_with_page: source_types = filter(lambda st: st.has_page, source_types) @@ -186,7 +184,7 @@ class SourceCollection: source_types = self.source_types(**source_type_sorting) for source_type in source_types: - yield from self._page_to_source_list[source_type] + yield from self._sources_by_type[source_type] def append(self, source: Source): if source is None: @@ -202,7 +200,7 @@ class SourceCollection: existing_source.__merge__(source) source = existing_source else: - self._page_to_source_list[source.source_type].append(source) + self._sources_by_type[source.source_type].append(source) changed = False for key in source.indexing_values: diff --git a/music_kraken/pages/abstract.py b/music_kraken/pages/abstract.py index f542671..8783dbb 100644 --- a/music_kraken/pages/abstract.py +++ b/music_kraken/pages/abstract.py @@ -49,15 +49,16 @@ class DownloadOptions: class Page: SOURCE_TYPE: SourceType - LOGGER: LOGGER + LOGGER: logging.Logger def __new__(cls, *args, **kwargs): - cls.SOURCE_TYPE.register_page(cls) cls.LOGGER = logging.getLogger(cls.__name__) return super().__new__(cls) def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None): + self.SOURCE_TYPE.register_page(self) + self.download_options: DownloadOptions = download_options or DownloadOptions() self.fetch_options: FetchOptions = fetch_options or FetchOptions() @@ -145,151 +146,7 @@ class Page: def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: return Label() - def download( - self, - music_object: DatabaseObject, - genre: str, - ) -> DownloadResult: - naming_dict: NamingDict = NamingDict({"genre": genre}) - - def fill_naming_objects(naming_music_object: DatabaseObject): - nonlocal naming_dict - - for collection_name in naming_music_object.UPWARDS_COLLECTION_STRING_ATTRIBUTES: - collection: Collection = getattr(naming_music_object, collection_name) - - if collection.empty: - continue - - dom_ordered_music_object: DatabaseObject = collection[0] - naming_dict.add_object(dom_ordered_music_object) - return fill_naming_objects(dom_ordered_music_object) - - fill_naming_objects(music_object) - - return self._download(music_object, naming_dict) - - def _download( - self, - music_object: DatabaseObject, - naming_dict: NamingDict, - **kwargs - ) -> DownloadResult: - if isinstance(music_object, Song): - output(f"Downloading {music_object.option_string} to:", color=BColors.BOLD) - else: - output(f"Downloading {music_object.option_string}...", color=BColors.BOLD) - - # Skips all releases, that are defined in shared.ALBUM_TYPE_BLACKLIST, if download_all is False - if isinstance(music_object, Album): - if not self.download_options.download_all and music_object.album_type in self.download_options.album_type_blacklist: - return DownloadResult() - - if not (isinstance(music_object, Song) and self.NO_ADDITIONAL_DATA_FROM_SONG): - self.fetch_details(music_object=music_object, stop_at_level=1) - - if isinstance(music_object, Album): - music_object.update_tracksort() - - naming_dict.add_object(music_object) - - if isinstance(music_object, Song): - return self._download_song(music_object, naming_dict) - - download_result: DownloadResult = DownloadResult() - - for collection_name in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES: - collection: Collection = getattr(music_object, collection_name) - - sub_ordered_music_object: DatabaseObject - for sub_ordered_music_object in collection: - download_result.merge(self._download(sub_ordered_music_object, naming_dict.copy())) - - return download_result - - def _download_song(self, song: Song, naming_dict: NamingDict): - song.compile() - if "genre" not in naming_dict and song.genre is not None: - naming_dict["genre"] = song.genre - - if song.genre is None: - song.genre = naming_dict["genre"] - - path_parts = Formatter().parse(main_settings["download_path"]) - file_parts = Formatter().parse(main_settings["download_file"]) - new_target = Target( - relative_to_music_dir=True, - file_path=Path( - main_settings["download_path"].format(**{part[1]: naming_dict[part[1]] for part in path_parts}), - main_settings["download_file"].format(**{part[1]: naming_dict[part[1]] for part in file_parts}) - ) - ) - - if song.target_collection.empty: - song.target_collection.append(new_target) - - r = DownloadResult(1) - temp_target: Target = Target.temp(file_extension=main_settings["audio_format"]) - - found_on_disc = False - target: Target - for target in song.target_collection: - current_exists = target.exists - - if current_exists: - output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY) - target.copy_content(temp_target) - found_on_disc = True - - r.found_on_disk += 1 - r.add_target(target) - else: - output(f'- {target.file_path}', color=BColors.GREY) - - - sources = song.source_collection.get_sources(self.SOURCE_TYPE) - - skip_intervals = [] - if not found_on_disc: - for source in sources: - r = self.download_song_to_target(source=source, target=temp_target, desc="downloading") - - if not r.is_fatal_error: - skip_intervals = self.get_skip_intervals(song, source) - break - - if temp_target.exists: - r.merge(self._post_process_targets( - song=song, - temp_target=temp_target, - skip_intervals=skip_intervals, - found_on_disc=found_on_disc, - )) - - return r - - def _post_process_targets(self, song: Song, temp_target: Target, skip_intervals: List, found_on_disc: bool) -> DownloadResult: - if not found_on_disc or self.download_options.process_audio_if_found: - correct_codec(temp_target, skip_intervals=skip_intervals) - - self.post_process_hook(song, temp_target) - - if not found_on_disc or self.download_options.process_metadata_if_found: - write_metadata_to_target(song.metadata, temp_target, song) - - r = DownloadResult() - - target: Target - for target in song.target_collection: - if temp_target is not target: - temp_target.copy_content(target) - r.add_target(target) - - temp_target.delete() - r.sponsor_segments += len(skip_intervals) - - return r - + # to download stuff def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]: return [] diff --git a/music_kraken/pages/bandcamp.py b/music_kraken/pages/bandcamp.py index 30dbbb0..c938189 100644 --- a/music_kraken/pages/bandcamp.py +++ b/music_kraken/pages/bandcamp.py @@ -62,8 +62,7 @@ class Bandcamp(Page): super().__init__(*args, **kwargs) def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: - parsed_url = urlparse(source.url) - path = parsed_url.path.replace("/", "") + path = source.parsed_url.path.replace("/", "") if path == "" or path.startswith("music"): return Artist diff --git a/music_kraken/pages/encyclopaedia_metallum.py b/music_kraken/pages/encyclopaedia_metallum.py index 6ebd1d7..9c1fefe 100644 --- a/music_kraken/pages/encyclopaedia_metallum.py +++ b/music_kraken/pages/encyclopaedia_metallum.py @@ -7,7 +7,7 @@ from urllib.parse import urlparse, urlencode from ..connection import Connection from ..utils.config import logging_settings from .abstract import Page -from ..utils.enums.source import SourceType +from ..utils.enums import SourceType, ALL_SOURCE_TYPES from ..utils.enums.album import AlbumType from ..utils.support_classes.query import Query from ..objects import ( @@ -59,7 +59,7 @@ def _song_from_json(artist_html=None, album_html=None, release_type=None, title= _album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html) ], source_list=[ - Source(SourceType.ENCYCLOPAEDIA_METALLUM, song_id) + Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, song_id) ] ) @@ -85,7 +85,7 @@ def _artist_from_json(artist_html=None, genre=None, country=None) -> Artist: return Artist( name=artist_name, source_list=[ - Source(SourceType.ENCYCLOPAEDIA_METALLUM, artist_url) + Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, artist_url) ] ) @@ -105,7 +105,7 @@ def _album_from_json(album_html=None, release_type=None, artist_html=None) -> Al title=album_name, album_type=album_type, source_list=[ - Source(SourceType.ENCYCLOPAEDIA_METALLUM, album_url) + Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, album_url) ], artist_list=[ _artist_from_json(artist_html=artist_html) @@ -207,7 +207,7 @@ def create_grid( class EncyclopaediaMetallum(Page): - SOURCE_TYPE = SourceType.ENCYCLOPAEDIA_METALLUM + SOURCE_TYPE = ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM LOGGER = logging_settings["metal_archives_logger"] def __init__(self, **kwargs): diff --git a/music_kraken/pages/youtube_music/youtube_music.py b/music_kraken/pages/youtube_music/youtube_music.py index d62df42..2e01805 100644 --- a/music_kraken/pages/youtube_music/youtube_music.py +++ b/music_kraken/pages/youtube_music/youtube_music.py @@ -22,20 +22,22 @@ from ...utils import get_current_millis, traverse_json_path from ...utils import dump_to_file -from ...objects import Source, DatabaseObject, ID3Timestamp, Artwork from ..abstract import Page from ...objects import ( - Artist, + DatabaseObject as DataObject, Source, - SourceType, + FormattedText, + ID3Timestamp, + Artwork, + Artist, Song, Album, Label, Target, Lyrics, - FormattedText ) from ...connection import Connection +from ...utils.enums import SourceType, ALL_SOURCE_TYPES from ...utils.enums.album import AlbumType from ...utils.support_classes.download_result import DownloadResult @@ -176,8 +178,7 @@ ALBUM_TYPE_MAP = { class YoutubeMusic(SuperYouTube): # CHANGE - SOURCE_TYPE = SourceType.YOUTUBE_MUSIC - LOGGER = logging_settings["youtube_music_logger"] + SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE def __init__(self, *args, ydl_opts: dict = None, **kwargs): self.yt_music_connection: YoutubeMusicConnection = YoutubeMusicConnection( @@ -348,10 +349,10 @@ class YoutubeMusic(SuperYouTube): default='{}' )) or {} - def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: + def get_source_type(self, source: Source) -> Optional[Type[DataObject]]: return super().get_source_type(source) - def general_search(self, search_query: str) -> List[DatabaseObject]: + def general_search(self, search_query: str) -> List[DataObject]: search_query = search_query.strip() urlescaped_query: str = quote(search_query.strip().replace(" ", "+")) diff --git a/music_kraken/utils/enums/__init__.py b/music_kraken/utils/enums/__init__.py index e460a03..28f0b9f 100644 --- a/music_kraken/utils/enums/__init__.py +++ b/music_kraken/utils/enums/__init__.py @@ -14,10 +14,11 @@ class SourceType: page_type: Type[Page] = None page: Page = None + def register_page(self, page: Page): + self.page = page - def register_page(self, page_type: Type[Page]): - self.page_type = page - self.page = page_type() + def __hash__(self): + return hash(self.name) @property def has_page(self) -> bool: