diff --git a/.vscode/settings.json b/.vscode/settings.json index 2108a96..bea0c42 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -20,13 +20,16 @@ "APIC", "Bandcamp", "bitrate", + "DEEZER", "dotenv", "encyclopaedia", "ENDC", "Gitea", + "iframe", "isrc", "levenshtein", "metallum", + "MUSICBRAINZ", "musify", "OKBLUE", "OKGREEN", diff --git a/development/actual_donwload.py b/development/actual_donwload.py index a8eb732..d91876e 100644 --- a/development/actual_donwload.py +++ b/development/actual_donwload.py @@ -6,8 +6,9 @@ logging.getLogger().setLevel(logging.DEBUG) if __name__ == "__main__": commands = [ - "s: #a Crystal F", - "d: 20", + "s: #a I'm in a coffin", + "0", + "d: 0", ] diff --git a/music_kraken/audio/codec.py b/music_kraken/audio/codec.py index ccb7bb1..531bc1f 100644 --- a/music_kraken/audio/codec.py +++ b/music_kraken/audio/codec.py @@ -10,12 +10,12 @@ from ..objects import Target LOGGER = logging_settings["codex_logger"] -def correct_codec(target: Target, bitrate_kb: int = main_settings["bitrate"], audio_format: str = main_settings["audio_format"], interval_list: List[Tuple[float, float]] = None): +def correct_codec(target: Target, bitrate_kb: int = main_settings["bitrate"], audio_format: str = main_settings["audio_format"], skip_intervals: List[Tuple[float, float]] = None): if not target.exists: LOGGER.warning(f"Target doesn't exist: {target.file_path}") return - interval_list = interval_list or [] + skip_intervals = skip_intervals or [] bitrate_b = int(bitrate_kb / 1024) @@ -29,7 +29,7 @@ def correct_codec(target: Target, bitrate_kb: int = main_settings["bitrate"], au start = 0 next_start = 0 - for end, next_start in interval_list: + for end, next_start in skip_intervals: aselect_list.append(f"between(t,{start},{end})") start = next_start aselect_list.append(f"gte(t,{next_start})") diff --git a/music_kraken/cli/main_downloader.py b/music_kraken/cli/main_downloader.py index 7140ff3..e3fe2cb 100644 --- a/music_kraken/cli/main_downloader.py +++ b/music_kraken/cli/main_downloader.py @@ -178,8 +178,6 @@ class Downloader: page_count = 0 for option in self.current_results.formatted_generator(): if isinstance(option, Option): - _downloadable = self.pages.is_downloadable(option.music_object) - r = f"{BColors.GREY.value}{option.index:0{self.option_digits}}{BColors.ENDC.value} {option.music_object.option_string}" print(r) else: @@ -319,7 +317,7 @@ class Downloader: for database_object in data_objects: r = self.pages.download( - music_object=database_object, + data_object=database_object, genre=self.genre, **kwargs ) diff --git a/music_kraken/connection/connection.py b/music_kraken/connection/connection.py index d15aa32..31e4b8a 100644 --- a/music_kraken/connection/connection.py +++ b/music_kraken/connection/connection.py @@ -317,7 +317,7 @@ class Connection: name = kwargs.pop("description") if progress > 0: - headers = dict() if headers is None else headers + headers = kwargs.get("headers", dict()) headers["Range"] = f"bytes={target.size}-" r = self.request( @@ -366,6 +366,7 @@ class Connection: if retry: self.LOGGER.warning(f"Retrying stream...") accepted_response_codes.add(206) + stream_kwargs["progress"] = progress return Connection.stream_into(**stream_kwargs) return DownloadResult() diff --git a/music_kraken/download/__init__.py b/music_kraken/download/__init__.py index e69de29..7ca0086 100644 --- a/music_kraken/download/__init__.py +++ b/music_kraken/download/__init__.py @@ -0,0 +1,21 @@ +from dataclasses import dataclass, field +from typing import Set + +from 
..utils.config import main_settings +from ..utils.enums.album import AlbumType + + +@dataclass +class FetchOptions: + download_all: bool = False + album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"])) + + +@dataclass +class DownloadOptions: + download_all: bool = False + album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"])) + + download_again_if_found: bool = False + process_audio_if_found: bool = False + process_metadata_if_found: bool = True diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index f0b678c..3bd14f2 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -1,12 +1,32 @@ -from typing import Tuple, Type, Dict, Set +from typing import Tuple, Type, Dict, Set, Optional, List +from collections import defaultdict +from pathlib import Path +import re +import logging +from . import FetchOptions, DownloadOptions from .results import SearchResults -from ..objects import DatabaseObject, Source - -from ..utils.config import youtube_settings -from ..utils.enums.source import SourcePages +from ..objects import ( + DatabaseObject as DataObject, + Collection, + Target, + Source, + Options, + Song, + Album, + Artist, + Label, +) +from ..audio import write_metadata_to_target, correct_codec +from ..utils import output, BColors +from ..utils.string_processing import fit_to_file_system +from ..utils.config import youtube_settings, main_settings +from ..utils.path_manager import LOCATIONS +from ..utils.enums import SourceType, ALL_SOURCE_TYPES from ..utils.support_classes.download_result import DownloadResult from ..utils.support_classes.query import Query +from ..utils.support_classes.download_result import DownloadResult +from ..utils.exception import MKMissingNameException from ..utils.exception.download import UrlNotFoundException from ..utils.shared import DEBUG_PAGES @@ -34,6 +54,13 @@ SHADY_PAGES: Set[Type[Page]] = { Musify, } +fetch_map = { + Song: "fetch_song", + Album: "fetch_album", + Artist: "fetch_artist", + Label: "fetch_label", +} + if DEBUG_PAGES: DEBUGGING_PAGE = Bandcamp print(f"Only downloading from page {DEBUGGING_PAGE}.") @@ -43,10 +70,15 @@ if DEBUG_PAGES: class Pages: - def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False) -> None: + def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False, download_options: DownloadOptions = None, fetch_options: FetchOptions = None): + self.LOGGER = logging.getLogger("download") + + self.download_options: DownloadOptions = download_options or DownloadOptions() + self.fetch_options: FetchOptions = fetch_options or FetchOptions() + # initialize all page instances self._page_instances: Dict[Type[Page], Page] = dict() - self._source_to_page: Dict[SourcePages, Type[Page]] = dict() + self._source_to_page: Dict[SourceType, Type[Page]] = dict() exclude_pages = exclude_pages if exclude_pages is not None else set() @@ -61,14 +93,19 @@ class Pages: self._pages_set: Set[Type[Page]] = ALL_PAGES.difference(exclude_pages) self.pages: Tuple[Type[Page], ...] = _set_to_tuple(self._pages_set) - + self._audio_pages_set: Set[Type[Page]] = self._pages_set.intersection(AUDIO_PAGES) self.audio_pages: Tuple[Type[Page], ...] 
= _set_to_tuple(self._audio_pages_set) for page_type in self.pages: - self._page_instances[page_type] = page_type() + self._page_instances[page_type] = page_type(fetch_options=self.fetch_options, download_options=self.download_options) self._source_to_page[page_type.SOURCE_TYPE] = page_type - + + def _get_page_from_enum(self, source_page: SourceType) -> Page: + if source_page not in self._source_to_page: + return None + return self._page_instances[self._source_to_page[source_page]] + def search(self, query: Query) -> SearchResults: result = SearchResults() @@ -80,54 +117,211 @@ class Pages: return result - def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1) -> DatabaseObject: - if not isinstance(music_object, INDEPENDENT_DB_OBJECTS): - return music_object + def fetch_details(self, data_object: DataObject, stop_at_level: int = 1, **kwargs) -> DataObject: + if not isinstance(data_object, INDEPENDENT_DB_OBJECTS): + return data_object - for source_page in music_object.source_collection.source_pages: - if source_page not in self._source_to_page: - continue + source: Source + for source in data_object.source_collection.get_sources(source_type_sorting={ + "only_with_page": True, + }): + new_data_object = self.fetch_from_source(source=source, stop_at_level=stop_at_level) + if new_data_object is not None: + data_object.merge(new_data_object) - page_type = self._source_to_page[source_page] - - if page_type in self._pages_set: - music_object.merge(self._page_instances[page_type].fetch_details(music_object=music_object, stop_at_level=stop_at_level)) + return data_object + + def fetch_from_source(self, source: Source, **kwargs) -> Optional[DataObject]: + if not source.has_page: + return None - return music_object + source_type = source.page.get_source_type(source=source) + if source_type is None: + self.LOGGER.debug(f"Could not determine source type for {source}.") + return None - def is_downloadable(self, music_object: DatabaseObject) -> bool: - _page_types = set(self._source_to_page) - for src in music_object.source_collection.source_pages: - if src in self._source_to_page: - _page_types.add(self._source_to_page[src]) + func = getattr(source.page, fetch_map[source_type]) + + # fetching the data object and marking it as fetched + data_object: DataObject = func(source=source, **kwargs) + data_object.mark_as_fetched(source.hash_url) + return data_object - audio_pages = self._audio_pages_set.intersection(_page_types) - return len(audio_pages) > 0 + def fetch_from_url(self, url: str) -> Optional[DataObject]: + source = Source.match_url(url, ALL_SOURCE_TYPES.MANUAL) + if source is None: + return None + + return self.fetch_from_source(source=source) - def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult: - if not isinstance(music_object, INDEPENDENT_DB_OBJECTS): - return DownloadResult(error_message=f"{type(music_object).__name__} can't be downloaded.") - - self.fetch_details(music_object) - - _page_types = set(self._source_to_page) - for src in music_object.source_collection.source_pages: - if src in self._source_to_page: - _page_types.add(self._source_to_page[src]) - - audio_pages = self._audio_pages_set.intersection(_page_types) + def _skip_object(self, data_object: DataObject) -> bool: + if isinstance(data_object, Album): + if not self.download_options.download_all and data_object.album_type in self.download_options.album_type_blacklist: + return True - for download_page in audio_pages: 
- return self._page_instances[download_page].download(music_object=music_object, genre=genre) - - return DownloadResult(error_message=f"No audio source has been found for {music_object}.") + return False - def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DatabaseObject]: - source = Source.match_url(url, SourcePages.MANUAL) + def download(self, data_object: DataObject, genre: str, **kwargs) -> DownloadResult: + # fetch the given object + self.fetch_details(data_object) + output(f"\nDownloading {data_object.option_string}...", color=BColors.BOLD) + + # fetching all parent objects (e.g. if you only download a song) + if not kwargs.get("fetched_upwards", False): + to_fetch: List[DataObject] = [data_object] + + while len(to_fetch) > 0: + new_to_fetch = [] + for d in to_fetch: + if self._skip_object(d): + continue + + self.fetch_details(d) + + for c in d.get_parent_collections(): + new_to_fetch.extend(c) + + to_fetch = new_to_fetch + + kwargs["fetched_upwards"] = True + + # download all children + download_result: DownloadResult = DownloadResult() + for c in data_object.get_child_collections(): + for d in c: + if self._skip_object(d): + continue + + download_result.merge(self.download(d, genre, **kwargs)) + + # actually download if the object is a song + if isinstance(data_object, Song): + """ + TODO + add the traced artist and album to the naming. + I am able to do that, because duplicate values are removed later on. + """ + + self._download_song(data_object, naming={ + "genre": [genre], + "audio_format": [main_settings["audio_format"]], + }) + + return download_result + + def _extract_fields_from_template(self, path_template: str) -> Set[str]: + return set(re.findall(r"{([^}]+)}", path_template)) + + def _parse_path_template(self, path_template: str, naming: Dict[str, List[str]]) -> str: + field_names: Set[str] = self._extract_fields_from_template(path_template) + + for field in field_names: + if len(naming[field]) == 0: + raise MKMissingNameException(f"Missing field for {field}.") + + path_template = path_template.replace(f"{{{field}}}", naming[field][0]) + + return path_template + + def _download_song(self, song: Song, naming: dict) -> DownloadOptions: + """ + TODO + Search the song in the file system. 
+ """ + r = DownloadResult(total=1) + + # pre process the data recursively + song.compile() + + # manage the naming + naming: Dict[str, List[str]] = defaultdict(list, naming) + naming["song"].append(song.title_string) + naming["isrc"].append(song.isrc) + naming["album"].extend(a.title_string for a in song.album_collection) + naming["album_type"].extend(a.album_type.value for a in song.album_collection) + naming["artist"].extend(a.name for a in song.main_artist_collection) + naming["artist"].extend(a.name for a in song.feature_artist_collection) + for a in song.album_collection: + naming["label"].extend([l.title_string for l in a.label_collection]) + # removing duplicates from the naming, and process the strings + for key, value in naming.items(): + # https://stackoverflow.com/a/17016257 + naming[key] = list(dict.fromkeys(value)) + song.genre = naming["genre"][0] + + # manage the targets + tmp: Target = Target.temp(file_extension=main_settings["audio_format"]) + + song.target_collection.append(Target( + relative_to_music_dir=True, + file_path=Path( + self._parse_path_template(main_settings["download_path"], naming=naming), + self._parse_path_template(main_settings["download_file"], naming=naming), + ) + )) + for target in song.target_collection: + if target.exists: + output(f'{target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY) + r.found_on_disk += 1 + + if not self.download_options.download_again_if_found: + target.copy_content(tmp) + else: + target.create_path() + output(f'{target.file_path}', color=BColors.GREY) + + # this streams from every available source until something succeeds, setting the skip intervals to the values of the according source + used_source: Optional[Source] = None + skip_intervals: List[Tuple[float, float]] = [] + for source in song.source_collection.get_sources(source_type_sorting={ + "only_with_page": True, + "sort_key": lambda page: page.download_priority, + "reverse": True, + }): + if tmp.exists: + break + + used_source = source + streaming_results = source.page.download_song_to_target(source=source, target=tmp, desc="download") + skip_intervals = source.page.get_skip_intervals(song=song, source=source) + + # if something has been downloaded but it somehow failed, delete the file + if streaming_results.is_fatal_error and tmp.exists: + tmp.delete() + + # if everything went right, the file should exist now + if not tmp.exists: + if used_source is None: + r.error_message = f"No source found for {song.option_string}." + else: + r.error_message = f"Something went wrong downloading {song.option_string}." 
+ return r + + # post process the audio + found_on_disk = used_source is None + if not found_on_disk or self.download_options.process_audio_if_found: + correct_codec(target=tmp, skip_intervals=skip_intervals) + r.sponsor_segments = len(skip_intervals) + + if used_source is not None: + used_source.page.post_process_hook(song=song, temp_target=tmp) + + if not found_on_disk or self.download_options.process_metadata_if_found: + write_metadata_to_target(metadata=song.metadata, target=tmp, song=song) + + # copy the tmp target to the final locations + for target in song.target_collection: + tmp.copy_content(target) + + tmp.delete() + return r + + def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]: + source = Source.match_url(url, ALL_SOURCE_TYPES.MANUAL) if source is None: raise UrlNotFoundException(url=url) - _actual_page = self._source_to_page[source.page_enum] + _actual_page = self._source_to_page[source.source_type] return _actual_page, self._page_instances[_actual_page].fetch_object_from_source(source=source, stop_at_level=stop_at_level) \ No newline at end of file diff --git a/music_kraken/download/results.py b/music_kraken/download/results.py index a8fead7..2486c26 100644 --- a/music_kraken/download/results.py +++ b/music_kraken/download/results.py @@ -2,7 +2,6 @@ from typing import Tuple, Type, Dict, List, Generator, Union from dataclasses import dataclass from ..objects import DatabaseObject -from ..utils.enums.source import SourcePages from ..pages import Page, EncyclopaediaMetallum, Musify diff --git a/music_kraken/objects/__init__.py b/music_kraken/objects/__init__.py index da5b9aa..7c7515d 100644 --- a/music_kraken/objects/__init__.py +++ b/music_kraken/objects/__init__.py @@ -3,7 +3,7 @@ from .option import Options from .metadata import Metadata, Mapping as ID3Mapping, ID3Timestamp -from .source import Source, SourcePages, SourceTypes +from .source import Source, SourceType from .song import ( Song, @@ -24,4 +24,4 @@ from .parents import OuterProxy from .artwork import Artwork -DatabaseObject = TypeVar('T', bound=OuterProxy) +DatabaseObject = OuterProxy diff --git a/music_kraken/objects/formatted_text.py b/music_kraken/objects/formatted_text.py index 8f9fc52..99e9ae2 100644 --- a/music_kraken/objects/formatted_text.py +++ b/music_kraken/objects/formatted_text.py @@ -38,8 +38,13 @@ class FormattedText: def markdown(self) -> str: return md(self.html).strip() + @property + def plain(self) -> str: + md = self.markdown + return md.replace("\n\n", "\n") + def __str__(self) -> str: return self.markdown - plaintext = markdown + plaintext = plain diff --git a/music_kraken/objects/lyrics.py b/music_kraken/objects/lyrics.py index 65d550d..788e793 100644 --- a/music_kraken/objects/lyrics.py +++ b/music_kraken/objects/lyrics.py @@ -34,6 +34,6 @@ class Lyrics(OuterProxy): @property def metadata(self) -> Metadata: return Metadata({ - id3Mapping.UNSYNCED_LYRICS: [self.text.markdown] + id3Mapping.UNSYNCED_LYRICS: [self.text.plaintext] }) diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index 3ef6ab3..16ebe6a 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -8,6 +8,7 @@ from typing import Optional, Dict, Tuple, List, Type, Generic, Any, TypeVar, Set from pathlib import Path import inspect +from .source import SourceCollection from .metadata import Metadata from ..utils import get_unix_time, object_trace, generate_id from ..utils.config import logging_settings, main_settings @@ -99,7 +100,9 @@ class 
OuterProxy: Wraps the inner data, and provides apis, to naturally access those values. """ - _default_factories: dict = {} + source_collection: SourceCollection + + _default_factories: dict = {"source_collection": SourceCollection} _outer_attribute: Set[str] = {"options", "metadata", "indexing_values", "option_string"} DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = tuple() @@ -203,6 +206,7 @@ class OuterProxy: if __other is None: return + a_id = self.id a = self b = __other @@ -225,6 +229,8 @@ class OuterProxy: a._inner.__merge__(old_inner, **kwargs) del old_inner + self.id = a_id + def __merge__(self, __other: Optional[OuterProxy], **kwargs): self.merge(__other, **kwargs) @@ -335,3 +341,11 @@ class OuterProxy: def __repr__(self): return f"{type(self).__name__}({self.title_string})" + + def get_child_collections(self): + for collection_string_attribute in self.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES: + yield self.__getattribute__(collection_string_attribute) + + def get_parent_collections(self): + for collection_string_attribute in self.UPWARDS_COLLECTION_STRING_ATTRIBUTES: + yield self.__getattribute__(collection_string_attribute) diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 8b5953b..8e30a9a 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -155,9 +155,6 @@ class Song(Base): self.main_artist_collection.extend_object_to_attribute = { "main_album_collection": self.album_collection } - self.feature_artist_collection.append_object_to_attribute = { - "feature_song_collection": self - } self.feature_artist_collection.push_to = [self.main_artist_collection] self.main_artist_collection.pull_from = [self.feature_artist_collection] @@ -464,7 +461,6 @@ class Artist(Base): source_collection: SourceCollection contact_collection: Collection[Contact] - feature_song_collection: Collection[Song] main_album_collection: Collection[Album] label_collection: Collection[Label] @@ -479,7 +475,6 @@ class Artist(Base): "general_genre": lambda: "", "source_collection": SourceCollection, - "feature_song_collection": Collection, "main_album_collection": Collection, "contact_collection": Collection, "label_collection": Collection, @@ -511,14 +506,10 @@ class Artist(Base): Base.__init__(**real_kwargs) - DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("main_album_collection", "feature_song_collection") + DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("main_album_collection",) UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection",) def __init_collections__(self): - self.feature_song_collection.append_object_to_attribute = { - "feature_artist_collection": self - } - self.main_album_collection.append_object_to_attribute = { "artist_collection": self } @@ -530,7 +521,6 @@ class Artist(Base): def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]): if object_type is Song: # this doesn't really make sense - # self.feature_song_collection.extend(object_list) return if object_type is Artist: @@ -628,8 +618,6 @@ class Artist(Base): if len(self.main_album_collection) > 0: r += f" with {len(self.main_album_collection)} albums" - if len(self.feature_song_collection) > 0: - r += f" featured in {len(self.feature_song_collection)} songs" r += BColors.ENDC.value return r diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py index ff68d6a..b227cc8 100644 --- a/music_kraken/objects/source.py +++ b/music_kraken/objects/source.py @@ -2,40 +2,48 @@ from __future__ import annotations from collections import defaultdict from enum 
import Enum -from typing import List, Dict, Set, Tuple, Optional, Iterable, Generator +from typing import ( + List, + Dict, + Set, + Tuple, + Optional, + Iterable, + Generator, + TypedDict, + Callable, + Any, + TYPE_CHECKING +) from urllib.parse import urlparse, ParseResult from dataclasses import dataclass, field from functools import cached_property from ..utils import generate_id -from ..utils.enums.source import SourcePages, SourceTypes +from ..utils.enums import SourceType, ALL_SOURCE_TYPES from ..utils.config import youtube_settings from ..utils.string_processing import hash_url, shorten_display_url from .metadata import Mapping, Metadata -from .parents import OuterProxy -from .collection import Collection +if TYPE_CHECKING: + from ..pages.abstract import Page @dataclass class Source: - page_enum: SourcePages + source_type: SourceType url: str - referrer_page: SourcePages = None + referrer_page: SourceType = None audio_url: Optional[str] = None additional_data: dict = field(default_factory=dict) def __post_init__(self): - self.referrer_page = self.referrer_page or self.page_enum - - @property - def parsed_url(self) -> ParseResult: - return urlparse(self.url) + self.referrer_page = self.referrer_page or self.source_type @classmethod - def match_url(cls, url: str, referrer_page: SourcePages) -> Optional[Source]: + def match_url(cls, url: str, referrer_page: SourceType) -> Optional[Source]: """ this shouldn't be used, unless you are not certain what the source is for the reason is that it is more inefficient @@ -44,38 +52,50 @@ class Source: url = parsed_url.geturl() if "musify" in parsed_url.netloc: - return cls(SourcePages.MUSIFY, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.MUSIFY, url, referrer_page=referrer_page) if parsed_url.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]: - return cls(SourcePages.YOUTUBE, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.YOUTUBE, url, referrer_page=referrer_page) if url.startswith("https://www.deezer"): - return cls(SourcePages.DEEZER, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.DEEZER, url, referrer_page=referrer_page) if url.startswith("https://open.spotify.com"): - return cls(SourcePages.SPOTIFY, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.SPOTIFY, url, referrer_page=referrer_page) if "bandcamp" in url: - return cls(SourcePages.BANDCAMP, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.BANDCAMP, url, referrer_page=referrer_page) if "wikipedia" in parsed_url.netloc: - return cls(SourcePages.WIKIPEDIA, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.WIKIPEDIA, url, referrer_page=referrer_page) if url.startswith("https://www.metal-archives.com/"): - return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page) # the less important once if url.startswith("https://www.facebook"): - return cls(SourcePages.FACEBOOK, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.FACEBOOK, url, referrer_page=referrer_page) if url.startswith("https://www.instagram"): - return cls(SourcePages.INSTAGRAM, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.INSTAGRAM, url, referrer_page=referrer_page) if url.startswith("https://twitter"): - return cls(SourcePages.TWITTER, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.TWITTER, url, referrer_page=referrer_page) if 
url.startswith("https://myspace.com"): - return cls(SourcePages.MYSPACE, url, referrer_page=referrer_page) + return cls(ALL_SOURCE_TYPES.MYSPACE, url, referrer_page=referrer_page) + + @property + def has_page(self) -> bool: + return self.source_type.page is not None + + @property + def page(self) -> Page: + return self.source_type.page + + @property + def parsed_url(self) -> ParseResult: + return urlparse(self.url) @property def hash_url(self) -> str: @@ -89,37 +109,82 @@ class Source: return r def __repr__(self) -> str: - return f"Src({self.page_enum.value}: {shorten_display_url(self.url)})" + return f"Src({self.source_type.value}: {shorten_display_url(self.url)})" def __merge__(self, other: Source, **kwargs): if self.audio_url is None: self.audio_url = other.audio_url self.additional_data.update(other.additional_data) - page_str = property(fget=lambda self: self.page_enum.value) + page_str = property(fget=lambda self: self.source_type.value) + + +class SourceTypeSorting(TypedDict): + sort_key: Callable[[SourceType], Any] + reverse: bool + only_with_page: bool class SourceCollection: __change_version__ = generate_id() _indexed_sources: Dict[str, Source] - _page_to_source_list: Dict[SourcePages, List[Source]] + _sources_by_type: Dict[SourceType, List[Source]] def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs): - self._page_to_source_list = defaultdict(list) + self._sources_by_type = defaultdict(list) self._indexed_sources = {} self.extend(data or []) - def has_source_page(self, *source_pages: SourcePages) -> bool: - return any(source_page in self._page_to_source_list for source_page in source_pages) + def source_types( + self, + only_with_page: bool = False, + sort_key = lambda page: page.name, + reverse: bool = False + ) -> Iterable[SourceType]: + """ + Returns a list of all source types contained in this source collection. - def get_sources(self, *source_pages: List[Source]) -> Generator[Source]: - if not len(source_pages): - source_pages = self.source_pages + Args: + only_with_page (bool, optional): If True, only returns source types that have a page, meaning you can download from them. + sort_key (function, optional): A function that defines the sorting key for the source types. Defaults to lambda page: page.name. + reverse (bool, optional): If True, sorts the source types in reverse order. Defaults to False. - for page in source_pages: - yield from self._page_to_source_list[page] + Returns: + Iterable[SourceType]: A list of source types. + """ + + source_types: List[SourceType] = self._sources_by_type.keys() + if only_with_page: + source_types = filter(lambda st: st.has_page, source_types) + + return sorted( + source_types, + key=sort_key, + reverse=reverse + ) + + def get_sources(self, *source_types: List[SourceType], source_type_sorting: SourceTypeSorting = None) -> Generator[Source]: + """ + Retrieves sources based on the provided source types and source type sorting. + + Args: + *source_types (List[Source]): Variable number of source types to filter the sources. + source_type_sorting (SourceTypeSorting): Sorting criteria for the source types. This is only relevant if no source types are provided. + + Yields: + Generator[Source]: A generator that yields the sources based on the provided filters. 
+ + Returns: + None + """ + if not len(source_types): + source_type_sorting = source_type_sorting or {} + source_types = self.source_types(**source_type_sorting) + + for source_type in source_types: + yield from self._sources_by_type[source_type] def append(self, source: Source): if source is None: @@ -135,7 +200,7 @@ class SourceCollection: existing_source.__merge__(source) source = existing_source else: - self._page_to_source_list[source.page_enum].append(source) + self._sources_by_type[source.source_type].append(source) changed = False for key in source.indexing_values: @@ -156,10 +221,6 @@ class SourceCollection: def __merge__(self, other: SourceCollection, **kwargs): self.extend(other) - @property - def source_pages(self) -> Iterable[SourcePages]: - return sorted(self._page_to_source_list.keys(), key=lambda page: page.value) - @property def hash_url_list(self) -> List[str]: return [hash_url(source.url) for source in self.get_sources()] @@ -170,7 +231,7 @@ class SourceCollection: @property def homepage_list(self) -> List[str]: - return [source.homepage for source in self.source_pages] + return [source_type.homepage for source_type in self._sources_by_type.keys()] def indexing_values(self) -> Generator[Tuple[str, str], None, None]: for index in self._indexed_sources: diff --git a/music_kraken/pages/abstract.py b/music_kraken/pages/abstract.py index a202b32..8783dbb 100644 --- a/music_kraken/pages/abstract.py +++ b/music_kraken/pages/abstract.py @@ -22,7 +22,7 @@ from ..objects import ( Collection, Label, ) -from ..utils.enums.source import SourcePages +from ..utils.enums import SourceType from ..utils.enums.album import AlbumType from ..audio import write_metadata_to_target, correct_codec from ..utils.config import main_settings @@ -47,74 +47,18 @@ class DownloadOptions: process_audio_if_found: bool = False process_metadata_if_found: bool = True -class NamingDict(dict): - CUSTOM_KEYS: Dict[str, str] = { - "label": "label.name", - "artist": "artist.name", - "song": "song.title", - "isrc": "song.isrc", - "album": "album.title", - "album_type": "album.album_type_string" - } - - def __init__(self, values: dict, object_mappings: Dict[str, DatabaseObject] = None): - self.object_mappings: Dict[str, DatabaseObject] = object_mappings or dict() - - super().__init__(values) - self["audio_format"] = main_settings["audio_format"] - - def add_object(self, music_object: DatabaseObject): - self.object_mappings[type(music_object).__name__.lower()] = music_object - - def copy(self) -> dict: - return type(self)(super().copy(), self.object_mappings.copy()) - - def __getitem__(self, key: str) -> str: - return fit_to_file_system(super().__getitem__(key)) - - def default_value_for_name(self, name: str) -> str: - return f'Various {name.replace("_", " ").title()}' - - def __missing__(self, key: str) -> str: - if "." 
not in key: - if key not in self.CUSTOM_KEYS: - return self.default_value_for_name(key) - - key = self.CUSTOM_KEYS[key] - - frag_list = key.split(".") - - object_name = frag_list[0].strip().lower() - attribute_name = frag_list[-1].strip().lower() - - if object_name not in self.object_mappings: - return self.default_value_for_name(attribute_name) - - music_object = self.object_mappings[object_name] - try: - value = getattr(music_object, attribute_name) - if value is None: - return self.default_value_for_name(attribute_name) - - return str(value) - - except AttributeError: - return self.default_value_for_name(attribute_name) - - class Page: - """ - This is an abstract class, laying out the - functionality for every other class fetching something - """ + SOURCE_TYPE: SourceType + LOGGER: logging.Logger - SOURCE_TYPE: SourcePages - LOGGER = logging.getLogger("this shouldn't be used") + def __new__(cls, *args, **kwargs): + cls.LOGGER = logging.getLogger(cls.__name__) - # set this to true, if all song details can also be fetched by fetching album details - NO_ADDITIONAL_DATA_FROM_SONG = False + return super().__new__(cls) def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None): + self.SOURCE_TYPE.register_page(self) + self.download_options: DownloadOptions = download_options or DownloadOptions() self.fetch_options: FetchOptions = fetch_options or FetchOptions() @@ -189,103 +133,7 @@ class Page: def song_search(self, song: Song) -> List[Song]: return [] - def fetch_details( - self, - music_object: DatabaseObject, - stop_at_level: int = 1, - ) -> DatabaseObject: - """ - when a music object with lacking data is passed in, it returns - the SAME object **(no copy)** with more detailed data. - If you for example put in, an album, it fetches the tracklist - - :param music_object: - :param stop_at_level: - This says the depth of the level the scraper will recurse to. - If this is for example set to 2, then the levels could be: - 1. Level: the album - 2. 
Level: every song of the album + every artist of the album - If no additional requests are needed to get the data one level below the supposed stop level - this gets ignored - :return detailed_music_object: IT MODIFIES THE INPUT OBJ - """ - # creating a new object, of the same type - new_music_object: Optional[DatabaseObject] = None - fetched_from_url: List[str] = [] - - # only certain database objects, have a source list - if isinstance(music_object, INDEPENDENT_DB_OBJECTS): - source: Source - for source in music_object.source_collection.get_sources(self.SOURCE_TYPE): - if music_object.already_fetched_from(source.hash_url): - continue - - tmp = self.fetch_object_from_source( - source=source, - enforce_type=type(music_object), - stop_at_level=stop_at_level, - type_string=type(music_object).__name__, - entity_string=music_object.option_string, - ) - - if new_music_object is None: - new_music_object = tmp - else: - new_music_object.merge(tmp) - fetched_from_url.append(source.hash_url) - - if new_music_object is not None: - music_object.merge(new_music_object) - - music_object.mark_as_fetched(*fetched_from_url) - return music_object - - def fetch_object_from_source( - self, - source: Source, - stop_at_level: int = 2, - enforce_type: Type[DatabaseObject] = None, - type_string: str = "", - entity_string: str = "", - ) -> Optional[DatabaseObject]: - - obj_type = self.get_source_type(source) - - if obj_type is None: - return None - - if enforce_type != obj_type and enforce_type is not None: - self.LOGGER.warning(f"Object type isn't type to enforce: {enforce_type}, {obj_type}") - return None - - music_object: DatabaseObject = None - - fetch_map = { - Song: self.fetch_song, - Album: self.fetch_album, - Artist: self.fetch_artist, - Label: self.fetch_label - } - - if obj_type in fetch_map: - music_object = fetch_map[obj_type](source, stop_at_level=stop_at_level) - else: - self.LOGGER.warning(f"Can't fetch details of type: {obj_type}") - return None - - if stop_at_level > 0: - trace(f"fetching {type_string} [{entity_string}] [stop_at_level={stop_at_level}]") - - collection: Collection - for collection_str in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES: - collection = music_object.__getattribute__(collection_str) - - for sub_element in collection: - sub_element.merge( - self.fetch_details(sub_element, stop_at_level=stop_at_level - 1)) - - return music_object - + # to fetch stuff def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: return Song() @@ -298,153 +146,7 @@ class Page: def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: return Label() - def download( - self, - music_object: DatabaseObject, - genre: str, - ) -> DownloadResult: - naming_dict: NamingDict = NamingDict({"genre": genre}) - - def fill_naming_objects(naming_music_object: DatabaseObject): - nonlocal naming_dict - - for collection_name in naming_music_object.UPWARDS_COLLECTION_STRING_ATTRIBUTES: - collection: Collection = getattr(naming_music_object, collection_name) - - if collection.empty: - continue - - dom_ordered_music_object: DatabaseObject = collection[0] - naming_dict.add_object(dom_ordered_music_object) - return fill_naming_objects(dom_ordered_music_object) - - fill_naming_objects(music_object) - - return self._download(music_object, naming_dict) - - def _download( - self, - music_object: DatabaseObject, - naming_dict: NamingDict, - **kwargs - ) -> DownloadResult: - if isinstance(music_object, Song): - output(f"Downloading {music_object.option_string} to:", color=BColors.BOLD) - 
else: - output(f"Downloading {music_object.option_string}...", color=BColors.BOLD) - - # Skips all releases, that are defined in shared.ALBUM_TYPE_BLACKLIST, if download_all is False - if isinstance(music_object, Album): - if not self.download_options.download_all and music_object.album_type in self.download_options.album_type_blacklist: - return DownloadResult() - - if not (isinstance(music_object, Song) and self.NO_ADDITIONAL_DATA_FROM_SONG): - self.fetch_details(music_object=music_object, stop_at_level=1) - - if isinstance(music_object, Album): - music_object.update_tracksort() - - naming_dict.add_object(music_object) - - if isinstance(music_object, Song): - return self._download_song(music_object, naming_dict) - - download_result: DownloadResult = DownloadResult() - - for collection_name in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES: - collection: Collection = getattr(music_object, collection_name) - - sub_ordered_music_object: DatabaseObject - for sub_ordered_music_object in collection: - download_result.merge(self._download(sub_ordered_music_object, naming_dict.copy())) - - return download_result - - def _download_song(self, song: Song, naming_dict: NamingDict): - song.compile() - if "genre" not in naming_dict and song.genre is not None: - naming_dict["genre"] = song.genre - - if song.genre is None: - song.genre = naming_dict["genre"] - - path_parts = Formatter().parse(main_settings["download_path"]) - file_parts = Formatter().parse(main_settings["download_file"]) - new_target = Target( - relative_to_music_dir=True, - file_path=Path( - main_settings["download_path"].format(**{part[1]: naming_dict[part[1]] for part in path_parts}), - main_settings["download_file"].format(**{part[1]: naming_dict[part[1]] for part in file_parts}) - ) - ) - - if song.target_collection.empty: - song.target_collection.append(new_target) - - r = DownloadResult(1) - temp_target: Target = Target.temp(file_extension=main_settings["audio_format"]) - - found_on_disc = False - target: Target - for target in song.target_collection: - current_exists = target.exists - - if current_exists: - output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY) - target.copy_content(temp_target) - found_on_disc = True - - r.found_on_disk += 1 - r.add_target(target) - else: - output(f'- {target.file_path}', color=BColors.GREY) - - if not song.source_collection.has_source_page(self.SOURCE_TYPE): - return DownloadResult(error_message=f"No {self.__class__.__name__} source found for {song.option_string}.") - - sources = song.source_collection.get_sources(self.SOURCE_TYPE) - - skip_intervals = [] - if not found_on_disc: - for source in sources: - r = self.download_song_to_target(source=source, target=temp_target, desc="downloading") - - if not r.is_fatal_error: - skip_intervals = self.get_skip_intervals(song, source) - break - - if temp_target.exists: - r.merge(self._post_process_targets( - song=song, - temp_target=temp_target, - interval_list=skip_intervals, - found_on_disc=found_on_disc, - )) - - return r - - def _post_process_targets(self, song: Song, temp_target: Target, interval_list: List, found_on_disc: bool) -> DownloadResult: - if not found_on_disc or self.download_options.process_audio_if_found: - correct_codec(temp_target, interval_list=interval_list) - - self.post_process_hook(song, temp_target) - - if not found_on_disc or self.download_options.process_metadata_if_found: - write_metadata_to_target(song.metadata, temp_target, song) - - r = DownloadResult() - - target: Target - for 
target in song.target_collection: - if temp_target is not target: - temp_target.copy_content(target) - r.add_target(target) - - temp_target.delete() - r.sponsor_segments += len(interval_list) - - return r - + # to download stuff def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]: return [] diff --git a/music_kraken/pages/bandcamp.py b/music_kraken/pages/bandcamp.py index fb446d0..c938189 100644 --- a/music_kraken/pages/bandcamp.py +++ b/music_kraken/pages/bandcamp.py @@ -10,7 +10,7 @@ from .abstract import Page from ..objects import ( Artist, Source, - SourcePages, + SourceType, Song, Album, Label, @@ -23,6 +23,7 @@ from ..objects import ( ) from ..connection import Connection from ..utils import dump_to_file +from ..utils.enums import SourceType, ALL_SOURCE_TYPES from ..utils.support_classes.download_result import DownloadResult from ..utils.string_processing import clean_song_title from ..utils.config import main_settings, logging_settings @@ -49,9 +50,7 @@ class BandcampTypes(Enum): class Bandcamp(Page): - # CHANGE - SOURCE_TYPE = SourcePages.BANDCAMP - LOGGER = logging_settings["bandcamp_logger"] + SOURCE_TYPE = ALL_SOURCE_TYPES.BANDCAMP def __init__(self, *args, **kwargs): self.connection: Connection = Connection( @@ -63,8 +62,7 @@ class Bandcamp(Page): super().__init__(*args, **kwargs) def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: - parsed_url = urlparse(source.url) - path = parsed_url.path.replace("/", "") + path = source.parsed_url.path.replace("/", "") if path == "" or path.startswith("music"): return Artist diff --git a/music_kraken/pages/encyclopaedia_metallum.py b/music_kraken/pages/encyclopaedia_metallum.py index dba4527..9c1fefe 100644 --- a/music_kraken/pages/encyclopaedia_metallum.py +++ b/music_kraken/pages/encyclopaedia_metallum.py @@ -7,7 +7,7 @@ from urllib.parse import urlparse, urlencode from ..connection import Connection from ..utils.config import logging_settings from .abstract import Page -from ..utils.enums.source import SourcePages +from ..utils.enums import SourceType, ALL_SOURCE_TYPES from ..utils.enums.album import AlbumType from ..utils.support_classes.query import Query from ..objects import ( @@ -59,7 +59,7 @@ def _song_from_json(artist_html=None, album_html=None, release_type=None, title= _album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html) ], source_list=[ - Source(SourcePages.ENCYCLOPAEDIA_METALLUM, song_id) + Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, song_id) ] ) @@ -85,7 +85,7 @@ def _artist_from_json(artist_html=None, genre=None, country=None) -> Artist: return Artist( name=artist_name, source_list=[ - Source(SourcePages.ENCYCLOPAEDIA_METALLUM, artist_url) + Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, artist_url) ] ) @@ -105,7 +105,7 @@ def _album_from_json(album_html=None, release_type=None, artist_html=None) -> Al title=album_name, album_type=album_type, source_list=[ - Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url) + Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, album_url) ], artist_list=[ _artist_from_json(artist_html=artist_html) @@ -207,7 +207,7 @@ def create_grid( class EncyclopaediaMetallum(Page): - SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM + SOURCE_TYPE = ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM LOGGER = logging_settings["metal_archives_logger"] def __init__(self, **kwargs): @@ -832,7 +832,7 @@ class EncyclopaediaMetallum(Page): ) def get_source_type(self, source: Source): - if self.SOURCE_TYPE != 
source.page_enum: + if self.SOURCE_TYPE != source.source_type: return None url = source.url diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index 59d01b8..54d849a 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -9,7 +9,7 @@ from bs4 import BeautifulSoup from ..connection import Connection from .abstract import Page -from ..utils.enums.source import SourcePages +from ..utils.enums import SourceType, ALL_SOURCE_TYPES from ..utils.enums.album import AlbumType, AlbumStatus from ..objects import ( Artist, @@ -111,9 +111,7 @@ def parse_url(url: str) -> MusifyUrl: class Musify(Page): - # CHANGE - SOURCE_TYPE = SourcePages.MUSIFY - LOGGER = logging_settings["musify_logger"] + SOURCE_TYPE = ALL_SOURCE_TYPES.MUSIFY HOST = "https://musify.club" @@ -504,9 +502,18 @@ class Musify(Page): for video_container in video_container_list: iframe_list: List[BeautifulSoup] = video_container.findAll("iframe") for iframe in iframe_list: + """ + the url could look like this + https://www.youtube.com/embed/sNObCkhzOYA?si=dNVgnZMBNVlNb0P_ + """ + parsed_url = urlparse(iframe["src"]) + path_parts = parsed_url.path.strip("/").split("/") + if path_parts[0] != "embed" or len(path_parts) < 2: + continue + source_list.append(Source( - SourcePages.YOUTUBE, - iframe["src"], + ALL_SOURCE_TYPES.YOUTUBE, + f"https://music.youtube.com/watch?v={path_parts[1]}", referrer_page=self.SOURCE_TYPE )) diff --git a/music_kraken/pages/preset.py b/music_kraken/pages/preset.py deleted file mode 100644 index 0755089..0000000 --- a/music_kraken/pages/preset.py +++ /dev/null @@ -1,65 +0,0 @@ -from typing import List, Optional, Type -from urllib.parse import urlparse -import logging - - -from ..objects import Source, DatabaseObject -from .abstract import Page -from ..objects import ( - Artist, - Source, - SourcePages, - Song, - Album, - Label, - Target -) -from ..connection import Connection -from ..utils.support_classes.query import Query -from ..utils.support_classes.download_result import DownloadResult - -class Preset(Page): - # CHANGE - SOURCE_TYPE = SourcePages.PRESET - LOGGER = logging.getLogger("preset") - - def __init__(self, *args, **kwargs): - self.connection: Connection = Connection( - host="https://www.preset.cum/", - logger=self.LOGGER - ) - - super().__init__(*args, **kwargs) - - def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: - return super().get_source_type(source) - - def general_search(self, search_query: str) -> List[DatabaseObject]: - return [] - - def label_search(self, label: Label) -> List[Label]: - return [] - - def artist_search(self, artist: Artist) -> List[Artist]: - return [] - - def album_search(self, album: Album) -> List[Album]: - return [] - - def song_search(self, song: Song) -> List[Song]: - return [] - - def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: - return Song() - - def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: - return Album() - - def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: - return Artist() - - def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: - return Label() - - def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: - return DownloadResult() diff --git a/music_kraken/pages/youtube.py b/music_kraken/pages/youtube.py index afc5501..5f65631 100644 --- a/music_kraken/pages/youtube.py +++ b/music_kraken/pages/youtube.py @@ -9,7 +9,6 @@ from .abstract import Page from 
..objects import ( Artist, Source, - SourcePages, Song, Album, Label, @@ -19,6 +18,7 @@ from ..objects import ( ) from ..connection import Connection from ..utils.string_processing import clean_song_title +from ..utils.enums import SourceType, ALL_SOURCE_TYPES from ..utils.support_classes.download_result import DownloadResult from ..utils.config import youtube_settings, main_settings, logging_settings @@ -39,10 +39,7 @@ def get_piped_url(path: str = "", params: str = "", query: str = "", fragment: s class YouTube(SuperYouTube): # CHANGE - SOURCE_TYPE = SourcePages.YOUTUBE - LOGGER = logging_settings["youtube_logger"] - - NO_ADDITIONAL_DATA_FROM_SONG = False + SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE def __init__(self, *args, **kwargs): self.connection: Connection = Connection( diff --git a/music_kraken/pages/youtube_music/_list_render.py b/music_kraken/pages/youtube_music/_list_render.py index bb6f40b..2158385 100644 --- a/music_kraken/pages/youtube_music/_list_render.py +++ b/music_kraken/pages/youtube_music/_list_render.py @@ -7,7 +7,6 @@ from ..abstract import Page from ...objects import ( Artist, Source, - SourcePages, Song, Album, Label, diff --git a/music_kraken/pages/youtube_music/_music_object_render.py b/music_kraken/pages/youtube_music/_music_object_render.py index 831d50d..43aee3e 100644 --- a/music_kraken/pages/youtube_music/_music_object_render.py +++ b/music_kraken/pages/youtube_music/_music_object_render.py @@ -3,12 +3,13 @@ from enum import Enum from ...utils.config import youtube_settings, logging_settings from ...utils.string_processing import clean_song_title +from ...utils.enums import SourceType, ALL_SOURCE_TYPES + from ...objects import Source, DatabaseObject from ..abstract import Page from ...objects import ( Artist, Source, - SourcePages, Song, Album, Label, @@ -18,7 +19,7 @@ from ...objects import ( LOGGER = logging_settings["youtube_music_logger"] -SOURCE_PAGE = SourcePages.YOUTUBE_MUSIC +SOURCE_PAGE = ALL_SOURCE_TYPES.YOUTUBE class PageType(Enum): diff --git a/music_kraken/pages/youtube_music/super_youtube.py b/music_kraken/pages/youtube_music/super_youtube.py index 3ba1cee..df900a1 100644 --- a/music_kraken/pages/youtube_music/super_youtube.py +++ b/music_kraken/pages/youtube_music/super_youtube.py @@ -10,7 +10,6 @@ from ..abstract import Page from ...objects import ( Artist, Source, - SourcePages, Song, Album, Label, @@ -21,6 +20,7 @@ from ...objects import ( from ...connection import Connection from ...utils.support_classes.download_result import DownloadResult from ...utils.config import youtube_settings, logging_settings, main_settings +from ...utils.enums import SourceType, ALL_SOURCE_TYPES def get_invidious_url(path: str = "", params: str = "", query: str = "", fragment: str = "") -> str: @@ -50,7 +50,7 @@ class YouTubeUrl: """ def __init__(self, url: str) -> None: - self.SOURCE_TYPE = SourcePages.YOUTUBE + self.SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE """ Raises Index exception for wrong url, and value error for not found enum type @@ -58,9 +58,6 @@ class YouTubeUrl: self.id = "" parsed = urlparse(url=url) - if parsed.netloc == "music.youtube.com": - self.SOURCE_TYPE = SourcePages.YOUTUBE_MUSIC - self.url_type: YouTubeUrlType type_frag_list = parsed.path.split("/") @@ -124,8 +121,7 @@ class YouTubeUrl: class SuperYouTube(Page): # CHANGE - SOURCE_TYPE = SourcePages.YOUTUBE - LOGGER = logging_settings["youtube_logger"] + SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE NO_ADDITIONAL_DATA_FROM_SONG = False diff --git a/music_kraken/pages/youtube_music/youtube_music.py 
b/music_kraken/pages/youtube_music/youtube_music.py index 9339c9f..5d9e55c 100644 --- a/music_kraken/pages/youtube_music/youtube_music.py +++ b/music_kraken/pages/youtube_music/youtube_music.py @@ -22,20 +22,22 @@ from ...utils import get_current_millis, traverse_json_path from ...utils import dump_to_file -from ...objects import Source, DatabaseObject, ID3Timestamp, Artwork from ..abstract import Page from ...objects import ( - Artist, + DatabaseObject as DataObject, Source, - SourcePages, + FormattedText, + ID3Timestamp, + Artwork, + Artist, Song, Album, Label, Target, Lyrics, - FormattedText ) from ...connection import Connection +from ...utils.enums import SourceType, ALL_SOURCE_TYPES from ...utils.enums.album import AlbumType from ...utils.support_classes.download_result import DownloadResult @@ -176,8 +178,7 @@ ALBUM_TYPE_MAP = { class YoutubeMusic(SuperYouTube): # CHANGE - SOURCE_TYPE = SourcePages.YOUTUBE_MUSIC - LOGGER = logging_settings["youtube_music_logger"] + SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE def __init__(self, *args, ydl_opts: dict = None, **kwargs): self.yt_music_connection: YoutubeMusicConnection = YoutubeMusicConnection( @@ -348,10 +349,10 @@ class YoutubeMusic(SuperYouTube): default='{}' )) or {} - def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: + def get_source_type(self, source: Source) -> Optional[Type[DataObject]]: return super().get_source_type(source) - def general_search(self, search_query: str) -> List[DatabaseObject]: + def general_search(self, search_query: str) -> List[DataObject]: search_query = search_query.strip() urlescaped_query: str = quote(search_query.strip().replace(" ", "+")) @@ -619,7 +620,7 @@ class YoutubeMusic(SuperYouTube): Artist( name=name, source_list=[Source( - SourcePages.YOUTUBE_MUSIC, + self.SOURCE_TYPE, f"https://music.youtube.com/channel/{ydl_res.get('channel_id', ydl_res.get('uploader_id', ''))}" )] ) for name in artist_names] @@ -640,7 +641,7 @@ class YoutubeMusic(SuperYouTube): artwork=Artwork(*ydl_res.get("thumbnails", [])), main_artist_list=artist_list, source_list=[Source( - SourcePages.YOUTUBE_MUSIC, + self.SOURCE_TYPE, f"https://music.youtube.com/watch?v={ydl_res.get('id')}" ), source], ) diff --git a/music_kraken/utils/enums/__init__.py b/music_kraken/utils/enums/__init__.py index b90f9aa..28f0b9f 100644 --- a/music_kraken/utils/enums/__init__.py +++ b/music_kraken/utils/enums/__init__.py @@ -1 +1,54 @@ -from .source import SourcePages \ No newline at end of file +from __future__ import annotations + +from dataclasses import dataclass +from typing import Optional, TYPE_CHECKING, Type +if TYPE_CHECKING: + from ...pages.abstract import Page + +@dataclass +class SourceType: + name: str + homepage: Optional[str] = None + download_priority: int = 0 + + page_type: Type[Page] = None + page: Page = None + + def register_page(self, page: Page): + self.page = page + + def __hash__(self): + return hash(self.name) + + @property + def has_page(self) -> bool: + return self.page is not None + + # for backwards compatibility + @property + def value(self) -> str: + return self.name + + +class ALL_SOURCE_TYPES: + YOUTUBE = SourceType(name="youtube", homepage="https://music.youtube.com/") + BANDCAMP = SourceType(name="bandcamp", homepage="https://bandcamp.com/", download_priority=10) + MUSIFY = SourceType(name="musify", homepage="https://musify.club/", download_priority=7) + + GENIUS = SourceType(name="genius", homepage="https://genius.com/") + MUSICBRAINZ = SourceType(name="musicbrainz", 
homepage="https://musicbrainz.org/") + ENCYCLOPAEDIA_METALLUM = SourceType(name="encyclopaedia metallum") + DEEZER = SourceType(name="deezer", homepage="https://www.deezer.com/") + SPOTIFY = SourceType(name="spotify", homepage="https://open.spotify.com/") + + # This has nothing to do with audio, but bands can be here + WIKIPEDIA = SourceType(name="wikipedia", homepage="https://en.wikipedia.org/wiki/Main_Page") + INSTAGRAM = SourceType(name="instagram", homepage="https://www.instagram.com/") + FACEBOOK = SourceType(name="facebook", homepage="https://www.facebook.com/") + TWITTER = SourceType(name="twitter", homepage="https://twitter.com/") + # Yes somehow this ancient site is linked EVERYWHERE + MYSPACE = SourceType(name="myspace", homepage="https://myspace.com/") + + MANUAL = SourceType(name="manual") + + PRESET = SourceType(name="preset") diff --git a/music_kraken/utils/enums/source.py b/music_kraken/utils/enums/source.py deleted file mode 100644 index be3171f..0000000 --- a/music_kraken/utils/enums/source.py +++ /dev/null @@ -1,40 +0,0 @@ -from enum import Enum - - -class SourceTypes(Enum): - SONG = "song" - ALBUM = "album" - ARTIST = "artist" - LYRICS = "lyrics" - - -class SourcePages(Enum): - YOUTUBE = "youtube", "https://www.youtube.com/" - MUSIFY = "musify", "https://musify.club/" - YOUTUBE_MUSIC = "youtube music", "https://music.youtube.com/" - GENIUS = "genius", "https://genius.com/" - MUSICBRAINZ = "musicbrainz", "https://musicbrainz.org/" - ENCYCLOPAEDIA_METALLUM = "encyclopaedia metallum" - BANDCAMP = "bandcamp", "https://bandcamp.com/" - DEEZER = "deezer", "https://www.deezer.com/" - SPOTIFY = "spotify", "https://open.spotify.com/" - - # This has nothing to do with audio, but bands can be here - WIKIPEDIA = "wikipedia", "https://en.wikipedia.org/wiki/Main_Page" - INSTAGRAM = "instagram", "https://www.instagram.com/" - FACEBOOK = "facebook", "https://www.facebook.com/" - TWITTER = "twitter", "https://twitter.com/" - MYSPACE = "myspace", "https://myspace.com/" # Yes somehow this ancient site is linked EVERYWHERE - - MANUAL = "manual", "" - - PRESET = "preset", "" - - def __new__(cls, value, homepage = None): - member = object.__new__(cls) - - member._value_ = value - member.homepage = homepage - - return member - \ No newline at end of file diff --git a/music_kraken/utils/exception/__init__.py b/music_kraken/utils/exception/__init__.py index 746fe78..8f139fb 100644 --- a/music_kraken/utils/exception/__init__.py +++ b/music_kraken/utils/exception/__init__.py @@ -4,8 +4,20 @@ class MKBaseException(Exception): super().__init__(message, **kwargs) +# Downloading +class MKDownloadException(MKBaseException): + pass + + +class MKMissingNameException(MKDownloadException): + pass + + +# Frontend class MKFrontendException(MKBaseException): pass class MKInvalidInputException(MKFrontendException): pass + +