diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..24c2088 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,22 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + }, + { + "name": "Python Debugger: Download script", + "type": "debugpy", + "request": "launch", + "program": "development/actual_donwload.py", + "console": "integratedTerminal" + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index d8e7fc7..ace0f19 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -19,6 +19,7 @@ "albumsort", "APIC", "Bandcamp", + "bitrate", "dotenv", "encyclopaedia", "ENDC", @@ -27,6 +28,7 @@ "metallum", "musify", "OKBLUE", + "OKGREEN", "pathvalidate", "Referer", "sponsorblock", diff --git a/development/actual_donwload.py b/development/actual_donwload.py index 548e228..748bf1b 100644 --- a/development/actual_donwload.py +++ b/development/actual_donwload.py @@ -6,9 +6,8 @@ logging.getLogger().setLevel(logging.DEBUG) if __name__ == "__main__": commands = [ - "s: #a Crystal F", - "10", - "2", + "s: #a Psychonaut 4", + "d: 0", ] diff --git a/music_kraken/audio/codec.py b/music_kraken/audio/codec.py index 9ec70e6..ccb7bb1 100644 --- a/music_kraken/audio/codec.py +++ b/music_kraken/audio/codec.py @@ -47,7 +47,7 @@ def correct_codec(target: Target, bitrate_kb: int = main_settings["bitrate"], au # run the ffmpeg command with a progressbar ff = FfmpegProgress(ffmpeg_command) - with tqdm(total=100, desc=f"removing {len(interval_list)} segments") as pbar: + with tqdm(total=100, desc=f"processing") as pbar: for progress in ff.run_command_with_progress(): pbar.update(progress-pbar.n) diff --git a/music_kraken/cli/main_downloader.py b/music_kraken/cli/main_downloader.py index f9321b4..7140ff3 100644 --- a/music_kraken/cli/main_downloader.py +++ b/music_kraken/cli/main_downloader.py @@ -166,9 +166,9 @@ class Downloader: self.genre = genre or get_genre() self.process_metadata_anyway = process_metadata_anyway - print() - print(f"Downloading to: \"{self.genre}\"") - print() + output() + output(f"Downloading to: \"{self.genre}\"", color=BColors.HEADER) + output() def print_current_options(self): self.page_dict = dict() @@ -312,10 +312,8 @@ class Downloader: def download(self, data_objects: List[DatabaseObject], **kwargs) -> bool: output() - if len(data_objects) == 1: - output(f"Downloading {data_objects[0].option_string}...", color=BColors.BOLD) - else: - output(f"Downloading {len(data_objects)} objects...", *("- " + o.option_string for o in data_objects), color=BColors.BOLD, sep="\n") + if len(data_objects) > 1: + output(f"Downloading {len(data_objects)} objects...", *("- " + o.option_string for o in data_objects), color=BColors.BOLD, sep="\n") _result_map: Dict[DatabaseObject, DownloadResult] = dict() diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index ebd7423..f0b678c 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -118,7 +118,7 @@ class Pages: audio_pages = self._audio_pages_set.intersection(_page_types) for download_page in audio_pages: - return self._page_instances[download_page].download(music_object=music_object, genre=genre, download_all=download_all, process_metadata_anyway=process_metadata_anyway) + return self._page_instances[download_page].download(music_object=music_object, genre=genre) return DownloadResult(error_message=f"No audio source has been found for {music_object}.") diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index 9fd9f90..b058630 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -153,6 +153,8 @@ class Collection(Generic[T]): if other is None: return + if not other._inner._has_data: + return if other.id in self._indexed_from_id: return diff --git a/music_kraken/objects/formatted_text.py b/music_kraken/objects/formatted_text.py index b1891b6..8f9fc52 100644 --- a/music_kraken/objects/formatted_text.py +++ b/music_kraken/objects/formatted_text.py @@ -32,7 +32,7 @@ class FormattedText: if self.is_empty and other.is_empty: return True - return self.doc == other.doc + return self.html == other.html @property def markdown(self) -> str: diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index b4f867a..725e2c9 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -30,6 +30,8 @@ class InnerData: _refers_to_instances: set = None _is_in_collection: set = None + + _has_data: bool = False """ Attribute versions keep track, of if the attribute has been changed. """ @@ -48,9 +50,19 @@ class InnerData: for key, value in kwargs.items(): if hasattr(value, "__is_collection__"): value._collection_for[self] = key - + self.__setattr__(key, value) + if self._has_data: + continue + + def __setattr__(self, key: str, value): + if self._has_data or not hasattr(self, "_default_values"): + return super().__setattr__(key, value) + + super().__setattr__("_has_data", not (key in self._default_values and self._default_values[key] == value)) + return super().__setattr__(key, value) + def __hash__(self): return self.id diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 33f68a0..beccb2d 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -111,7 +111,7 @@ class Song(Base): "album_collection": Collection, "feature_artist_collection": Collection, - "title": lambda: "", + "title": lambda: None, "unified_title": lambda: None, "isrc": lambda: None, "genre": lambda: None, diff --git a/music_kraken/pages/abstract.py b/music_kraken/pages/abstract.py index e322048..8d6dba9 100644 --- a/music_kraken/pages/abstract.py +++ b/music_kraken/pages/abstract.py @@ -3,8 +3,9 @@ import random import re from copy import copy from pathlib import Path -from typing import Optional, Union, Type, Dict, Set, List, Tuple +from typing import Optional, Union, Type, Dict, Set, List, Tuple, TypedDict from string import Formatter +from dataclasses import dataclass, field import requests from bs4 import BeautifulSoup @@ -28,11 +29,23 @@ from ..utils.config import main_settings from ..utils.support_classes.query import Query from ..utils.support_classes.download_result import DownloadResult from ..utils.string_processing import fit_to_file_system -from ..utils import trace +from ..utils import trace, output, BColors INDEPENDENT_DB_OBJECTS = Union[Label, Album, Artist, Song] INDEPENDENT_DB_TYPES = Union[Type[Song], Type[Album], Type[Artist], Type[Label]] +@dataclass +class FetchOptions: + download_all: bool = False + album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"])) + +@dataclass +class DownloadOptions: + download_all: bool = False + album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"])) + + process_audio_if_found: bool = False + process_metadata_if_found: bool = True class NamingDict(dict): CUSTOM_KEYS: Dict[str, str] = { @@ -101,6 +114,10 @@ class Page: # set this to true, if all song details can also be fetched by fetching album details NO_ADDITIONAL_DATA_FROM_SONG = False + def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None): + self.download_options: DownloadOptions = download_options or DownloadOptions() + self.fetch_options: FetchOptions = fetch_options or FetchOptions() + def _search_regex(self, pattern, string, default=None, fatal=True, flags=0, group=None): """ Perform a regex search on the given string, using a single or a list of @@ -176,7 +193,6 @@ class Page: self, music_object: DatabaseObject, stop_at_level: int = 1, - post_process: bool = True ) -> DatabaseObject: """ when a music object with lacking data is passed in, it returns @@ -208,7 +224,6 @@ class Page: source=source, enforce_type=type(music_object), stop_at_level=stop_at_level, - post_process=False, type_string=type(music_object).__name__, entity_string=music_object.option_string, ) @@ -230,7 +245,6 @@ class Page: source: Source, stop_at_level: int = 2, enforce_type: Type[DatabaseObject] = None, - post_process: bool = True, type_string: str = "", entity_string: str = "", ) -> Optional[DatabaseObject]: @@ -268,7 +282,7 @@ class Page: for sub_element in collection: sub_element.merge( - self.fetch_details(sub_element, stop_at_level=stop_at_level - 1, post_process=False)) + self.fetch_details(sub_element, stop_at_level=stop_at_level - 1)) return music_object @@ -288,8 +302,6 @@ class Page: self, music_object: DatabaseObject, genre: str, - download_all: bool = False, - process_metadata_anyway: bool = True ) -> DownloadResult: naming_dict: NamingDict = NamingDict({"genre": genre}) @@ -308,25 +320,22 @@ class Page: fill_naming_objects(music_object) - return self._download(music_object, naming_dict, download_all, process_metadata_anyway=process_metadata_anyway) + return self._download(music_object, naming_dict) def _download( self, music_object: DatabaseObject, naming_dict: NamingDict, - download_all: bool = False, - skip_details: bool = False, - process_metadata_anyway: bool = True + **kwargs ) -> DownloadResult: - trace(f"downloading {type(music_object).__name__} [{music_object.option_string}]") - skip_next_details = skip_details + if isinstance(music_object, Song): + output(f"Downloading {music_object.option_string} to:", color=BColors.BOLD) + else: + output(f"Downloading {music_object.option_string}...", color=BColors.BOLD) # Skips all releases, that are defined in shared.ALBUM_TYPE_BLACKLIST, if download_all is False if isinstance(music_object, Album): - if self.NO_ADDITIONAL_DATA_FROM_SONG: - skip_next_details = True - - if not download_all and music_object.album_type.value in main_settings["album_type_blacklist"]: + if not self.download_options.download_all and music_object.album_type in self.download_options.album_type_blacklist: return DownloadResult() if not (isinstance(music_object, Song) and self.NO_ADDITIONAL_DATA_FROM_SONG): @@ -338,7 +347,7 @@ class Page: naming_dict.add_object(music_object) if isinstance(music_object, Song): - return self._download_song(music_object, naming_dict, process_metadata_anyway=process_metadata_anyway) + return self._download_song(music_object, naming_dict) download_result: DownloadResult = DownloadResult() @@ -347,13 +356,11 @@ class Page: sub_ordered_music_object: DatabaseObject for sub_ordered_music_object in collection: - download_result.merge(self._download(sub_ordered_music_object, naming_dict.copy(), download_all, - skip_details=skip_next_details, - process_metadata_anyway=process_metadata_anyway)) + download_result.merge(self._download(sub_ordered_music_object, naming_dict.copy())) return download_result - def _download_song(self, song: Song, naming_dict: NamingDict, process_metadata_anyway: bool = True): + def _download_song(self, song: Song, naming_dict: NamingDict): if "genre" not in naming_dict and song.genre is not None: naming_dict["genre"] = song.genre @@ -373,40 +380,33 @@ class Page: if song.target_collection.empty: song.target_collection.append(new_target) + r = DownloadResult(1) + temp_target: Target = Target.temp() + + found_on_disc = False + target: Target + for target in song.target_collection: + current_exists = target.exists + + if current_exists: + output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY) + target.copy_content(temp_target) + found_on_disc = True + + r.found_on_disk += 1 + r.add_target(target) + else: + output(f'- {target.file_path}', color=BColors.GREY) + if not song.source_collection.has_source_page(self.SOURCE_TYPE): return DownloadResult(error_message=f"No {self.__class__.__name__} source found for {song.option_string}.") sources = song.source_collection.get_sources(self.SOURCE_TYPE) - temp_target: Target = Target( - relative_to_music_dir=False, - file_path=Path( - main_settings["temp_directory"], - str(song.id) - ) - ) - - r = DownloadResult(1) - - found_on_disc = False - target: Target - for target in song.target_collection: - if target.exists: - if process_metadata_anyway: - target.copy_content(temp_target) - found_on_disc = True - - r.found_on_disk += 1 - r.add_target(target) - - if found_on_disc and not process_metadata_anyway: - self.LOGGER.info(f"{song.option_string} already exists, thus not downloading again.") - return r - skip_intervals = [] if not found_on_disc: for source in sources: - r = self.download_song_to_target(source=source, target=temp_target, desc=song.option_string) + r = self.download_song_to_target(source=source, target=temp_target, desc="downloading") if not r.is_fatal_error: skip_intervals = self.get_skip_intervals(song, source) @@ -417,16 +417,19 @@ class Page: song=song, temp_target=temp_target, interval_list=skip_intervals, + found_on_disc=found_on_disc, )) return r - def _post_process_targets(self, song: Song, temp_target: Target, interval_list: List) -> DownloadResult: - correct_codec(temp_target, interval_list=interval_list) + def _post_process_targets(self, song: Song, temp_target: Target, interval_list: List, found_on_disc: bool) -> DownloadResult: + if not found_on_disc or self.download_options.process_audio_if_found: + correct_codec(temp_target, interval_list=interval_list) self.post_process_hook(song, temp_target) - write_metadata_to_target(song.metadata, temp_target, song) + if not found_on_disc or self.download_options.process_metadata_if_found: + write_metadata_to_target(song.metadata, temp_target, song) r = DownloadResult() diff --git a/music_kraken/pages/bandcamp.py b/music_kraken/pages/bandcamp.py index dcfebbf..fb446d0 100644 --- a/music_kraken/pages/bandcamp.py +++ b/music_kraken/pages/bandcamp.py @@ -22,6 +22,7 @@ from ..objects import ( Artwork, ) from ..connection import Connection +from ..utils import dump_to_file from ..utils.support_classes.download_result import DownloadResult from ..utils.string_processing import clean_song_title from ..utils.config import main_settings, logging_settings diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index ebcb8e6..59d01b8 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -418,6 +418,10 @@ class Musify(Page): href = artist_soup["href"] if href is not None: + href_parts = href.split("/") + if len(href_parts) <= 1 or href_parts[-2] != "artist": + return + artist_src_list.append(Source(self.SOURCE_TYPE, self.HOST + href)) name_elem: BeautifulSoup = artist_soup.find("span", {"itemprop": "name"}) @@ -681,17 +685,20 @@ class Musify(Page): anchor: BeautifulSoup = artist_crumb.find("a") if anchor is not None: href = anchor.get("href") - artist_source_list: List[Source] = [] - if href is not None: - artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + href.strip())) + href_parts = href.split("/") + if not(len(href_parts) <= 1 or href_parts[-2] != "artist"): + artist_source_list: List[Source] = [] - span: BeautifulSoup = anchor.find("span") - if span is not None: - artist_list.append(Artist( - name=span.get_text(strip=True), - source_list=artist_source_list - )) + if href is not None: + artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + href.strip())) + + span: BeautifulSoup = anchor.find("span") + if span is not None: + artist_list.append(Artist( + name=span.get_text(strip=True), + source_list=artist_source_list + )) else: self.LOGGER.debug("there are not 4 breadcrumb items, which shouldn't be the case") @@ -938,10 +945,10 @@ class Musify(Page): album_status_id = album_card.get("data-type") if album_status_id.isdigit(): album_status_id = int(album_status_id) - album_type = ALBUM_TYPE_MAP[album_status_id] + album_kwargs["album_type"] = ALBUM_TYPE_MAP[album_status_id] if album_status_id == 5: - album_status = AlbumStatus.BOOTLEG + album_kwargs["album_status"] = AlbumStatus.BOOTLEG def parse_release_anchor(_anchor: BeautifulSoup, text_is_name=False): nonlocal album_kwargs @@ -1037,7 +1044,7 @@ class Musify(Page): for card_soup in soup.find_all("div", {"class": "card"}): album = self._parse_album_card(card_soup, artist_name, **kwargs) - if album.album_type in _album_type_blacklist: + if not self.fetch_options.download_all and album.album_type in self.fetch_options.album_type_blacklist: continue artist.main_album_collection.append(album) diff --git a/music_kraken/pages/youtube.py b/music_kraken/pages/youtube.py index 73b92ad..afc5501 100644 --- a/music_kraken/pages/youtube.py +++ b/music_kraken/pages/youtube.py @@ -42,7 +42,7 @@ class YouTube(SuperYouTube): SOURCE_TYPE = SourcePages.YOUTUBE LOGGER = logging_settings["youtube_logger"] - NO_ADDITIONAL_DATA_FROM_SONG = True + NO_ADDITIONAL_DATA_FROM_SONG = False def __init__(self, *args, **kwargs): self.connection: Connection = Connection( diff --git a/music_kraken/utils/shared.py b/music_kraken/utils/shared.py index 8f671f9..5f87876 100644 --- a/music_kraken/utils/shared.py +++ b/music_kraken/utils/shared.py @@ -12,7 +12,7 @@ if not load_dotenv(Path(__file__).parent.parent.parent / ".env"): __stage__ = os.getenv("STAGE", "prod") -DEBUG = (__stage__ == "dev") and True +DEBUG = (__stage__ == "dev") and False DEBUG_LOGGING = DEBUG and False DEBUG_TRACE = DEBUG and True DEBUG_OBJECT_TRACE = DEBUG and False diff --git a/music_kraken/utils/string_processing.py b/music_kraken/utils/string_processing.py index 22ae63e..b76e3fc 100644 --- a/music_kraken/utils/string_processing.py +++ b/music_kraken/utils/string_processing.py @@ -116,10 +116,13 @@ def clean_song_title(raw_song_title: str, artist_name: Optional[str] = None) -> # Remove artist from the start of the title if raw_song_title.lower().startswith(artist_name.lower()): - raw_song_title = raw_song_title[len(artist_name):].strip() - if raw_song_title.startswith("-"): - raw_song_title = raw_song_title[1:].strip() + possible_new_name = raw_song_title[len(artist_name):].strip() + + for char in ("-", "–", ":", "|"): + if possible_new_name.startswith(char): + raw_song_title = possible_new_name[1:].strip() + break return raw_song_title.strip() diff --git a/pyproject.toml b/pyproject.toml index 9c8232b..b01e131 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ dependencies = [ "toml~=0.10.2", "typing_extensions~=4.7.1", - "python-sponsorblock~=0.0.0", + "python-sponsorblock~=0.0.dev1", "youtube_dl", ] dynamic = [