From bdf05adcb1fa87c24bf1d245e08dfa46fc24b799 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 3 Apr 2023 09:43:45 +0200 Subject: [PATCH 01/11] cleaned up download --- src/music_kraken/__init__.py | 2 +- .../pages/download_center/multiple_options.py | 100 ++++++++++ .../pages/download_center/search.py | 174 +++++------------- 3 files changed, 146 insertions(+), 130 deletions(-) create mode 100644 src/music_kraken/pages/download_center/multiple_options.py diff --git a/src/music_kraken/__init__.py b/src/music_kraken/__init__.py index 6f46644..587d714 100644 --- a/src/music_kraken/__init__.py +++ b/src/music_kraken/__init__.py @@ -72,7 +72,7 @@ def cli(): print("The given url couldn't be downloaded") return - page = search.get_page_from_query(parsed) + page = search._get_page_from_query(parsed) if page is not None: search.choose_page(page) return diff --git a/src/music_kraken/pages/download_center/multiple_options.py b/src/music_kraken/pages/download_center/multiple_options.py new file mode 100644 index 0000000..8326a66 --- /dev/null +++ b/src/music_kraken/pages/download_center/multiple_options.py @@ -0,0 +1,100 @@ +from collections import defaultdict +from typing import Tuple, List, Dict, Type + +from . import page_attributes +from ..abstract import Page +from ...objects import Options, DatabaseObject, Source + + +class MultiPageOptions: + def __init__( + self, + max_displayed_options: int = 10, + option_digits: int = 3, + derived_from: DatabaseObject = None + ) -> None: + self.max_displayed_options = max_displayed_options + self.option_digits: int = option_digits + + self._length = 0 + self._current_option_dict: Dict[Type[Page], Options] = defaultdict(lambda: Options()) + + self._derive_from = derived_from + + def __getitem__(self, key: Type[Page]): + return self._current_option_dict[key] + + def __setitem__(self, key: Type[Page], value: Options): + self._current_option_dict[key] = value + + self._length = 0 + for key in self._current_option_dict: + self._length += 1 + + def __len__(self) -> int: + return self._length + + def get_page_str(self, page: Type[Page]) -> str: + page_name_fill = "-" + max_page_len = 21 + + return f"({page_attributes.PAGE_NAME_MAP[page]}) ------------------------{page.__name__:{page_name_fill}<{max_page_len}}------------" + + def string_from_all_pages(self) -> str: + if self._length == 1: + for key in self._current_option_dict: + return self.string_from_single_page(key) + + lines: List[str] = [] + + j = 0 + for page, options in self._current_option_dict.items(): + lines.append(self.get_page_str(page)) + + i = -1 + + option_obj: DatabaseObject + for i, option_obj in enumerate(options): + if i >= self.max_displayed_options: + lines.append("...") + break + + lines.append(f"{j + i:0{self.option_digits}} {option_obj.option_string}") + + j += i + 1 + + return "\n".join(lines) + + def choose_from_all_pages(self, index: int) -> Tuple[DatabaseObject, Type[Page]]: + if self._length == 1: + for key in self._current_option_dict: + return self.choose_from_single_page(key, index), key + + sum_of_length = 0 + for page, options in self._current_option_dict.items(): + option_len = min((len(options), self.max_displayed_options)) + + index_of_list = index - sum_of_length + + if index_of_list < option_len: + return options[index_of_list], page + + sum_of_length += option_len + + raise IndexError("index is out of range") + + def string_from_single_page(self, page: Type[Page]) -> str: + lines: List[str] = [self.get_page_str(page)] + + option_obj: DatabaseObject + for i, option_obj in enumerate(self._current_option_dict[page]): + lines.append(f"{i:0{self.option_digits}} {option_obj.option_string}") + + return "\n".join(lines) + + def choose_from_single_page(self, page: Type[Page], index: int) -> DatabaseObject: + return self._current_option_dict[page][index] + + def __repr__(self) -> str: + return self.string_from_all_pages() + \ No newline at end of file diff --git a/src/music_kraken/pages/download_center/search.py b/src/music_kraken/pages/download_center/search.py index cf0867d..9717838 100644 --- a/src/music_kraken/pages/download_center/search.py +++ b/src/music_kraken/pages/download_center/search.py @@ -3,117 +3,10 @@ from typing import Tuple, List, Set, Dict, Type, Union, Optional from . import page_attributes from .download import Download +from .multiple_options import MultiPageOptions from ..abstract import Page from ...objects import Options, DatabaseObject, Source - - -class MultiPageOptions: - def __init__( - self, - max_displayed_options: int = 10, - option_digits: int = 3, - database_object: DatabaseObject = None, - page: Type[Page] = None - ) -> None: - self.max_displayed_options = max_displayed_options - self.option_digits: int = option_digits - - self._length = 0 - self._current_option_dict: Dict[Type[Page], Options] = defaultdict(lambda: Options()) - - self.database_object = database_object - self.page = page - - if database_object is not None and page is not None: - self[page] = database_object.options - - def __getitem__(self, key: Type[Page]): - return self._current_option_dict[key] - - def __setitem__(self, key: Type[Page], value: Options): - self._current_option_dict[key] = value - - self._length = 0 - for key in self._current_option_dict: - self._length += 1 - - def __len__(self) -> int: - return self._length - - def get_page_str(self, page: Type[Page]) -> str: - page_name_fill = "-" - max_page_len = 21 - - return f"({page_attributes.PAGE_NAME_MAP[page]}) ------------------------{page.__name__:{page_name_fill}<{max_page_len}}------------" - - def string_from_all_pages(self) -> str: - if self._length == 1: - for key in self._current_option_dict: - return self.string_from_single_page(key) - - lines: List[str] = [] - - j = 0 - for page, options in self._current_option_dict.items(): - lines.append(self.get_page_str(page)) - - i = -1 - - option_obj: DatabaseObject - for i, option_obj in enumerate(options): - if i >= self.max_displayed_options: - lines.append("...") - break - - lines.append(f"{j + i:0{self.option_digits}} {option_obj.option_string}") - - j += i + 1 - - return "\n".join(lines) - - def choose_from_all_pages(self, index: int) -> Tuple[DatabaseObject, Type[Page]]: - if self._length == 1: - for key in self._current_option_dict: - return self.choose_from_single_page(key, index), key - - sum_of_length = 0 - for page, options in self._current_option_dict.items(): - option_len = min((len(options), self.max_displayed_options)) - - index_of_list = index - sum_of_length - - if index_of_list < option_len: - return options[index_of_list], page - - sum_of_length += option_len - - raise IndexError("index is out of range") - - def string_from_single_page(self, page: Type[Page]) -> str: - lines: List[str] = [self.get_page_str(page)] - - option_obj: DatabaseObject - for i, option_obj in enumerate(self._current_option_dict[page]): - lines.append(f"{i:0{self.option_digits}} {option_obj.option_string}") - - return "\n".join(lines) - - def choose_from_single_page(self, page: Type[Page], index: int) -> DatabaseObject: - return self._current_option_dict[page][index] - - def __repr__(self) -> str: - return self.string_from_all_pages() - - def download(self) -> bool: - if self._length != 1: - return False - - if self.database_object is None or self.page is None: - return False - - self.page.download(self.database_object) - - return True +from ...utils.shared import DOWNLOAD_LOGGER as LOGGER class Search(Download): @@ -141,32 +34,23 @@ class Search(Download): def __repr__(self): return self._current_option.__repr__() - - @property - def next_options(self) -> MultiPageOptions: - mpo = MultiPageOptions( - max_displayed_options=self.max_displayed_options, - option_digits=self.option_digits - ) - self._option_history.append(mpo) - self._current_option = mpo - return mpo - def next_options_from_music_obj(self, database_obj: DatabaseObject, page: Type[Page]) -> MultiPageOptions: + def next_options(self, derive_from: DatabaseObject = None) -> MultiPageOptions: mpo = MultiPageOptions( max_displayed_options=self.max_displayed_options, option_digits=self.option_digits, - database_object=database_obj, - page=page + derived_from=derive_from ) + self._option_history.append(mpo) self._current_option = mpo + return mpo - @property def _previous_options(self) -> MultiPageOptions: self._option_history.pop() self._current_option = self._option_history[-1] + return self._option_history[-1] def search(self, query: str): @@ -177,40 +61,62 @@ class Search(Download): the letter behind it defines the *type* of parameter, followed by a space "#a Psychonaut 4 #r Tired, Numb and #t Drop by Drop" if no # is in the query it gets treated as "unspecified query" + + doesn't set derived_from thus, + can't download right after """ for page in self.pages: self._current_option[page] = page.search_by_query(query=query) def choose_page(self, page: Type[Page]): + """ + doesn't set derived_from thus, + can't download right after + """ + if page not in page_attributes.ALL_PAGES: raise ValueError(f"Page \"{page.__name__}\" does not exist in page_attributes.ALL_PAGES") prev_mpo = self._current_option - mpo = self.next_options + mpo = self.next_options() mpo[page] = prev_mpo[page] - def get_page_from_query(self, query: str) -> Optional[Type[Page]]: + def _get_page_from_query(self, query: str) -> Optional[Type[Page]]: + """ + query can be for example: + "a" or "EncyclopaediaMetallum" to choose a page + """ + page = page_attributes.NAME_PAGE_MAP.get(query.lower().strip()) if page in self.pages: return page + + def _get_page_from_source(self, source: Source) -> Optional[Type[Page]]: + return page_attributes.SOURCE_PAGE_MAP.get(source.page_enum) def choose_index(self, index: int): db_object, page = self._current_option.choose_from_all_pages(index=index) music_object = self.fetch_details(db_object) - mpo = self.next_options_from_music_obj(music_object, page) + mpo[page] = music_object.options + def goto_previous(self): try: - self._current_option = self._previous_options + self._previous_options() except IndexError: pass def search_url(self, url: str) -> bool: + """ + sets derived_from, thus + can download directly after + """ + source = Source.match_url(url=url) if source is None: return False @@ -220,10 +126,20 @@ class Search(Download): return False page = page_attributes.SOURCE_PAGE_MAP[source.page_enum] - mpo = self.next_options + mpo = self.next_options(derive_from=new_object) mpo[page] = new_object.options return True def download_chosen(self) -> bool: - return self._current_option.download() + if self._current_option._derive_from is None: + LOGGER.warning(f"can't download from an non choosen stuff") + return False + + source: Source + for source in self._current_option._derive_from.source_collection: + page = self._get_page_from_source(source=source) + + if page in self.audio_pages: + return page.download(music_object=self._current_option._derive_from) + From 376bbf2fa26b983ec85010fb58fd355ecd65154a Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 3 Apr 2023 09:45:44 +0200 Subject: [PATCH 02/11] Update search.py --- src/music_kraken/pages/download_center/search.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/music_kraken/pages/download_center/search.py b/src/music_kraken/pages/download_center/search.py index 9717838..5a84a77 100644 --- a/src/music_kraken/pages/download_center/search.py +++ b/src/music_kraken/pages/download_center/search.py @@ -143,3 +143,5 @@ class Search(Download): if page in self.audio_pages: return page.download(music_object=self._current_option._derive_from) + return False + From c05199d928261b1a09b2aec1c45aae496891118e Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 3 Apr 2023 10:38:12 +0200 Subject: [PATCH 03/11] added better default targets --- src/music_kraken/pages/abstract.py | 97 +++++++++++++++++++++++------- src/music_kraken/pages/musify.py | 13 +++- src/music_kraken/utils/shared.py | 47 +++++++++------ 3 files changed, 113 insertions(+), 44 deletions(-) diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index 1cb8421..15055a4 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -3,6 +3,8 @@ from typing import Optional, Union, Type, Dict, List from bs4 import BeautifulSoup import requests import logging +from dataclasses import dataclass +from copy import copy from ..utils import shared from ..objects import ( @@ -19,9 +21,34 @@ from ..objects import ( Label ) from ..tagging import write_metadata_to_target +from ..utils.shared import DOWNLOAD_PATH, DOWNLOAD_FILE, DEFAULT_VALUES LOGGER = logging.getLogger("this shouldn't be used") +@dataclass +class DefaultTarget: + genre: str = DEFAULT_VALUES["genre"] + label: str = DEFAULT_VALUES["label"] + artist: str = DEFAULT_VALUES["artist"] + album: str = DEFAULT_VALUES["album"] + song: str = DEFAULT_VALUES["song"] + + def __setattr__(self, __name: str, __value: str) -> None: + if __name in DEFAULT_VALUES: + if self.__getattribute__(__name) == DEFAULT_VALUES[__name]: + super().__setattr__(__name, __value) + return + + super().__setattr__(__name, __value) + + @property + def target(self) -> Target: + return Target( + relative_to_music_dir=True, + path=DOWNLOAD_PATH.format(genre=self.genre, label=self.label, artist=self.artist, album=self.album, song=self.song), + file=DOWNLOAD_FILE.format(genre=self.genre, label=self.label, artist=self.artist, album=self.album, song=self.song) + ) + class Page: """ @@ -283,56 +310,80 @@ class Page: cls._clean_collection(song.main_artist_collection, collections) @classmethod - def download(cls, music_object: Union[Song, Album, Artist, Label], download_features: bool = True): - print("downloading") - print(music_object) + def download( + cls, + music_object: Union[Song, Album, Artist, Label], + download_features: bool = True, + default_target: DefaultTarget = None + ): + if default_target is None: + default_target = DefaultTarget() + if type(music_object) is Song: - return cls.download_song(music_object) + return cls.download_song(music_object, default_target) if type(music_object) is Album: - return cls.download_album(music_object) + return cls.download_album(music_object, default_target) if type(music_object) is Artist: - return cls.download_artist(music_object, download_features=download_features) + return cls.download_artist(music_object, default_target) if type(music_object) is Label: - return cls.download_label(music_object, download_features=download_features) + return cls.download_label(music_object, download_features=download_features, default_target=default_target) @classmethod - def download_label(cls, label: Label, download_features: bool = True, override_existing: bool = False): + def download_label(cls, label: Label, download_features: bool = True, override_existing: bool = False, default_target: DefaultTarget = None): + if default_target is None: + default_target = DefaultTarget() + else: + default_target = copy(default_target) + default_target.label = label.name + cls.fetch_details(label) for artist in label.current_artist_collection: - cls.download_artist(artist, download_features=download_features, override_existing=override_existing) + cls.download_artist(artist, download_features=download_features, override_existing=override_existing, default_target=default_target) for album in label.album_collection: - cls.download_album(album, override_existing=override_existing) + cls.download_album(album, override_existing=override_existing, default_target=default_target) @classmethod - def download_artist(cls, artist: Artist, download_features: bool = True, override_existing: bool = False): + def download_artist(cls, artist: Artist, download_features: bool = True, override_existing: bool = False, default_target: DefaultTarget = None): + if default_target is None: + default_target = DefaultTarget() + else: + default_target = copy(default_target) + default_target.artist = artist.name + cls.fetch_details(artist) for album in artist.main_album_collection: - cls.download_album(album, override_existing=override_existing) + cls.download_album(album, override_existing=override_existing, default_target=default_target) if download_features: for song in artist.feature_album.song_collection: - cls.download_song(song, override_existing=override_existing) + cls.download_song(song, override_existing=override_existing, default_target=default_target) @classmethod - def download_album(cls, album: Album, override_existing: bool = False): + def download_album(cls, album: Album, override_existing: bool = False, default_target: DefaultTarget = None): + if default_target is None: + default_target = DefaultTarget() + else: + default_target = copy(default_target) + default_target.album = album.title + cls.fetch_details(album) for song in album.song_collection: - cls.download_song(song, override_existing=override_existing) + cls.download_song(song, override_existing=override_existing, default_target=default_target) @classmethod - def download_song(cls, song: Song, override_existing: bool = False, create_target_on_demand: bool = True): + def download_song(cls, song: Song, override_existing: bool = False, create_target_on_demand: bool = True, default_target: DefaultTarget = None): + if default_target is None: + default_target = DefaultTarget() + else: + default_target = copy(default_target) + default_target.song = song.title + cls.fetch_details(song) if song.target_collection.empty: if create_target_on_demand and not song.main_artist_collection.empty and not song.album_collection.empty: - song.target_collection.append( - Target( - file=f"{song.title}.mp3", - relative_to_music_dir=True, - path=f"{song.main_artist_collection[0].name}/{song.album_collection[0].title}" - ) - ) + song.target_collection.append(default_target.target) else: return diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index 0f8d71a..5a81d11 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -10,8 +10,7 @@ from pathlib import Path import random from ..utils.shared import ( - ENCYCLOPAEDIA_METALLUM_LOGGER as LOGGER, - TEMP_FOLDER + ENCYCLOPAEDIA_METALLUM_LOGGER as LOGGER ) from .abstract import Page @@ -864,7 +863,7 @@ class Musify(Page): :param source: :return: """ - album = Album(title="Hi :)") + album = Album(title="Hi :)", source_list=[source]) url = cls.parse_url(source.url) @@ -881,6 +880,14 @@ class Musify(Page): card_soup: BeautifulSoup for card_soup in cards_soup.find_all("div", {"class": "playlist__item"}): album.song_collection.append(cls.parse_song_card(card_soup)) + + if stop_at_level > 1: + song: Song + for song in album.song_collection: + sources = song.source_collection.get_sources_from_page(cls.SOURCE_TYPE) + for source in sources: + song.merge(cls._fetch_song_from_source(source=source)) + album.update_tracksort() return album diff --git a/src/music_kraken/utils/shared.py b/src/music_kraken/utils/shared.py index f8ea941..ecd8edc 100644 --- a/src/music_kraken/utils/shared.py +++ b/src/music_kraken/utils/shared.py @@ -4,18 +4,13 @@ import tempfile import os import configparser from sys import platform as current_os +from pathlib import Path -TEMP_FOLDER = "music-downloader" LOG_FILE = "download_logs.log" TEMP_DATABASE_FILE = "metadata.db" -DATABASE_STRUCTURE_FILE = "database_structure.sql" -DATABASE_STRUCTURE_FALLBACK = "https://raw.githubusercontent.com/HeIIow2/music-downloader/master/assets/database_structure.sql" -TEMP_DIR = os.path.join(tempfile.gettempdir(), TEMP_FOLDER) -if not os.path.exists(TEMP_DIR): - os.mkdir(TEMP_DIR) - -TEMP_DATABASE_PATH = os.path.join(TEMP_DIR, TEMP_DATABASE_FILE) +TEMP_DIR = Path(tempfile.gettempdir(), "music-downloader") +TEMP_DIR.mkdir(exist_ok=True) # configure logger default logging.basicConfig( @@ -33,18 +28,17 @@ INIT_PATH_LOGGER = logging.getLogger("init_path") DATABASE_LOGGER = logging.getLogger("database") METADATA_DOWNLOAD_LOGGER = logging.getLogger("metadata") URL_DOWNLOAD_LOGGER = logging.getLogger("AudioSource") +TAGGING_LOGGER = logging.getLogger("tagging") YOUTUBE_LOGGER = logging.getLogger("Youtube") MUSIFY_LOGGER = logging.getLogger("Musify") PATH_LOGGER = logging.getLogger("create-paths") DOWNLOAD_LOGGER = logging.getLogger("download") LYRICS_LOGGER = logging.getLogger("lyrics") GENIUS_LOGGER = logging.getLogger("genius") -TAGGING_LOGGER = logging.getLogger("tagging") - ENCYCLOPAEDIA_METALLUM_LOGGER = logging.getLogger("ma") NOT_A_GENRE = ".", "..", "misc_scripts", "Music", "script", ".git", ".idea" -MUSIC_DIR = os.path.join(os.path.expanduser("~"), "Music") +MUSIC_DIR = Path(os.path.expanduser("~"), "Music") if current_os == "linux": # XDG_USER_DIRS_FILE reference: https://freedesktop.org/wiki/Software/xdg-user-dirs/ @@ -58,17 +52,34 @@ if current_os == "linux": config.read_string(data) xdg_config = config['XDG_USER_DIRS'] MUSIC_DIR = os.path.expandvars(xdg_config['xdg_music_dir'].strip('"')) + except (FileNotFoundError, KeyError) as E: - logger.warning(f''' -Missing file or No entry found for "xdg_music_dir" in: \'{XDG_USER_DIRS_FILE}\'. -Will fallback on default '$HOME/Music'. ----- - ''') + logger.warning( + f"Missing file or No entry found for \"xdg_music_dir\" in: \"{XDG_USER_DIRS_FILE}\".\n" \ + f"Will fallback on default \"$HOME/Music\"." + ) + TOR = False proxies = { 'http': 'socks5h://127.0.0.1:9150', 'https': 'socks5h://127.0.0.1:9150' } if TOR else {} -# only the sources here will get downloaded, in the order the list is ordered -AUDIO_SOURCES = ["Musify", "Youtube"] + +""" +available variables: +- genre +- label +- artist +- album +- song +""" +DOWNLOAD_PATH = "{genre}/{artist}/{album}" +DOWNLOAD_FILE = "{song}.mp3" +DEFAULT_VALUES = { + "genre": "Various Genre", + "label": "Various Labels", + "artist": "Various Artists", + "album": "Various Album", + "song": "Various Song", +} From 66bd99b52487d376f4f942bcfe948bcdf23ff0f0 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 3 Apr 2023 10:39:16 +0200 Subject: [PATCH 04/11] reformating --- src/music_kraken/pages/musify.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index 5a81d11..579bbba 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -855,9 +855,9 @@ class Musify(Page): eg. 'https://musify.club/release/linkin-park-hybrid-theory-2000-188' /html/musify/album_overview.html - [] tracklist - [] attributes - [] ratings + - [x] tracklist + - [ ] attributes + - [ ] ratings :param stop_at_level: :param source: From c501461b1ef57f137fa56087ef09b486807f873d Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 3 Apr 2023 11:17:55 +0200 Subject: [PATCH 05/11] escaping strings --- src/music_kraken/pages/abstract.py | 3 ++- src/music_kraken/utils/string_processing.py | 13 +++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index 15055a4..ebb2f17 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -22,6 +22,7 @@ from ..objects import ( ) from ..tagging import write_metadata_to_target from ..utils.shared import DOWNLOAD_PATH, DOWNLOAD_FILE, DEFAULT_VALUES +from ..utils.string_processing import fit_to_file_system LOGGER = logging.getLogger("this shouldn't be used") @@ -36,7 +37,7 @@ class DefaultTarget: def __setattr__(self, __name: str, __value: str) -> None: if __name in DEFAULT_VALUES: if self.__getattribute__(__name) == DEFAULT_VALUES[__name]: - super().__setattr__(__name, __value) + super().__setattr__(__name, fit_to_file_system(__value)) return super().__setattr__(__name, __value) diff --git a/src/music_kraken/utils/string_processing.py b/src/music_kraken/utils/string_processing.py index 46edb8c..0268d3e 100644 --- a/src/music_kraken/utils/string_processing.py +++ b/src/music_kraken/utils/string_processing.py @@ -6,4 +6,17 @@ def unify(string: str) -> str: """ return string.lower() + +def fit_to_file_system(string: str) -> str: + string = string.strip() + + while string[0] == ".": + if len(string) == 0: + return string + + string = string[1:] + + string = string.replace("/", "|").replace("\\", "|") + + return string \ No newline at end of file From 45b5f0875a8fa35a7689a50a2ee1b6c79e033bad Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 3 Apr 2023 11:47:30 +0200 Subject: [PATCH 06/11] fixed the download directory --- src/music_kraken/pages/abstract.py | 18 ++++++++++++++++-- .../pages/download_center/search.py | 2 +- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index ebb2f17..028c195 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -85,7 +85,7 @@ class Page: LOGGER.warning("to many tries. Aborting.") return None - return cls.get_request(url, accepted_response_codes, trie + 1) + return cls.get_request(url=url, stream=stream, accepted_response_codes=accepted_response_codes, trie=trie + 1) @classmethod def post_request(cls, url: str, json: dict, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[ @@ -109,7 +109,7 @@ class Page: LOGGER.warning("to many tries. Aborting.") return None - return cls.post_request(url, json, accepted_response_codes, trie + 1) + return cls.post_request(url=url, json=json, accepted_response_codes=accepted_response_codes, trie=trie + 1) @classmethod def get_soup_from_response(cls, r: requests.Response) -> BeautifulSoup: @@ -351,6 +351,8 @@ class Page: else: default_target = copy(default_target) default_target.artist = artist.name + if not artist.label_collection.empty: + default_target.label = artist.label_collection[0].name cls.fetch_details(artist) for album in artist.main_album_collection: @@ -367,6 +369,10 @@ class Page: else: default_target = copy(default_target) default_target.album = album.title + if not album.artist_collection.empty: + default_target.artist = album.artist_collection[0].name + if not album.label_collection.empty: + default_target.label = album.label_collection[0].name cls.fetch_details(album) for song in album.song_collection: @@ -379,6 +385,14 @@ class Page: else: default_target = copy(default_target) default_target.song = song.title + if not song.album_collection.empty: + default_target.album = song.album_collection[0].title + if not song.main_artist_collection.empty: + artist: Artist = song.main_artist_collection[0] + default_target.artist = artist.name + + if not artist.label_collection.empty: + default_target.label = artist.label_collection[0].name cls.fetch_details(song) diff --git a/src/music_kraken/pages/download_center/search.py b/src/music_kraken/pages/download_center/search.py index 5a84a77..65835a4 100644 --- a/src/music_kraken/pages/download_center/search.py +++ b/src/music_kraken/pages/download_center/search.py @@ -101,7 +101,7 @@ class Search(Download): db_object, page = self._current_option.choose_from_all_pages(index=index) music_object = self.fetch_details(db_object) - mpo = self.next_options_from_music_obj(music_object, page) + mpo = self.next_options(derive_from=music_object) mpo[page] = music_object.options From 041612e08353c8e092d252eac45f68492e4c313b Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 3 Apr 2023 11:47:51 +0200 Subject: [PATCH 07/11] dsada --- src/music_kraken/pages/abstract.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index 028c195..51b0fcb 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -419,8 +419,6 @@ class Page: if len(sources) == 0: return - print("great") - temp_target: Target = Target( path=shared.TEMP_DIR, file=str(random.randint(0, 999999)) From 77eef63fac3f34b6fc627e40c81022c163f60c09 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 3 Apr 2023 12:14:58 +0200 Subject: [PATCH 08/11] fixed issue with crash on connection exception --- src/music_kraken/objects/target.py | 18 ++++++++++++++---- src/music_kraken/pages/abstract.py | 27 ++++++++++++++++++--------- src/music_kraken/pages/musify.py | 6 ++---- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/src/music_kraken/objects/target.py b/src/music_kraken/objects/target.py index b83b577..6d3162d 100644 --- a/src/music_kraken/objects/target.py +++ b/src/music_kraken/objects/target.py @@ -63,17 +63,25 @@ class Target(DatabaseObject): with open(copy_to.file_path, "wb") as write_to: write_to.write(read_from.read()) - def stream_into(self, r: requests.Response): + def stream_into(self, r: requests.Response) -> bool: + if r is None: + return False + self.create_path() chunk_size = 1024 total_size = int(r.headers.get('content-length')) initial_pos = 0 - with open(self.file_path,'wb') as f: - for chunk in r.iter_content(chunk_size=chunk_size): - size = f.write(chunk) + with open(self.file_path,'wb') as f: + try: + for chunk in r.iter_content(chunk_size=chunk_size): + size = f.write(chunk) + except requests.exceptions.Timeout: + shared.DOWNLOAD_LOGGER.error("Stream timed out.") + return False + """ # doesn't work yet due to # https://github.com/tqdm/tqdm/issues/261 @@ -85,3 +93,5 @@ class Target(DatabaseObject): size = f.write(chunk) pbar.update(size) """ + + return True diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index 51b0fcb..cba14bf 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -60,6 +60,7 @@ class Page: API_SESSION.proxies = shared.proxies TIMEOUT = 5 TRIES = 5 + LOGGER = LOGGER SOURCE_TYPE: SourcePages @@ -78,11 +79,11 @@ class Page: return r if not retry: - LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at GET:{url}. ({trie}-{cls.TRIES})") - LOGGER.debug(r.content) + cls.LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at GET:{url}. ({trie}-{cls.TRIES})") + cls.LOGGER.debug(r.content) if trie >= cls.TRIES: - LOGGER.warning("to many tries. Aborting.") + cls.LOGGER.warning("to many tries. Aborting.") return None return cls.get_request(url=url, stream=stream, accepted_response_codes=accepted_response_codes, trie=trie + 1) @@ -102,11 +103,11 @@ class Page: return r if not retry: - LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at POST:{url}. ({trie}-{cls.TRIES})") - LOGGER.debug(r.content) + cls.LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at POST:{url}. ({trie}-{cls.TRIES})") + cls.LOGGER.debug(r.content) if trie >= cls.TRIES: - LOGGER.warning("to many tries. Aborting.") + cls.LOGGER.warning("to many tries. Aborting.") return None return cls.post_request(url=url, json=json, accepted_response_codes=accepted_response_codes, trie=trie + 1) @@ -414,18 +415,26 @@ class Page: continue existing_target.copy_content(target) + return True sources = song.source_collection.get_sources_from_page(cls.SOURCE_TYPE) if len(sources) == 0: - return + return False temp_target: Target = Target( path=shared.TEMP_DIR, file=str(random.randint(0, 999999)) ) - cls._download_song_to_targets(source=sources[0], target=temp_target) - cls._post_process_targets(song, temp_target) + success = True + + if not cls._download_song_to_targets(source=sources[0], target=temp_target): + success = False + + if not cls._post_process_targets(song, temp_target): + success = False + + return success @classmethod def _post_process_targets(cls, song: Song, temp_target: Target): diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index 579bbba..84515e8 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -9,10 +9,6 @@ from dataclasses import dataclass from pathlib import Path import random -from ..utils.shared import ( - ENCYCLOPAEDIA_METALLUM_LOGGER as LOGGER -) - from .abstract import Page from ..objects import ( DatabaseObject, @@ -88,6 +84,8 @@ class Musify(Page): HOST = "https://musify.club" SOURCE_TYPE = SourcePages.MUSIFY + + LOGGER = LOGGER @classmethod def parse_url(cls, url: str) -> MusifyUrl: From a20f7ae59e53ca6b5d03ce7546f67262312e28bb Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 3 Apr 2023 12:38:09 +0200 Subject: [PATCH 09/11] stuff --- src/music_kraken/pages/abstract.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index cba14bf..fd908f9 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -317,7 +317,7 @@ class Page: music_object: Union[Song, Album, Artist, Label], download_features: bool = True, default_target: DefaultTarget = None - ): + ) -> bool: if default_target is None: default_target = DefaultTarget() @@ -330,6 +330,8 @@ class Page: if type(music_object) is Label: return cls.download_label(music_object, download_features=download_features, default_target=default_target) + return False + @classmethod def download_label(cls, label: Label, download_features: bool = True, override_existing: bool = False, default_target: DefaultTarget = None): if default_target is None: @@ -376,6 +378,8 @@ class Page: default_target.label = album.label_collection[0].name cls.fetch_details(album) + + album.update_tracksort() for song in album.song_collection: cls.download_song(song, override_existing=override_existing, default_target=default_target) From f58685af58809fe8dd1409cfad395f6e7688cbe9 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 3 Apr 2023 16:23:30 +0200 Subject: [PATCH 10/11] fixed really disgusting bug --- src/music_kraken/objects/collection.py | 14 ++++++++++---- src/music_kraken/pages/abstract.py | 14 +++++++------- src/music_kraken/pages/musify.py | 5 +++-- src/musify_search.py | 6 +++++- 4 files changed, 25 insertions(+), 14 deletions(-) diff --git a/src/music_kraken/objects/collection.py b/src/music_kraken/objects/collection.py index 3f30908..3a35e0a 100644 --- a/src/music_kraken/objects/collection.py +++ b/src/music_kraken/objects/collection.py @@ -9,6 +9,7 @@ from .parents import DatabaseObject class AppendResult: was_in_collection: bool current_element: DatabaseObject + was_the_same: bool class Collection: @@ -81,13 +82,18 @@ class Collection: # if the element type has been defined in the initializer it checks if the type matches if self.element_type is not None and not isinstance(element, self.element_type): raise TypeError(f"{type(element)} is not the set type {self.element_type}") + + # return if the same instance of the object is in the list + for existing in self._data: + if element is existing: + return AppendResult(True, element, True) for name, value in element.indexing_values: if value in self._attribute_to_object_map[name]: existing_object = self._attribute_to_object_map[name][value] if not merge_on_conflict: - return AppendResult(True, existing_object) + return AppendResult(True, existing_object, False) # if the object does already exist # thus merging and don't add it afterwards @@ -95,7 +101,7 @@ class Collection: existing_object.merge(element) # in case any relevant data has been added (e.g. it remaps the old object) self.map_element(existing_object) - return AppendResult(True, existing_object) + return AppendResult(True, existing_object, False) element.merge(existing_object) @@ -104,12 +110,12 @@ class Collection: self.unmap_element(existing_object) self.map_element(element) - return AppendResult(True, existing_object) + return AppendResult(True, existing_object, False) self._data.append(element) self.map_element(element) - return AppendResult(False, element) + return AppendResult(False, element, False) def extend(self, element_list: Iterable[DatabaseObject], merge_on_conflict: bool = True, merge_into_existing: bool = True): diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index fd908f9..176ac71 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -220,9 +220,11 @@ class Page: Album: Collection(element_type=Album), Song: Collection(element_type=Song) } - + cls._clean_music_object(new_music_object, collections) + print(collections[Album]) + music_object.merge(new_music_object) music_object.compile(merge_into=True) @@ -280,13 +282,11 @@ class Page: return for i, element in enumerate(collection): - r = collection_dict[collection.element_type].append(element) - if not r.was_in_collection: - cls._clean_music_object(r.current_element, collection_dict) - continue - + r = collection_dict[collection.element_type].append(element, merge_into_existing=True) collection[i] = r.current_element - cls._clean_music_object(r.current_element, collection_dict) + + if not r.was_the_same: + cls._clean_music_object(r.current_element, collection_dict) @classmethod def _clean_label(cls, label: Label, collections: Dict[Union[Type[Song], Type[Album], Type[Artist], Type[Label]], Collection]): diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index 84515e8..a820770 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -565,10 +565,11 @@ class Musify(Page): for card_soup in soup.find_all("div", {"class": "card"}): new_album: Album = cls.parse_album_card(card_soup, artist_name) album_source: Source + if stop_at_level > 1: for album_source in new_album.source_collection.get_sources_from_page(cls.SOURCE_TYPE): new_album.merge(cls._fetch_album_from_source(album_source, stop_at_level=stop_at_level-1)) - + discography.append(new_album) return discography @@ -726,7 +727,7 @@ class Musify(Page): discography: List[Album] = cls.get_discography(url, artist.name) artist.main_album_collection.extend(discography) - + return artist @classmethod diff --git a/src/musify_search.py b/src/musify_search.py index a90c209..51459dd 100644 --- a/src/musify_search.py +++ b/src/musify_search.py @@ -11,6 +11,10 @@ def fetch_artist(): artist = objects.Artist( source_list=[objects.Source(objects.SourcePages.MUSIFY, "https://musify.club/artist/psychonaut-4-83193")] ) + + artist = objects.Artist( + source_list=[objects.Source(objects.SourcePages.MUSIFY, "https://musify.club/artist/ghost-bath-280348/")] + ) artist = Musify.fetch_details(artist) print(artist.options) @@ -33,4 +37,4 @@ def fetch_album(): print(artist.id, artist.name) if __name__ == "__main__": - search() + fetch_artist() From 310bf3194d525758121bc3d2d026e3db7e4472d0 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Mon, 3 Apr 2023 16:24:45 +0200 Subject: [PATCH 11/11] removed debug print --- src/music_kraken/pages/abstract.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index 176ac71..87f2027 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -223,8 +223,6 @@ class Page: cls._clean_music_object(new_music_object, collections) - print(collections[Album]) - music_object.merge(new_music_object) music_object.compile(merge_into=True)