From 14f986a497afef4d8f52ede1882fce7642822732 Mon Sep 17 00:00:00 2001
From: Lars Noack
Date: Wed, 15 May 2024 11:44:39 +0200
Subject: [PATCH] draft: rewrote sources

---
Review notes (text in this section is ignored when the patch is applied):
- SourceCollection keeps storing its sources in `_page_to_source_list`;
  `homepage_list` now reads that attribute instead of the never-assigned
  `_sources_by_type`.
- Indentation and blank context lines were reconstructed from the hunk line
  counts; verify them against the target tree before applying.

 music_kraken/cli/main_downloader.py      |  2 -
 music_kraken/download/page_attributes.py |  9 ---
 music_kraken/objects/source.py           | 83 +++++++++++++++++++-----
 music_kraken/pages/abstract.py           |  2 -
 music_kraken/pages/youtube.py            |  2 -
 5 files changed, 66 insertions(+), 32 deletions(-)

diff --git a/music_kraken/cli/main_downloader.py b/music_kraken/cli/main_downloader.py
index 7140ff3..ac91cab 100644
--- a/music_kraken/cli/main_downloader.py
+++ b/music_kraken/cli/main_downloader.py
@@ -178,8 +178,6 @@ class Downloader:
         page_count = 0
         for option in self.current_results.formatted_generator():
             if isinstance(option, Option):
-                _downloadable = self.pages.is_downloadable(option.music_object)
-
                 r = f"{BColors.GREY.value}{option.index:0{self.option_digits}}{BColors.ENDC.value} {option.music_object.option_string}"
                 print(r)
             else:
diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py
index 8d9bb42..6cef729 100644
--- a/music_kraken/download/page_attributes.py
+++ b/music_kraken/download/page_attributes.py
@@ -146,15 +146,6 @@ class Pages:
             return None
 
         return self.fetch_from_source(source=source)
-
-    def is_downloadable(self, music_object: DataObject) -> bool:
-        _page_types = set(self._source_to_page)
-        for src in music_object.source_collection.source_pages:
-            if src in self._source_to_page:
-                _page_types.add(self._source_to_page[src])
-
-        audio_pages = self._audio_pages_set.intersection(_page_types)
-        return len(audio_pages) > 0
 
     def _skip_object(self, data_object: DataObject) -> bool:
         if isinstance(data_object, Album):
diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py
index 0385bca..6258468 100644
--- a/music_kraken/objects/source.py
+++ b/music_kraken/objects/source.py
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 from collections import defaultdict
 from enum import Enum
-from typing import List, Dict, Set, Tuple, Optional, Iterable, Generator
+from typing import List, Dict, Set, Tuple, Optional, Iterable, Generator, TypedDict, Callable, Any
 from urllib.parse import urlparse, ParseResult
 from dataclasses import dataclass, field
 from functools import cached_property
@@ -29,10 +29,6 @@ class Source:
 
     def __post_init__(self):
         self.referrer_page = self.referrer_page or self.source_type
-
-    @property
-    def parsed_url(self) -> ParseResult:
-        return urlparse(self.url)
 
     @classmethod
     def match_url(cls, url: str, referrer_page: SourceType) -> Optional[Source]:
@@ -77,6 +73,18 @@ class Source:
         if url.startswith("https://myspace.com"):
             return cls(SourceType.MYSPACE, url, referrer_page=referrer_page)
 
+    @property
+    def has_page(self) -> bool:
+        return self.source_type.page is not None
+
+    @property
+    def page(self) -> OuterProxy:
+        return self.source_type.page
+
+    @property
+    def parsed_url(self) -> ParseResult:
+        return urlparse(self.url)
+
     @property
     def hash_url(self) -> str:
         return hash_url(self.url)
@@ -99,11 +107,17 @@ class Source:
     page_str = property(fget=lambda self: self.source_type.value)
 
 
+class SourceTypeSorting(TypedDict, total=False):
+    sort_key: Callable[[SourceType], Any]
+    reverse: bool
+    only_with_page: bool
+
+
 class SourceCollection:
     __change_version__ = generate_id()
 
     _indexed_sources: Dict[str, Source]
     _page_to_source_list: Dict[SourceType, List[Source]]
 
     def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs):
         self._page_to_source_list = defaultdict(list)
@@ -111,15 +125,54 @@ class SourceCollection:
 
         self.extend(data or [])
 
-    def has_source_page(self, *source_pages: SourceType) -> bool:
-        return any(source_page in self._page_to_source_list for source_page in source_pages)
+    def source_types(
+        self,
+        only_with_page: bool = False,
+        sort_key: Callable[[SourceType], Any] = lambda page: page.name,
+        reverse: bool = False
+    ) -> Iterable[SourceType]:
+        """
+        Returns a list of all source types contained in this source collection.
 
-    def get_sources(self, *source_pages: List[Source]) -> Generator[Source]:
-        if not len(source_pages):
-            source_pages = self.source_pages
+        Args:
+            only_with_page (bool, optional): If True, only returns source types that have a page, meaning you can download from them.
+            sort_key (function, optional): A function that defines the sorting key for the source types. Defaults to lambda page: page.name.
+            reverse (bool, optional): If True, sorts the source types in reverse order. Defaults to False.
 
-        for page in source_pages:
-            yield from self._page_to_source_list[page]
+        Returns:
+            Iterable[SourceType]: A list of source types.
+        """
+
+        source_types: Iterable[SourceType] = self._page_to_source_list.keys()
+        if only_with_page:
+            source_types = filter(lambda st: st.has_page, source_types)
+
+        return sorted(
+            source_types,
+            key=sort_key,
+            reverse=reverse
+        )
+
+    def get_sources(self, *source_types: SourceType, source_type_sorting: Optional[SourceTypeSorting] = None) -> Generator[Source, None, None]:
+        """
+        Retrieves sources based on the provided source types and source type sorting.
+
+        Args:
+            *source_types (SourceType): Variable number of source types to filter the sources.
+            source_type_sorting (SourceTypeSorting): Sorting criteria for the source types. This is only relevant if no source types are provided.
+
+        Yields:
+            Generator[Source]: A generator that yields the sources based on the provided filters.
+
+        Returns:
+            None
+        """
+        if not source_types:
+            source_type_sorting = source_type_sorting or {}
+            source_types = self.source_types(**source_type_sorting)
+
+        for source_type in source_types:
+            yield from self._page_to_source_list[source_type]
 
     def append(self, source: Source):
         if source is None:
@@ -156,10 +209,6 @@ class SourceCollection:
     def __merge__(self, other: SourceCollection, **kwargs):
         self.extend(other)
 
-    @property
-    def source_pages(self) -> Iterable[SourceType]:
-        return sorted(self._page_to_source_list.keys(), key=lambda page: page.value)
-
     @property
     def hash_url_list(self) -> List[str]:
         return [hash_url(source.url) for source in self.get_sources()]
@@ -170,7 +219,7 @@ class SourceCollection:
 
     @property
     def homepage_list(self) -> List[str]:
-        return [source.homepage for source in self.source_pages]
+        return [source_type.homepage for source_type in self._page_to_source_list]
 
     def indexing_values(self) -> Generator[Tuple[str, str], None, None]:
         for index in self._indexed_sources:
diff --git a/music_kraken/pages/abstract.py b/music_kraken/pages/abstract.py
index 2e0ae49..763c110 100644
--- a/music_kraken/pages/abstract.py
+++ b/music_kraken/pages/abstract.py
@@ -246,8 +246,6 @@ class Page:
             else:
                 output(f'- {target.file_path}', color=BColors.GREY)
 
-        if not song.source_collection.has_source_page(self.SOURCE_TYPE):
-            return DownloadResult(error_message=f"No {self.__class__.__name__} source found for {song.option_string}.")
 
         sources = song.source_collection.get_sources(self.SOURCE_TYPE)
 
diff --git a/music_kraken/pages/youtube.py b/music_kraken/pages/youtube.py
index 5dda132..5f65631 100644
--- a/music_kraken/pages/youtube.py
+++ b/music_kraken/pages/youtube.py
@@ -41,8 +41,6 @@ class YouTube(SuperYouTube):
     # CHANGE
     SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE
 
-    NO_ADDITIONAL_DATA_FROM_SONG = False
-
     def __init__(self, *args, **kwargs):
         self.connection: Connection = Connection(
             host=get_invidious_url(),