From c306da793494475b006dd0fa26c652387a634e80 Mon Sep 17 00:00:00 2001
From: Lars Noack
Date: Wed, 12 Jun 2024 14:18:52 +0200
Subject: [PATCH] feat: improved and documented the search functions

---
 music_kraken/download/__init__.py | 42 +++++++++++++++++++++++++++++--
 music_kraken/utils/__init__.py    |  6 +++++
 2 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/music_kraken/download/__init__.py b/music_kraken/download/__init__.py
index 7484eef..6ec70dd 100644
--- a/music_kraken/download/__init__.py
+++ b/music_kraken/download/__init__.py
@@ -19,7 +19,7 @@ from ..connection import Connection
 from ..objects import Album, Artist, Collection
 from ..objects import DatabaseObject as DataObject
 from ..objects import Label, Options, Song, Source, Target
-from ..utils import BColors, output, trace
+from ..utils import BColors, limit_generator, output, trace
 from ..utils.config import main_settings, youtube_settings
 from ..utils.enums import ALL_SOURCE_TYPES, SourceType
 from ..utils.enums.album import AlbumType
@@ -74,6 +74,8 @@ class Downloader:
         if auto_register_pages:
             self.scan_for_pages(**kwargs)
 
+    # manage which pages to use
+
     def register_page(self, page_type: Type[Page], **kwargs):
         if page_type in self._registered_pages:
             return
@@ -122,10 +124,46 @@ class Downloader:
         for page_type in page_types:
             yield from self._registered_pages[page_type]
 
+    # fetching/downloading data
+
     def search(self, query: Query) -> Generator[DataObject, None, None]:
+        """Yields all data objects that were found by the query.
+        Other than `Downloader.search_yield_pages`, this function just yields all data objects.
+        This loses the information about where the objects were found originally, so this might not be the best choice.
+
+        Args:
+            query (Query): The query to search for.
+
+        Yields:
+            Generator[DataObject, None, None]: A generator that yields all found data objects.
+ """ + for page in self.get_pages(): yield from page.search(query=query) - + + def search_yield_pages(self, query: Query, results_per_page: Optional[int] = None) -> Generator[Tuple[Page, Generator[DataObject, None, None]], None, None]: + """Yields all data objects that were found by the query, grouped by the page they were found on. + every yield is a tuple of the page and a generator that yields the data objects. + So this could be how it is used: + + ```python + for page, data_objects in downloader.search_yield_pages(query): + print(f"Found on {page}:") + for data_object in data_objects: + print(data_object) + ``` + + Args: + query (Query): The query to search for. + results_per_page (Optional[int], optional): If this is set, the generators only yield this amount of data objects per page. + + Yields: + Generator[Tuple[Page, Generator[DataObject, None, None]], None, None]: yields the page and a generator that yields the data objects. + """ + + for page in self.get_pages(): + yield page, limit_generator(page.search(query=query), limit=results_per_page) + def fetch_details(self, data_object: DataObject, stop_at_level: int = 1, **kwargs) -> DataObject: source: Source for source in data_object.source_collection.get_sources(source_type_sorting={ diff --git a/music_kraken/utils/__init__.py b/music_kraken/utils/__init__.py index 8e96fce..bc386a9 100644 --- a/music_kraken/utils/__init__.py +++ b/music_kraken/utils/__init__.py @@ -2,6 +2,7 @@ import inspect import json import logging from datetime import datetime +from itertools import takewhile from pathlib import Path from typing import List, Union @@ -127,3 +128,8 @@ def get_current_millis() -> int: def get_unix_time() -> int: return int(datetime.now().timestamp()) + + +def limit_generator(generator, limit: Optional[int] = None): + return takewhile(lambda x: x < limit, generator) if limit is not None else generator + \ No newline at end of file