feat: improved and documented the search functions

Hazel 2024-06-12 14:18:52 +02:00
parent 684c90a7b4
commit c306da7934
2 changed files with 46 additions and 2 deletions

View File

@@ -19,7 +19,7 @@ from ..connection import Connection
 from ..objects import Album, Artist, Collection
 from ..objects import DatabaseObject as DataObject
 from ..objects import Label, Options, Song, Source, Target
-from ..utils import BColors, output, trace
+from ..utils import BColors, limit_generator, output, trace
 from ..utils.config import main_settings, youtube_settings
 from ..utils.enums import ALL_SOURCE_TYPES, SourceType
 from ..utils.enums.album import AlbumType
@@ -74,6 +74,8 @@ class Downloader:
         if auto_register_pages:
             self.scan_for_pages(**kwargs)
+
+    # manage which pages to use
     def register_page(self, page_type: Type[Page], **kwargs):
         if page_type in self._registered_pages:
             return
@@ -122,10 +124,46 @@ class Downloader:
         for page_type in page_types:
             yield from self._registered_pages[page_type]

+    # fetching/downloading data
     def search(self, query: Query) -> Generator[DataObject, None, None]:
+        """Yields all data objects that were found by the query.
+
+        Unlike `Downloader.search_yield_pages`, this function simply yields all data objects.
+        This loses the information about which page each object was originally found on,
+        so it might not be the best choice.
+
+        Args:
+            query (Query): The query to search for.
+
+        Yields:
+            Generator[DataObject, None, None]: A generator that yields all found data objects.
+        """
         for page in self.get_pages():
             yield from page.search(query=query)

+    def search_yield_pages(self, query: Query, results_per_page: Optional[int] = None) -> Generator[Tuple[Page, Generator[DataObject, None, None]], None, None]:
+        """Yields the data objects found by the query, grouped by the page they were found on.
+
+        Every yield is a tuple of the page and a generator over that page's data objects:
+
+        ```python
+        for page, data_objects in downloader.search_yield_pages(query):
+            print(f"Found on {page}:")
+            for data_object in data_objects:
+                print(data_object)
+        ```
+
+        Args:
+            query (Query): The query to search for.
+            results_per_page (Optional[int], optional): If set, each per-page generator yields at most this many data objects.
+
+        Yields:
+            Generator[Tuple[Page, Generator[DataObject, None, None]], None, None]: Yields each page together with a generator over its data objects.
+        """
+        for page in self.get_pages():
+            yield page, limit_generator(page.search(query=query), limit=results_per_page)
+
     def fetch_details(self, data_object: DataObject, stop_at_level: int = 1, **kwargs) -> DataObject:
         source: Source
         for source in data_object.source_collection.get_sources(source_type_sorting={
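Taken together, this lets callers cap the number of results per page. A minimal usage sketch follows; the `Downloader()` and `Query(...)` constructions are assumptions, since their signatures are not shown in this diff:

```python
# Hypothetical setup: constructor arguments are assumptions, not shown in this diff.
downloader = Downloader()
query = Query("portishead")

# The new results_per_page parameter caps each page's generator at 5 items.
for page, results in downloader.search_yield_pages(query, results_per_page=5):
    print(f"--- {page} ---")
    for data_object in results:
        print(data_object)
```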

View File

@@ -2,6 +2,7 @@ import inspect
 import json
 import logging
 from datetime import datetime
+from itertools import islice
 from pathlib import Path
-from typing import List, Union
+from typing import List, Optional, Union
@@ -127,3 +128,8 @@ def get_current_millis() -> int:
 def get_unix_time() -> int:
     return int(datetime.now().timestamp())

+def limit_generator(generator, limit: Optional[int] = None):
+    """Yield at most `limit` items from `generator`; pass it through unchanged if no limit is given."""
+    # islice caps the number of yielded items; takewhile would compare item
+    # values against the limit instead of counting them.
+    return islice(generator, limit) if limit is not None else generator
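For reference, a quick sketch of how this helper behaves with the `islice`-based implementation above (assuming `limit_generator` is importable from the utils module):

```python
from itertools import count

# With a limit, at most `limit` items are yielded, even from an infinite generator.
print(list(limit_generator(count(), limit=3)))   # [0, 1, 2]

# Without a limit, the generator is passed through unchanged.
print(list(limit_generator(iter([1, 2]))))       # [1, 2]
```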