Compare commits


2 Commits

SHA1        Message                                                      Date
aafbba3b1c  feat: implemented consistent settings                        2024-05-23 14:36:19 +02:00
            (ci/woodpecker/push/woodpecker: pipeline was successful)
40e9366a0b  feat: implemented the new page mechanics in the downloader   2024-05-23 14:32:31 +02:00
3 changed files with 18 additions and 89 deletions

File 1 of 3

@@ -30,31 +30,9 @@ from ..utils.exception import MKMissingNameException
 from ..utils.exception.download import UrlNotFoundException
 from ..utils.shared import DEBUG_PAGES
-from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, Genius, INDEPENDENT_DB_OBJECTS
-
-ALL_PAGES: Set[Type[Page]] = {
-    # EncyclopaediaMetallum,
-    Genius,
-    Musify,
-    YoutubeMusic,
-    Bandcamp
-}
-
-if youtube_settings["use_youtube_alongside_youtube_music"]:
-    ALL_PAGES.add(YouTube)
-
-AUDIO_PAGES: Set[Type[Page]] = {
-    Musify,
-    YouTube,
-    YoutubeMusic,
-    Bandcamp
-}
-
-SHADY_PAGES: Set[Type[Page]] = {
-    Musify,
-}
+from ..pages import scan_for_pages, get_pages

 fetch_map = {
     Song: "fetch_song",
     Album: "fetch_album",
@@ -62,66 +40,28 @@ fetch_map = {
     Label: "fetch_label",
 }

-if DEBUG_PAGES:
-    DEBUGGING_PAGE = Bandcamp
-    print(f"Only downloading from page {DEBUGGING_PAGE}.")
-
-    ALL_PAGES = {DEBUGGING_PAGE}
-    AUDIO_PAGES = ALL_PAGES.union(AUDIO_PAGES)
-
 class Pages:
-    def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
+    def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None, **kwargs):
         self.LOGGER = logging.getLogger("download")

         self.download_options: DownloadOptions = download_options or DownloadOptions()
         self.fetch_options: FetchOptions = fetch_options or FetchOptions()

-        # initialize all page instances
-        self._page_instances: Dict[Type[Page], Page] = dict()
-        self._source_to_page: Dict[SourceType, Type[Page]] = dict()
-
-        exclude_pages = exclude_pages if exclude_pages is not None else set()
-
-        if exclude_shady:
-            exclude_pages = exclude_pages.union(SHADY_PAGES)
-
-        if not exclude_pages.issubset(ALL_PAGES):
-            raise ValueError(f"The excluded pages have to be a subset of all pages: {exclude_pages} | {ALL_PAGES}")
-
-        def _set_to_tuple(page_set: Set[Type[Page]]) -> Tuple[Type[Page], ...]:
-            return tuple(sorted(page_set, key=lambda page: page.__name__))
-
-        self._pages_set: Set[Type[Page]] = ALL_PAGES.difference(exclude_pages)
-        self.pages: Tuple[Type[Page], ...] = _set_to_tuple(self._pages_set)
-
-        self._audio_pages_set: Set[Type[Page]] = self._pages_set.intersection(AUDIO_PAGES)
-        self.audio_pages: Tuple[Type[Page], ...] = _set_to_tuple(self._audio_pages_set)
-
-        for page_type in self.pages:
-            self._page_instances[page_type] = page_type(fetch_options=self.fetch_options, download_options=self.download_options)
-            self._source_to_page[page_type.SOURCE_TYPE] = page_type
-
-    def _get_page_from_enum(self, source_page: SourceType) -> Page:
-        if source_page not in self._source_to_page:
-            return None
-        return self._page_instances[self._source_to_page[source_page]]
+        scan_for_pages(download_options=self.download_options, fetch_options=self.fetch_options, **kwargs)

     def search(self, query: Query) -> SearchResults:
         result = SearchResults()

-        for page_type in self.pages:
+        for page in get_pages():
             result.add(
-                page=page_type,
-                search_result=self._page_instances[page_type].search(query=query)
+                page=type(page),
+                search_result=page.search(query=query)
             )

         return result

     def fetch_details(self, data_object: DataObject, stop_at_level: int = 1, **kwargs) -> DataObject:
-        if not isinstance(data_object, INDEPENDENT_DB_OBJECTS):
-            return data_object
-
         source: Source
         for source in data_object.source_collection.get_sources(source_type_sorting={
             "only_with_page": True,
@@ -317,12 +257,10 @@ class Pages:
             tmp.delete()
         return r

-    def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]:
+    def fetch_url(self, url: str, **kwargs) -> DataObject:
         source = Source.match_url(url, ALL_SOURCE_TYPES.MANUAL)

-        if source is None:
+        if source is None or source.page is None:
             raise UrlNotFoundException(url=url)

-        _actual_page = self._source_to_page[source.source_type]
-
-        return _actual_page, self._page_instances[_actual_page].fetch_object_from_source(source=source, stop_at_level=stop_at_level)
+        return source.page.fetch_object_from_source(source=source, **kwargs)
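
Note: the deleted ALL_PAGES / _page_instances bookkeeping above is replaced by the module-level registry that scan_for_pages() fills and get_pages() iterates. A minimal self-contained sketch of that pattern (illustrative names and simplified internals, not the project's exact code):

from typing import Dict, Iterator, Type

class Page:
    def __init__(self, **kwargs):
        # construction options forwarded from Pages.__init__
        self.options = kwargs

# one shared module-level registry instead of per-instance dictionaries
_registered_pages: Dict[Type[Page], Page] = {}

def register_page(page_type: Type[Page], **kwargs):
    # instantiate every page class exactly once
    if page_type not in _registered_pages:
        _registered_pages[page_type] = page_type(**kwargs)

def get_pages() -> Iterator[Page]:
    yield from _registered_pages.values()

Every consumer (search, fetch_url, ...) then loops over get_pages() instead of holding its own Dict[Type[Page], Page].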

File 2 of 3

@@ -36,7 +36,7 @@ def deregister_page(page_type: Type[Page]):
     p.__del__()
     del _registered_pages[page_type]

-def scan_for_pages():
+def scan_for_pages(**kwargs):
     # assuming the wanted pages are the leaf classes of the interface
     leaf_classes = []

@@ -51,6 +51,5 @@ def scan_for_pages():
         else:
             _class_list.extend(_class_subclasses)

-    print(leaf_classes)
     for leaf_class in leaf_classes:
-        register_page(leaf_class)
+        register_page(leaf_class, **kwargs)

File 3 of 3

@@ -1,15 +1,19 @@
+from __future__ import annotations
+
 import logging
 import random
 import re
 from copy import copy
 from pathlib import Path
-from typing import Optional, Union, Type, Dict, Set, List, Tuple, TypedDict
+from typing import Optional, Union, Type, Dict, Set, List, Tuple, TypedDict, TYPE_CHECKING
 from string import Formatter
 from dataclasses import dataclass, field

 import requests
 from bs4 import BeautifulSoup

+if TYPE_CHECKING:
+    from ..download.page_attributes import DownloadOptions, FetchOptions
+
 from ..connection import Connection
 from ..objects import (
     Song,
@@ -34,18 +38,6 @@ from ..utils import trace, output, BColors
 INDEPENDENT_DB_OBJECTS = Union[Label, Album, Artist, Song]
 INDEPENDENT_DB_TYPES = Union[Type[Song], Type[Album], Type[Artist], Type[Label]]

-@dataclass
-class FetchOptions:
-    download_all: bool = False
-    album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"]))
-
-@dataclass
-class DownloadOptions:
-    download_all: bool = False
-    album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"]))
-
-    process_audio_if_found: bool = False
-    process_metadata_if_found: bool = True
-
 class Page:
     REGISTER = True

@@ -56,14 +48,14 @@ class Page:
         cls.LOGGER = logging.getLogger(cls.__name__)
         return super().__new__(cls)

-    def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
+    def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None, **kwargs):
         self.SOURCE_TYPE.register_page(self)

         self.download_options: DownloadOptions = download_options or DownloadOptions()
         self.fetch_options: FetchOptions = fetch_options or FetchOptions()

     def __del__(self):
-        self.SOURCE_TYPE.deregister_page(self)
+        self.SOURCE_TYPE.deregister_page()

     def _search_regex(self, pattern, string, default=None, fatal=True, flags=0, group=None):
         """