3 Commits

Author SHA1 Message Date
aafbba3b1c feat: implemented consistent settings
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-23 14:36:19 +02:00
40e9366a0b feat: implemented the new page mechanics in the downloader 2024-05-23 14:32:31 +02:00
8255ad5264 feat: added detection to autoscann pages
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-23 14:24:20 +02:00
14 changed files with 86 additions and 104 deletions

View File

@@ -30,31 +30,9 @@ from ..utils.exception import MKMissingNameException
from ..utils.exception.download import UrlNotFoundException
from ..utils.shared import DEBUG_PAGES
from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, Genius, INDEPENDENT_DB_OBJECTS
from ..pages import scan_for_pages, get_pages
ALL_PAGES: Set[Type[Page]] = {
# EncyclopaediaMetallum,
Genius,
Musify,
YoutubeMusic,
Bandcamp
}
if youtube_settings["use_youtube_alongside_youtube_music"]:
ALL_PAGES.add(YouTube)
AUDIO_PAGES: Set[Type[Page]] = {
Musify,
YouTube,
YoutubeMusic,
Bandcamp
}
SHADY_PAGES: Set[Type[Page]] = {
Musify,
}
fetch_map = {
Song: "fetch_song",
Album: "fetch_album",
@@ -62,66 +40,28 @@ fetch_map = {
Label: "fetch_label",
}
if DEBUG_PAGES:
DEBUGGING_PAGE = Bandcamp
print(f"Only downloading from page {DEBUGGING_PAGE}.")
ALL_PAGES = {DEBUGGING_PAGE}
AUDIO_PAGES = ALL_PAGES.union(AUDIO_PAGES)
class Pages:
def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None, **kwargs):
self.LOGGER = logging.getLogger("download")
self.download_options: DownloadOptions = download_options or DownloadOptions()
self.fetch_options: FetchOptions = fetch_options or FetchOptions()
# initialize all page instances
self._page_instances: Dict[Type[Page], Page] = dict()
self._source_to_page: Dict[SourceType, Type[Page]] = dict()
exclude_pages = exclude_pages if exclude_pages is not None else set()
if exclude_shady:
exclude_pages = exclude_pages.union(SHADY_PAGES)
if not exclude_pages.issubset(ALL_PAGES):
raise ValueError(f"The excluded pages have to be a subset of all pages: {exclude_pages} | {ALL_PAGES}")
def _set_to_tuple(page_set: Set[Type[Page]]) -> Tuple[Type[Page], ...]:
return tuple(sorted(page_set, key=lambda page: page.__name__))
self._pages_set: Set[Type[Page]] = ALL_PAGES.difference(exclude_pages)
self.pages: Tuple[Type[Page], ...] = _set_to_tuple(self._pages_set)
self._audio_pages_set: Set[Type[Page]] = self._pages_set.intersection(AUDIO_PAGES)
self.audio_pages: Tuple[Type[Page], ...] = _set_to_tuple(self._audio_pages_set)
for page_type in self.pages:
self._page_instances[page_type] = page_type(fetch_options=self.fetch_options, download_options=self.download_options)
self._source_to_page[page_type.SOURCE_TYPE] = page_type
def _get_page_from_enum(self, source_page: SourceType) -> Page:
if source_page not in self._source_to_page:
return None
return self._page_instances[self._source_to_page[source_page]]
scan_for_pages(download_options=self.download_options, fetch_options=self.fetch_options, **kwargs)
def search(self, query: Query) -> SearchResults:
result = SearchResults()
for page_type in self.pages:
for page in get_pages():
result.add(
page=page_type,
search_result=self._page_instances[page_type].search(query=query)
page=type(page),
search_result=page.search(query=query)
)
return result
def fetch_details(self, data_object: DataObject, stop_at_level: int = 1, **kwargs) -> DataObject:
if not isinstance(data_object, INDEPENDENT_DB_OBJECTS):
return data_object
source: Source
for source in data_object.source_collection.get_sources(source_type_sorting={
"only_with_page": True,
@@ -317,12 +257,10 @@ class Pages:
tmp.delete()
return r
def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]:
def fetch_url(self, url: str, **kwargs) -> DataObject:
source = Source.match_url(url, ALL_SOURCE_TYPES.MANUAL)
if source is None:
if source is None or source.page is None:
raise UrlNotFoundException(url=url)
_actual_page = self._source_to_page[source.source_type]
return _actual_page, self._page_instances[_actual_page].fetch_object_from_source(source=source, stop_at_level=stop_at_level)
return source.page.fetch_object_from_source(source=source, **kwargs)

View File

@@ -1,8 +1,55 @@
from .encyclopaedia_metallum import EncyclopaediaMetallum
from .musify import Musify
from .youtube import YouTube
from .youtube_music import YoutubeMusic
from .bandcamp import Bandcamp
from .genius import Genius
from typing import Type, Generator, Set, Dict, List
from collections import defaultdict
from .abstract import Page, INDEPENDENT_DB_OBJECTS
from ._encyclopaedia_metallum import EncyclopaediaMetallum
from ._musify import Musify
from ._youtube import YouTube
from ._youtube_music import YoutubeMusic
from ._bandcamp import Bandcamp
from ._genius import Genius
from ._abstract import Page, INDEPENDENT_DB_OBJECTS
_registered_pages: Dict[Type[Page], Set[Page]] = defaultdict(set)
def get_pages(*page_types: List[Type[Page]]) -> Generator[Page, None, None]:
if len(page_types) == 0:
page_types = _registered_pages.keys()
for page_type in page_types:
yield from _registered_pages[page_type]
def register_page(page_type: Type[Page], **kwargs):
if page_type in _registered_pages:
return
_registered_pages[page_type].add(page_type(**kwargs))
def deregister_page(page_type: Type[Page]):
if page_type not in _registered_pages:
return
for p in _registered_pages[page_type]:
p.__del__()
del _registered_pages[page_type]
def scan_for_pages(**kwargs):
# assuming the wanted pages are the leaf classes of the interface
leaf_classes = []
_class_list = [Page]
while len(_class_list):
_class = _class_list.pop()
_class_subclasses = _class.__subclasses__()
if len(_class_subclasses) == 0:
if _class.REGISTER:
leaf_classes.append(_class)
else:
_class_list.extend(_class_subclasses)
for leaf_class in leaf_classes:
register_page(leaf_class, **kwargs)

View File

@@ -1,15 +1,19 @@
from __future__ import annotations
import logging
import random
import re
from copy import copy
from pathlib import Path
from typing import Optional, Union, Type, Dict, Set, List, Tuple, TypedDict
from typing import Optional, Union, Type, Dict, Set, List, Tuple, TypedDict, TYPE_CHECKING
from string import Formatter
from dataclasses import dataclass, field
import requests
from bs4 import BeautifulSoup
if TYPE_CHECKING:
from ..download.page_attributes import DownloadOptions, FetchOptions
from ..connection import Connection
from ..objects import (
Song,
@@ -34,34 +38,25 @@ from ..utils import trace, output, BColors
INDEPENDENT_DB_OBJECTS = Union[Label, Album, Artist, Song]
INDEPENDENT_DB_TYPES = Union[Type[Song], Type[Album], Type[Artist], Type[Label]]
@dataclass
class FetchOptions:
download_all: bool = False
album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"]))
@dataclass
class DownloadOptions:
download_all: bool = False
album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"]))
process_audio_if_found: bool = False
process_metadata_if_found: bool = True
class Page:
REGISTER = True
SOURCE_TYPE: SourceType
LOGGER: logging.Logger
def __new__(cls, *args, **kwargs):
cls.LOGGER = logging.getLogger(cls.__name__)
return super().__new__(cls)
def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None, **kwargs):
self.SOURCE_TYPE.register_page(self)
self.download_options: DownloadOptions = download_options or DownloadOptions()
self.fetch_options: FetchOptions = fetch_options or FetchOptions()
def __del__(self):
self.SOURCE_TYPE.deregister_page()
def _search_regex(self, pattern, string, default=None, fatal=True, flags=0, group=None):
"""
Perform a regex search on the given string, using a single or a list of

View File

@@ -6,7 +6,7 @@ from bs4 import BeautifulSoup
import pycountry
from ..objects import Source, DatabaseObject
from .abstract import Page
from ._abstract import Page
from ..objects import (
Artist,
Source,

View File

@@ -6,7 +6,7 @@ from urllib.parse import urlparse, urlencode
from ..connection import Connection
from ..utils.config import logging_settings
from .abstract import Page
from ._abstract import Page
from ..utils.enums import SourceType, ALL_SOURCE_TYPES
from ..utils.enums.album import AlbumType
from ..utils.support_classes.query import Query
@@ -207,6 +207,7 @@ def create_grid(
class EncyclopaediaMetallum(Page):
REGISTER = False
SOURCE_TYPE = ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM
LOGGER = logging_settings["metal_archives_logger"]

View File

@@ -6,7 +6,7 @@ from bs4 import BeautifulSoup
import pycountry
from ..objects import Source, DatabaseObject
from .abstract import Page
from ._abstract import Page
from ..objects import (
Artist,
Source,

View File

@@ -8,7 +8,7 @@ import pycountry
from bs4 import BeautifulSoup
from ..connection import Connection
from .abstract import Page
from ._abstract import Page
from ..utils.enums import SourceType, ALL_SOURCE_TYPES
from ..utils.enums.album import AlbumType, AlbumStatus
from ..objects import (

View File

@@ -5,7 +5,7 @@ from enum import Enum
import python_sponsorblock
from ..objects import Source, DatabaseObject, Song, Target
from .abstract import Page
from ._abstract import Page
from ..objects import (
Artist,
Source,
@@ -22,7 +22,7 @@ from ..utils.enums import SourceType, ALL_SOURCE_TYPES
from ..utils.support_classes.download_result import DownloadResult
from ..utils.config import youtube_settings, main_settings, logging_settings
from .youtube_music.super_youtube import SuperYouTube, YouTubeUrl, get_invidious_url, YouTubeUrlType
from ._youtube_music.super_youtube import SuperYouTube, YouTubeUrl, get_invidious_url, YouTubeUrlType
"""
@@ -38,7 +38,7 @@ def get_piped_url(path: str = "", params: str = "", query: str = "", fragment: s
class YouTube(SuperYouTube):
# CHANGE
REGISTER = youtube_settings["use_youtube_alongside_youtube_music"]
SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE
def __init__(self, *args, **kwargs):

View File

@@ -3,7 +3,6 @@ from enum import Enum
from ...utils.config import logging_settings
from ...objects import Source, DatabaseObject
from ..abstract import Page
from ...objects import (
Artist,
Source,

View File

@@ -6,7 +6,6 @@ from ...utils.string_processing import clean_song_title
from ...utils.enums import SourceType, ALL_SOURCE_TYPES
from ...objects import Source, DatabaseObject
from ..abstract import Page
from ...objects import (
Artist,
Source,

View File

@@ -6,7 +6,7 @@ import requests
import python_sponsorblock
from ...objects import Source, DatabaseObject, Song, Target
from ..abstract import Page
from .._abstract import Page
from ...objects import (
Artist,
Source,

View File

@@ -22,7 +22,7 @@ from ...utils import get_current_millis, traverse_json_path
from ...utils import dump_to_file
from ..abstract import Page
from .._abstract import Page
from ...objects import (
DatabaseObject as DataObject,
Source,

View File

@@ -17,6 +17,9 @@ class SourceType:
def register_page(self, page: Page):
self.page = page
def deregister_page(self):
self.page = None
def __hash__(self):
return hash(self.name)