1 Commits

Author SHA1 Message Date
8255ad5264 feat: added detection to autoscann pages
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-23 14:24:20 +02:00
16 changed files with 75 additions and 174 deletions

View File

@@ -30,7 +30,7 @@ from ..utils.exception import MKMissingNameException
from ..utils.exception.download import UrlNotFoundException from ..utils.exception.download import UrlNotFoundException
from ..utils.shared import DEBUG_PAGES from ..utils.shared import DEBUG_PAGES
from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, Musicbrainz, Genius, INDEPENDENT_DB_OBJECTS from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, Genius, INDEPENDENT_DB_OBJECTS
ALL_PAGES: Set[Type[Page]] = { ALL_PAGES: Set[Type[Page]] = {
@@ -38,8 +38,7 @@ ALL_PAGES: Set[Type[Page]] = {
Genius, Genius,
Musify, Musify,
YoutubeMusic, YoutubeMusic,
Bandcamp, Bandcamp
Musicbrainz
} }
if youtube_settings["use_youtube_alongside_youtube_music"]: if youtube_settings["use_youtube_alongside_youtube_music"]:

View File

@@ -1,9 +1,56 @@
from .encyclopaedia_metallum import EncyclopaediaMetallum from typing import Type, Generator, Set, Dict, List
from .musify import Musify from collections import defaultdict
from .musicbrainz import Musicbrainz
from .youtube import YouTube
from .youtube_music import YoutubeMusic
from .bandcamp import Bandcamp
from .genius import Genius
from .abstract import Page, INDEPENDENT_DB_OBJECTS from ._encyclopaedia_metallum import EncyclopaediaMetallum
from ._musify import Musify
from ._youtube import YouTube
from ._youtube_music import YoutubeMusic
from ._bandcamp import Bandcamp
from ._genius import Genius
from ._abstract import Page, INDEPENDENT_DB_OBJECTS
_registered_pages: Dict[Type[Page], Set[Page]] = defaultdict(set)
def get_pages(*page_types: List[Type[Page]]) -> Generator[Page, None, None]:
if len(page_types) == 0:
page_types = _registered_pages.keys()
for page_type in page_types:
yield from _registered_pages[page_type]
def register_page(page_type: Type[Page], **kwargs):
if page_type in _registered_pages:
return
_registered_pages[page_type].add(page_type(**kwargs))
def deregister_page(page_type: Type[Page]):
if page_type not in _registered_pages:
return
for p in _registered_pages[page_type]:
p.__del__()
del _registered_pages[page_type]
def scan_for_pages():
# assuming the wanted pages are the leaf classes of the interface
leaf_classes = []
_class_list = [Page]
while len(_class_list):
_class = _class_list.pop()
_class_subclasses = _class.__subclasses__()
if len(_class_subclasses) == 0:
if _class.REGISTER:
leaf_classes.append(_class)
else:
_class_list.extend(_class_subclasses)
print(leaf_classes)
for leaf_class in leaf_classes:
register_page(leaf_class)

View File

@@ -48,12 +48,12 @@ class DownloadOptions:
process_metadata_if_found: bool = True process_metadata_if_found: bool = True
class Page: class Page:
REGISTER = True
SOURCE_TYPE: SourceType SOURCE_TYPE: SourceType
LOGGER: logging.Logger LOGGER: logging.Logger
def __new__(cls, *args, **kwargs): def __new__(cls, *args, **kwargs):
cls.LOGGER = logging.getLogger(cls.__name__) cls.LOGGER = logging.getLogger(cls.__name__)
return super().__new__(cls) return super().__new__(cls)
def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None): def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
@@ -62,6 +62,9 @@ class Page:
self.download_options: DownloadOptions = download_options or DownloadOptions() self.download_options: DownloadOptions = download_options or DownloadOptions()
self.fetch_options: FetchOptions = fetch_options or FetchOptions() self.fetch_options: FetchOptions = fetch_options or FetchOptions()
def __del__(self):
self.SOURCE_TYPE.deregister_page(self)
def _search_regex(self, pattern, string, default=None, fatal=True, flags=0, group=None): def _search_regex(self, pattern, string, default=None, fatal=True, flags=0, group=None):
""" """
Perform a regex search on the given string, using a single or a list of Perform a regex search on the given string, using a single or a list of

View File

@@ -6,7 +6,7 @@ from bs4 import BeautifulSoup
import pycountry import pycountry
from ..objects import Source, DatabaseObject from ..objects import Source, DatabaseObject
from .abstract import Page from ._abstract import Page
from ..objects import ( from ..objects import (
Artist, Artist,
Source, Source,

View File

@@ -6,7 +6,7 @@ from urllib.parse import urlparse, urlencode
from ..connection import Connection from ..connection import Connection
from ..utils.config import logging_settings from ..utils.config import logging_settings
from .abstract import Page from ._abstract import Page
from ..utils.enums import SourceType, ALL_SOURCE_TYPES from ..utils.enums import SourceType, ALL_SOURCE_TYPES
from ..utils.enums.album import AlbumType from ..utils.enums.album import AlbumType
from ..utils.support_classes.query import Query from ..utils.support_classes.query import Query
@@ -207,6 +207,7 @@ def create_grid(
class EncyclopaediaMetallum(Page): class EncyclopaediaMetallum(Page):
REGISTER = False
SOURCE_TYPE = ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM SOURCE_TYPE = ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM
LOGGER = logging_settings["metal_archives_logger"] LOGGER = logging_settings["metal_archives_logger"]

View File

@@ -6,7 +6,7 @@ from bs4 import BeautifulSoup
import pycountry import pycountry
from ..objects import Source, DatabaseObject from ..objects import Source, DatabaseObject
from .abstract import Page from ._abstract import Page
from ..objects import ( from ..objects import (
Artist, Artist,
Source, Source,
@@ -134,7 +134,7 @@ class Genius(Page):
source_list=[source], source_list=[source],
artist_list=[self.parse_api_object(data.get("artist"))], artist_list=[self.parse_api_object(data.get("artist"))],
artwork=artwork, artwork=artwork,
date=ID3Timestamp(**(data.get("release_date_components") or {})), date=ID3Timestamp(**data.get("release_date_components", {})),
) )
if object_type == "song": if object_type == "song":

View File

@@ -8,7 +8,7 @@ import pycountry
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from ..connection import Connection from ..connection import Connection
from .abstract import Page from ._abstract import Page
from ..utils.enums import SourceType, ALL_SOURCE_TYPES from ..utils.enums import SourceType, ALL_SOURCE_TYPES
from ..utils.enums.album import AlbumType, AlbumStatus from ..utils.enums.album import AlbumType, AlbumStatus
from ..objects import ( from ..objects import (

View File

@@ -5,7 +5,7 @@ from enum import Enum
import python_sponsorblock import python_sponsorblock
from ..objects import Source, DatabaseObject, Song, Target from ..objects import Source, DatabaseObject, Song, Target
from .abstract import Page from ._abstract import Page
from ..objects import ( from ..objects import (
Artist, Artist,
Source, Source,
@@ -22,7 +22,7 @@ from ..utils.enums import SourceType, ALL_SOURCE_TYPES
from ..utils.support_classes.download_result import DownloadResult from ..utils.support_classes.download_result import DownloadResult
from ..utils.config import youtube_settings, main_settings, logging_settings from ..utils.config import youtube_settings, main_settings, logging_settings
from .youtube_music.super_youtube import SuperYouTube, YouTubeUrl, get_invidious_url, YouTubeUrlType from ._youtube_music.super_youtube import SuperYouTube, YouTubeUrl, get_invidious_url, YouTubeUrlType
""" """
@@ -38,7 +38,7 @@ def get_piped_url(path: str = "", params: str = "", query: str = "", fragment: s
class YouTube(SuperYouTube): class YouTube(SuperYouTube):
# CHANGE REGISTER = youtube_settings["use_youtube_alongside_youtube_music"]
SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):

View File

@@ -3,7 +3,6 @@ from enum import Enum
from ...utils.config import logging_settings from ...utils.config import logging_settings
from ...objects import Source, DatabaseObject from ...objects import Source, DatabaseObject
from ..abstract import Page
from ...objects import ( from ...objects import (
Artist, Artist,
Source, Source,

View File

@@ -6,7 +6,6 @@ from ...utils.string_processing import clean_song_title
from ...utils.enums import SourceType, ALL_SOURCE_TYPES from ...utils.enums import SourceType, ALL_SOURCE_TYPES
from ...objects import Source, DatabaseObject from ...objects import Source, DatabaseObject
from ..abstract import Page
from ...objects import ( from ...objects import (
Artist, Artist,
Source, Source,

View File

@@ -6,7 +6,7 @@ import requests
import python_sponsorblock import python_sponsorblock
from ...objects import Source, DatabaseObject, Song, Target from ...objects import Source, DatabaseObject, Song, Target
from ..abstract import Page from .._abstract import Page
from ...objects import ( from ...objects import (
Artist, Artist,
Source, Source,

View File

@@ -22,7 +22,7 @@ from ...utils import get_current_millis, traverse_json_path
from ...utils import dump_to_file from ...utils import dump_to_file
from ..abstract import Page from .._abstract import Page
from ...objects import ( from ...objects import (
DatabaseObject as DataObject, DatabaseObject as DataObject,
Source, Source,

View File

@@ -1,145 +0,0 @@
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from typing import List, Optional, Type, Union, Generator, Dict, Any
from urllib.parse import urlparse
import pycountry
import musicbrainzngs
from bs4 import BeautifulSoup
from ..connection import Connection
from .abstract import Page
from ..utils.enums import SourceType, ALL_SOURCE_TYPES
from ..utils.enums.album import AlbumType, AlbumStatus
from ..objects import (
Artist,
Source,
Song,
Album,
ID3Timestamp,
FormattedText,
Label,
Target,
DatabaseObject,
Lyrics,
Artwork
)
from ..utils.config import logging_settings, main_settings
from ..utils import string_processing, shared
from ..utils.string_processing import clean_song_title
from ..utils.support_classes.query import Query
from ..utils.support_classes.download_result import DownloadResult
class Musicbrainz(Page):
SOURCE_TYPE = ALL_SOURCE_TYPES.MUSICBRAINZ
HOST = "https://musicbrainz.org"
def __init__(self, *args, **kwargs):
musicbrainzngs.set_useragent("mk", "1")
super().__init__(*args, **kwargs)
def general_search(self, search_query: str) -> List[DatabaseObject]:
search_results = []
#Artist
search_results += self.artist_search(search_query).copy()
#Album
search_results += self.album_search(search_query).copy()
#Song
search_results += self.song_search(search_query).copy()
return search_results
def artist_search(self, search_query: str) -> List[Artist]:
artist_list = []
#Artist
artist_dict_list: list = musicbrainzngs.search_artists(search_query)['artist-list']
artist_source_list: List[Source] = []
for artist_dict in artist_dict_list:
artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/artist/" + artist_dict['id']))
artist_list.append(Artist(
name=artist_dict['name'],
source_list=artist_source_list
))
return artist_list
def song_search(self, search_query: str) -> List[Song]:
song_list = []
#Song
song_dict_list: list = musicbrainzngs.search_recordings(search_query)['recording-list']
song_source_list: List[Source] = []
for song_dict in song_dict_list:
song_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/recording/" + song_dict['id']))
song_list.append(Song(
title=song_dict['title'],
source_list=song_source_list
))
return song_list
def album_search(self, search_query: str) -> List[Album]:
album_list = []
#Album
album_dict_list: list = musicbrainzngs.search_release_groups(search_query)['release-group-list']
album_source_list: List[Source] = []
for album_dict in album_dict_list:
album_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/release-group/" + album_dict['id']))
album_list.append(Album(
title=album_dict['title'],
source_list=album_source_list
))
return album_list
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
album_list = []
#Album
album_dict_list: list = musicbrainzngs.search_release_groups(search_query)['release-group-list']
album_source_list: List[Source] = []
for album_dict in album_dict_list:
album_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/release-group/" + album_dict['id']))
album_list.append(Album(
title=album_dict['title'],
source_list=album_source_list
))
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
artist_list = []
#Artist
artist_dict_list: list = musicbrainzngs.search_artists(search_query)['artist-list']
artist_source_list: List[Source] = []
for artist_dict in artist_dict_list:
artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/artist/" + artist_dict['id']))
artist_list.append(Artist(
name=artist_dict['name'],
source_list=artist_source_list,
))
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
song_list = []
#Song
song_dict_list: list = musicbrainzngs.search_recordings(search_query)['recording-list']
song_source_list: List[Source] = []
for song_dict in song_dict_list:
song_source_list.append(Source(self.SOURCE_TYPE, self.HOST + "/recording/" + song_dict['id']))
song_list.append(Song(
title=song_dict['title'],
source_list=song_source_list
))

View File

@@ -59,11 +59,6 @@ Reference for the logging formats: https://docs.python.org/3/library/logging.htm
description="The logger for the musify scraper.", description="The logger for the musify scraper.",
default_value="musify" default_value="musify"
), ),
LoggerAttribute(
name="musicbrainz_logger",
description="The logger for the musicbrainz scraper.",
default_value="musicbrainz"
),
LoggerAttribute( LoggerAttribute(
name="youtube_logger", name="youtube_logger",
description="The logger for the youtube scraper.", description="The logger for the youtube scraper.",

View File

@@ -17,6 +17,9 @@ class SourceType:
def register_page(self, page: Page): def register_page(self, page: Page):
self.page = page self.page = page
def deregister_page(self):
self.page = None
def __hash__(self): def __hash__(self):
return hash(self.name) return hash(self.name)