17 Commits

Author SHA1 Message Date
aafbba3b1c feat: implemented consistent settings
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-23 14:36:19 +02:00
40e9366a0b feat: implemented the new page mechanics in the downloader 2024-05-23 14:32:31 +02:00
8255ad5264 feat: added detection to autoscann pages
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-23 14:24:20 +02:00
2aa0f02fa5 Merge branch 'adding_genius' into experimental
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-23 13:36:10 +02:00
344da0a0bf fix: converting pictures to rgb before saving
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-22 15:20:26 +02:00
49dc7093c8 fix: genius fallback
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-22 15:18:43 +02:00
90f70638b4 feat: better lyrics support
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-21 17:55:08 +02:00
7b4eee858a feat: parsed script json
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-21 17:14:58 +02:00
f61b34dd40 feat: improved feature artists by also adding writer and producer to it
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-21 16:52:01 +02:00
688b4fd357 feat: getting the album tracklist
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-21 16:47:38 +02:00
769d27dc5c feat: album details
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-21 16:43:52 +02:00
f5d953d9ce feat: theoretically fetching feature songs
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-21 16:34:04 +02:00
46b64b8f8d feat: fetched the flat artist details 2024-05-21 16:23:05 +02:00
adfce16d2a feat: fetched the flat artist details 2024-05-21 16:21:58 +02:00
e4fd9faf12 feat: detecting url type 2024-05-21 15:57:09 +02:00
f6caee41a8 feat: finished searching genious
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-21 15:52:41 +02:00
068c749c38 feat: implemented artist search 2024-05-21 15:27:10 +02:00
21 changed files with 406 additions and 109 deletions

View File

@@ -27,6 +27,7 @@
"Gitea", "Gitea",
"iframe", "iframe",
"isrc", "isrc",
"itemprop",
"levenshtein", "levenshtein",
"metallum", "metallum",
"MUSICBRAINZ", "MUSICBRAINZ",

View File

@@ -6,9 +6,10 @@ logging.getLogger().setLevel(logging.DEBUG)
if __name__ == "__main__": if __name__ == "__main__":
commands = [ commands = [
"s: #a I'm in a coffin", "s: #a Crystal F",
"0", "10",
"d: 0", "1",
"3",
] ]

View File

@@ -93,6 +93,10 @@ def write_metadata_to_target(metadata: Metadata, target: Target, song: Song):
# resize the image to the preferred resolution # resize the image to the preferred resolution
img.thumbnail((main_settings["preferred_artwork_resolution"], main_settings["preferred_artwork_resolution"])) img.thumbnail((main_settings["preferred_artwork_resolution"], main_settings["preferred_artwork_resolution"]))
# https://stackoverflow.com/a/59476938/16804841
if img.mode != 'RGB':
img = img.convert('RGB')
img.save(converted_target.file_path, "JPEG") img.save(converted_target.file_path, "JPEG")
# https://stackoverflow.com/questions/70228440/mutagen-how-can-i-correctly-embed-album-art-into-mp3-file-so-that-i-can-see-t # https://stackoverflow.com/questions/70228440/mutagen-how-can-i-correctly-embed-album-art-into-mp3-file-so-that-i-can-see-t

View File

@@ -137,13 +137,13 @@ class Cache:
) )
self._write_attribute(cache_attribute) self._write_attribute(cache_attribute)
cache_path = fit_to_file_system(Path(module_path, name), hidden_ok=True) cache_path = fit_to_file_system(Path(module_path, name.replace("/", "_")), hidden_ok=True)
with cache_path.open("wb") as content_file: with cache_path.open("wb") as content_file:
self.logger.debug(f"writing cache to {cache_path}") self.logger.debug(f"writing cache to {cache_path}")
content_file.write(content) content_file.write(content)
def get(self, name: str) -> Optional[CacheResult]: def get(self, name: str) -> Optional[CacheResult]:
path = fit_to_file_system(Path(self._dir, self.module, name), hidden_ok=True) path = fit_to_file_system(Path(self._dir, self.module, name.replace("/", "_")), hidden_ok=True)
if not path.is_file(): if not path.is_file():
return None return None
@@ -166,7 +166,7 @@ class Cache:
if ca.name == "": if ca.name == "":
continue continue
file = fit_to_file_system(Path(self._dir, ca.module, ca.name), hidden_ok=True) file = fit_to_file_system(Path(self._dir, ca.module, ca.name.replace("/", "_")), hidden_ok=True)
if not ca.is_valid: if not ca.is_valid:
self.logger.debug(f"deleting cache {ca.id}") self.logger.debug(f"deleting cache {ca.id}")

View File

@@ -30,30 +30,9 @@ from ..utils.exception import MKMissingNameException
from ..utils.exception.download import UrlNotFoundException from ..utils.exception.download import UrlNotFoundException
from ..utils.shared import DEBUG_PAGES from ..utils.shared import DEBUG_PAGES
from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, INDEPENDENT_DB_OBJECTS from ..pages import scan_for_pages, get_pages
ALL_PAGES: Set[Type[Page]] = {
# EncyclopaediaMetallum,
Musify,
YoutubeMusic,
Bandcamp
}
if youtube_settings["use_youtube_alongside_youtube_music"]:
ALL_PAGES.add(YouTube)
AUDIO_PAGES: Set[Type[Page]] = {
Musify,
YouTube,
YoutubeMusic,
Bandcamp
}
SHADY_PAGES: Set[Type[Page]] = {
Musify,
}
fetch_map = { fetch_map = {
Song: "fetch_song", Song: "fetch_song",
Album: "fetch_album", Album: "fetch_album",
@@ -61,66 +40,28 @@ fetch_map = {
Label: "fetch_label", Label: "fetch_label",
} }
if DEBUG_PAGES:
DEBUGGING_PAGE = Bandcamp
print(f"Only downloading from page {DEBUGGING_PAGE}.")
ALL_PAGES = {DEBUGGING_PAGE}
AUDIO_PAGES = ALL_PAGES.union(AUDIO_PAGES)
class Pages: class Pages:
def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False, download_options: DownloadOptions = None, fetch_options: FetchOptions = None): def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None, **kwargs):
self.LOGGER = logging.getLogger("download") self.LOGGER = logging.getLogger("download")
self.download_options: DownloadOptions = download_options or DownloadOptions() self.download_options: DownloadOptions = download_options or DownloadOptions()
self.fetch_options: FetchOptions = fetch_options or FetchOptions() self.fetch_options: FetchOptions = fetch_options or FetchOptions()
# initialize all page instances scan_for_pages(download_options=self.download_options, fetch_options=self.fetch_options, **kwargs)
self._page_instances: Dict[Type[Page], Page] = dict()
self._source_to_page: Dict[SourceType, Type[Page]] = dict()
exclude_pages = exclude_pages if exclude_pages is not None else set()
if exclude_shady:
exclude_pages = exclude_pages.union(SHADY_PAGES)
if not exclude_pages.issubset(ALL_PAGES):
raise ValueError(f"The excluded pages have to be a subset of all pages: {exclude_pages} | {ALL_PAGES}")
def _set_to_tuple(page_set: Set[Type[Page]]) -> Tuple[Type[Page], ...]:
return tuple(sorted(page_set, key=lambda page: page.__name__))
self._pages_set: Set[Type[Page]] = ALL_PAGES.difference(exclude_pages)
self.pages: Tuple[Type[Page], ...] = _set_to_tuple(self._pages_set)
self._audio_pages_set: Set[Type[Page]] = self._pages_set.intersection(AUDIO_PAGES)
self.audio_pages: Tuple[Type[Page], ...] = _set_to_tuple(self._audio_pages_set)
for page_type in self.pages:
self._page_instances[page_type] = page_type(fetch_options=self.fetch_options, download_options=self.download_options)
self._source_to_page[page_type.SOURCE_TYPE] = page_type
def _get_page_from_enum(self, source_page: SourceType) -> Page:
if source_page not in self._source_to_page:
return None
return self._page_instances[self._source_to_page[source_page]]
def search(self, query: Query) -> SearchResults: def search(self, query: Query) -> SearchResults:
result = SearchResults() result = SearchResults()
for page_type in self.pages: for page in get_pages():
result.add( result.add(
page=page_type, page=type(page),
search_result=self._page_instances[page_type].search(query=query) search_result=page.search(query=query)
) )
return result return result
def fetch_details(self, data_object: DataObject, stop_at_level: int = 1, **kwargs) -> DataObject: def fetch_details(self, data_object: DataObject, stop_at_level: int = 1, **kwargs) -> DataObject:
if not isinstance(data_object, INDEPENDENT_DB_OBJECTS):
return data_object
source: Source source: Source
for source in data_object.source_collection.get_sources(source_type_sorting={ for source in data_object.source_collection.get_sources(source_type_sorting={
"only_with_page": True, "only_with_page": True,
@@ -316,12 +257,10 @@ class Pages:
tmp.delete() tmp.delete()
return r return r
def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]: def fetch_url(self, url: str, **kwargs) -> DataObject:
source = Source.match_url(url, ALL_SOURCE_TYPES.MANUAL) source = Source.match_url(url, ALL_SOURCE_TYPES.MANUAL)
if source is None: if source is None or source.page is None:
raise UrlNotFoundException(url=url) raise UrlNotFoundException(url=url)
_actual_page = self._source_to_page[source.source_type] return source.page.fetch_object_from_source(source=source, **kwargs)
return _actual_page, self._page_instances[_actual_page].fetch_object_from_source(source=source, stop_at_level=stop_at_level)

View File

@@ -59,4 +59,6 @@ class Artwork:
self._variant_mapping[key] = value self._variant_mapping[key] = value
def __eq__(self, other: Artwork) -> bool: def __eq__(self, other: Artwork) -> bool:
if not isinstance(other, Artwork):
return False
return any(a == b for a, b in zip(self._variant_mapping.keys(), other._variant_mapping.keys())) return any(a == b for a, b in zip(self._variant_mapping.keys(), other._variant_mapping.keys()))

View File

@@ -38,11 +38,19 @@ class FormattedText:
def markdown(self) -> str: def markdown(self) -> str:
return md(self.html).strip() return md(self.html).strip()
# Setter counterpart of the `markdown` property: the given Markdown text is
# passed through mistune.markdown and the result is stored in self.html.
@markdown.setter
def markdown(self, value: str) -> None:
self.html = mistune.markdown(value)
@property @property
def plain(self) -> str: def plain(self) -> str:
md = self.markdown md = self.markdown
return md.replace("\n\n", "\n") return md.replace("\n\n", "\n")
# Setter counterpart of the `plain` property: the plain text is first converted
# with plain_to_markdown, then passed through mistune.markdown, and the result
# is stored in self.html.
@plain.setter
def plain(self, value: str) -> None:
self.html = mistune.markdown(plain_to_markdown(value))
def __str__(self) -> str: def __str__(self) -> str:
return self.markdown return self.markdown

View File

@@ -1,7 +1,55 @@
from .encyclopaedia_metallum import EncyclopaediaMetallum from typing import Type, Generator, Set, Dict, List
from .musify import Musify from collections import defaultdict
from .youtube import YouTube
from .youtube_music import YoutubeMusic
from .bandcamp import Bandcamp
from .abstract import Page, INDEPENDENT_DB_OBJECTS from ._encyclopaedia_metallum import EncyclopaediaMetallum
from ._musify import Musify
from ._youtube import YouTube
from ._youtube_music import YoutubeMusic
from ._bandcamp import Bandcamp
from ._genius import Genius
from ._abstract import Page, INDEPENDENT_DB_OBJECTS
# Registry of instantiated pages, keyed by page class.
# NOTE: this is a defaultdict, so merely indexing it with an unknown class
# inserts an empty set for that class as a side effect.
_registered_pages: Dict[Type[Page], Set[Page]] = defaultdict(set)
def get_pages(*page_types: Type[Page]) -> Generator[Page, None, None]:
    """Yield the registered page instances.

    :param page_types: page classes to restrict the result to. When none are
        given, the instances of every registered page class are yielded.
    :return: a generator over the matching page instances.
    """
    # Snapshot the keys: this is a lazy generator, and the registry may be
    # changed (register/deregister) while a caller is still consuming it.
    wanted = page_types if page_types else tuple(_registered_pages)

    for page_type in wanted:
        # Use `.get` rather than indexing: the registry is a defaultdict, so
        # indexing an unknown class would insert an empty entry, and
        # `register_page` would afterwards consider that class registered.
        yield from tuple(_registered_pages.get(page_type, ()))
def register_page(page_type: Type[Page], **kwargs):
    """Instantiate ``page_type`` and add it to the registry, unless it already has an instance.

    :param page_type: the page class to register.
    :param kwargs: forwarded unchanged to the page constructor.
    """
    # Check for a non-empty entry instead of mere key presence: the registry is
    # a defaultdict, so any earlier lookup of this class may have left an empty
    # set behind, which must not block the actual registration.
    if _registered_pages.get(page_type):
        return

    _registered_pages[page_type].add(page_type(**kwargs))
def deregister_page(page_type: Type[Page]):
    """Remove every registered instance of ``page_type`` from the registry.

    Each instance's ``__del__`` is invoked explicitly before the registry
    entry is dropped. Unknown page classes are ignored.
    """
    if page_type in _registered_pages:
        for instance in _registered_pages[page_type]:
            instance.__del__()

        del _registered_pages[page_type]
def scan_for_pages(**kwargs):
    """Register every leaf subclass of ``Page`` whose ``REGISTER`` flag is truthy.

    The wanted pages are assumed to be the leaf classes of the ``Page``
    hierarchy; intermediate base classes are only traversed.

    :param kwargs: forwarded to ``register_page`` for each discovered class.
    """
    pending = [Page]
    registrable = []

    # Depth-first walk over the subclass tree using an explicit stack.
    while pending:
        candidate = pending.pop()
        children = candidate.__subclasses__()

        if children:
            pending.extend(children)
        elif candidate.REGISTER:
            registrable.append(candidate)

    for page_class in registrable:
        register_page(page_class, **kwargs)

View File

@@ -1,15 +1,19 @@
from __future__ import annotations
import logging import logging
import random import random
import re import re
from copy import copy from copy import copy
from pathlib import Path from pathlib import Path
from typing import Optional, Union, Type, Dict, Set, List, Tuple, TypedDict from typing import Optional, Union, Type, Dict, Set, List, Tuple, TypedDict, TYPE_CHECKING
from string import Formatter from string import Formatter
from dataclasses import dataclass, field from dataclasses import dataclass, field
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
if TYPE_CHECKING:
from ..download.page_attributes import DownloadOptions, FetchOptions
from ..connection import Connection from ..connection import Connection
from ..objects import ( from ..objects import (
Song, Song,
@@ -34,34 +38,25 @@ from ..utils import trace, output, BColors
INDEPENDENT_DB_OBJECTS = Union[Label, Album, Artist, Song] INDEPENDENT_DB_OBJECTS = Union[Label, Album, Artist, Song]
INDEPENDENT_DB_TYPES = Union[Type[Song], Type[Album], Type[Artist], Type[Label]] INDEPENDENT_DB_TYPES = Union[Type[Song], Type[Album], Type[Artist], Type[Label]]
@dataclass
class FetchOptions:
download_all: bool = False
album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"]))
@dataclass
class DownloadOptions:
download_all: bool = False
album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"]))
process_audio_if_found: bool = False
process_metadata_if_found: bool = True
class Page: class Page:
REGISTER = True
SOURCE_TYPE: SourceType SOURCE_TYPE: SourceType
LOGGER: logging.Logger LOGGER: logging.Logger
def __new__(cls, *args, **kwargs): def __new__(cls, *args, **kwargs):
cls.LOGGER = logging.getLogger(cls.__name__) cls.LOGGER = logging.getLogger(cls.__name__)
return super().__new__(cls) return super().__new__(cls)
def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None): def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None, **kwargs):
self.SOURCE_TYPE.register_page(self) self.SOURCE_TYPE.register_page(self)
self.download_options: DownloadOptions = download_options or DownloadOptions() self.download_options: DownloadOptions = download_options or DownloadOptions()
self.fetch_options: FetchOptions = fetch_options or FetchOptions() self.fetch_options: FetchOptions = fetch_options or FetchOptions()
# Finalizer: calls deregister_page() on this page's SOURCE_TYPE.
def __del__(self):
self.SOURCE_TYPE.deregister_page()
def _search_regex(self, pattern, string, default=None, fatal=True, flags=0, group=None): def _search_regex(self, pattern, string, default=None, fatal=True, flags=0, group=None):
""" """
Perform a regex search on the given string, using a single or a list of Perform a regex search on the given string, using a single or a list of

View File

@@ -6,7 +6,7 @@ from bs4 import BeautifulSoup
import pycountry import pycountry
from ..objects import Source, DatabaseObject from ..objects import Source, DatabaseObject
from .abstract import Page from ._abstract import Page
from ..objects import ( from ..objects import (
Artist, Artist,
Source, Source,

View File

@@ -6,7 +6,7 @@ from urllib.parse import urlparse, urlencode
from ..connection import Connection from ..connection import Connection
from ..utils.config import logging_settings from ..utils.config import logging_settings
from .abstract import Page from ._abstract import Page
from ..utils.enums import SourceType, ALL_SOURCE_TYPES from ..utils.enums import SourceType, ALL_SOURCE_TYPES
from ..utils.enums.album import AlbumType from ..utils.enums.album import AlbumType
from ..utils.support_classes.query import Query from ..utils.support_classes.query import Query
@@ -207,6 +207,7 @@ def create_grid(
class EncyclopaediaMetallum(Page): class EncyclopaediaMetallum(Page):
REGISTER = False
SOURCE_TYPE = ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM SOURCE_TYPE = ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM
LOGGER = logging_settings["metal_archives_logger"] LOGGER = logging_settings["metal_archives_logger"]

View File

@@ -0,0 +1,297 @@
from typing import List, Optional, Type
from urllib.parse import urlparse, urlunparse, urlencode
import json
from enum import Enum
from bs4 import BeautifulSoup
import pycountry
from ..objects import Source, DatabaseObject
from ._abstract import Page
from ..objects import (
Artist,
Source,
SourceType,
Song,
Album,
Label,
Target,
Contact,
ID3Timestamp,
Lyrics,
FormattedText,
Artwork,
)
from ..connection import Connection
from ..utils import dump_to_file, traverse_json_path
from ..utils.enums import SourceType, ALL_SOURCE_TYPES
from ..utils.support_classes.download_result import DownloadResult
from ..utils.string_processing import clean_song_title
from ..utils.config import main_settings, logging_settings
from ..utils.shared import DEBUG
if DEBUG:
from ..utils import dump_to_file
class Genius(Page):
SOURCE_TYPE = ALL_SOURCE_TYPES.GENIUS
HOST = "genius.com"
def __init__(self, *args, **kwargs):
    """Create the Genius connection, then run the base ``Page`` initialisation.

    All positional and keyword arguments are forwarded to the parent constructor.
    """
    genius_connection = Connection(
        host="https://genius.com/",
        logger=self.LOGGER,
        module="genius",
    )
    self.connection: Connection = genius_connection

    super().__init__(*args, **kwargs)
def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
    """Determine which kind of object a Genius URL points to.

    :param source: the source whose ``parsed_url.path`` is inspected.
    :return: ``Artist`` for ``/artists/...``, ``Album`` for ``/albums/...``,
        and ``Song`` for everything else.
    """
    # Compare the first path segment exactly. Stripping every slash and testing
    # a prefix would also classify song slugs that merely begin with "artists"
    # or "albums" as artist/album pages.
    first_segment = source.parsed_url.path.strip("/").split("/", 1)[0]

    if first_segment == "artists":
        return Artist
    if first_segment == "albums":
        return Album

    return Song
def add_to_artwork(self, artwork: Artwork, url: str):
    """Append ``url`` to ``artwork``, with width/height when the URL encodes them.

    The second-to-last dot-separated fragment of the URL is split on ``x``;
    if its first two parts are integers they are passed as ``width`` and
    ``height``. In every other case only the URL is appended. A ``None`` URL
    is ignored.
    """
    if url is None:
        return

    fragments = url.split(".")
    size_parts = fragments[-2].split("x") if len(fragments) >= 2 else []

    if len(size_parts) >= 2:
        try:
            artwork.append(url=url, width=int(size_parts[0]), height=int(size_parts[1]))
            return
        except ValueError:
            # not numeric after all, fall through to the plain append
            pass

    artwork.append(url=url)
def parse_api_object(self, data: dict) -> Optional[DatabaseObject]:
    """Convert one Genius API object into the matching data object.

    The ``_type`` field of ``data`` selects the result: ``"artist"`` gives an
    ``Artist``, ``"album"`` an ``Album`` and ``"song"`` a ``Song``.

    :param data: a decoded JSON object from the Genius API, or ``None``.
    :return: the parsed object, or ``None`` when ``data`` is ``None``, has no
        usable URL, or has an unknown ``_type``.
    """
    if data is None:
        return None

    object_type = data.get("_type")

    artwork = Artwork()
    self.add_to_artwork(artwork, data.get("header_image_url"))
    self.add_to_artwork(artwork, data.get("image_url"))

    source: Source = Source(self.SOURCE_TYPE, data.get("url"), additional_data={
        "id": data.get("id"),
        "slug": data.get("slug"),
        "api_path": data.get("api_path"),
    })
    # Without a URL the object cannot be referenced, so nothing is returned.
    if source.url is None:
        return None

    notes = FormattedText()
    description = data.get("description") or {}
    if "html" in description:
        notes.html = description["html"]
    elif "markdown" in description:
        notes.markdown = description["markdown"]
    elif data.get("description_preview") is not None:
        # FormattedText exposes the plain-text setter as `plain`; assigning to
        # any other attribute name would silently drop the description.
        notes.plain = data["description_preview"]

    if object_type == "artist":
        additional_sources: List[Source] = []

        if data.get("instagram_name") is not None:
            additional_sources.append(Source(ALL_SOURCE_TYPES.INSTAGRAM, f"https://www.instagram.com/{data['instagram_name']}/"))
        if data.get("facebook_name") is not None:
            additional_sources.append(Source(ALL_SOURCE_TYPES.FACEBOOK, f"https://www.facebook.com/{data['facebook_name']}/"))
        if data.get("twitter_name") is not None:
            additional_sources.append(Source(ALL_SOURCE_TYPES.TWITTER, f"https://x.com/{data['twitter_name']}/"))

        return Artist(
            name=data["name"].strip() if data.get("name") is not None else None,
            # include the social media sources that were collected above
            source_list=[source, *additional_sources],
            artwork=artwork,
            notes=notes,
        )

    if object_type == "album":
        self.add_to_artwork(artwork, data.get("cover_art_thumbnail_url"))
        self.add_to_artwork(artwork, data.get("cover_art_url"))

        # `or []` / `or {}`: the keys may be present with an explicit null.
        for cover_art in data.get("cover_arts") or []:
            self.add_to_artwork(artwork, cover_art.get("image_url"))
            self.add_to_artwork(artwork, cover_art.get("thumbnail_image_url"))

        album_name = data.get("name")
        album_artist = self.parse_api_object(data.get("artist"))

        return Album(
            title=album_name.strip() if album_name is not None else None,
            source_list=[source],
            # never put a failed (None) artist parse into the artist list
            artist_list=[album_artist] if album_artist is not None else [],
            artwork=artwork,
            date=ID3Timestamp(**(data.get("release_date_components") or {})),
        )

    if object_type == "song":
        self.add_to_artwork(artwork, data.get("song_art_image_thumbnail_url"))
        self.add_to_artwork(artwork, data.get("song_art_image_url"))

        main_artist_list = []
        featured_artist_list = []

        _artist_name = None
        primary_artist = self.parse_api_object(data.get("primary_artist"))
        if primary_artist is not None:
            _artist_name = primary_artist.name
            main_artist_list.append(primary_artist)

        # featured artists, producers and writers all end up as feature artists
        for feature_artist in (*(data.get("featured_artists") or []), *(data.get("producer_artists") or []), *(data.get("writer_artists") or [])):
            artist = self.parse_api_object(feature_artist)
            if artist is not None:
                featured_artist_list.append(artist)

        return Song(
            title=clean_song_title(data.get("title"), artist_name=_artist_name),
            source_list=[source],
            artwork=artwork,
            feature_artist_list=featured_artist_list,
            artist_list=main_artist_list,
        )

    return None
def general_search(self, search_query: str, **kwargs) -> List[DatabaseObject]:
    """Run a free-text search against the Genius multi-search endpoint.

    :param search_query: the text to search for.
    :return: every hit that ``parse_api_object`` could convert; an empty list
        when the request yields no response.
    """
    query_string = urlencode({"q": search_query})
    response = self.connection.get("https://genius.com/api/search/multi?" + query_string, name=f"search_{search_query}")
    if response is None:
        return []

    dump_to_file("search_genius.json", response.text, is_json=True, exit_after_dump=False)

    found = []
    sections = traverse_json_path(response.json(), "response.sections", default=[])
    for section in sections:
        parsed_hits = (self.parse_api_object(hit.get("result")) for hit in section.get("hits", []))
        found.extend(obj for obj in parsed_hits if obj is not None)

    return found
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
    """Fetch an artist page and parse the JSON embedded in its ``page_data`` meta tag.

    :param source: the source pointing at the Genius artist page.
    :param stop_at_level: accepted for interface compatibility; not used here.
    :return: the parsed artist with its albums attached. An empty ``Artist``
        is returned when the request yields no response.
    """
    artist: Artist = Artist()

    # NOTE(review): presumably the paginated album API, currently unused:
    # https://genius.com/api/artists/24527/albums?page=1
    response = self.connection.get(source.url, name=source.url)
    if response is None:
        return artist

    soup = self.get_soup_from_response(response)

    # the page data is stored in the content attribute of a meta tag in the head
    data_container = soup.find("meta", {"itemprop": "page_data"})
    # `.get` so that a meta tag without a content attribute does not raise
    content = data_container.get("content") if data_container is not None else None

    if content is not None:
        dump_to_file("genius_itemprop_artist.json", content, is_json=True, exit_after_dump=False)
        data = json.loads(content)

        # Keep the empty fallback artist when the payload cannot be parsed into
        # an Artist; otherwise the attribute accesses below would hit None.
        parsed_artist = self.parse_api_object(data.get("artist"))
        if isinstance(parsed_artist, Artist):
            artist = parsed_artist

        for raw_album in (data.get("artist_albums") or []):
            parsed_album = self.parse_api_object(raw_album)
            if isinstance(parsed_album, Album):
                artist.album_collection.append(parsed_album)

        for raw_song in (data.get("artist_songs") or []):
            parsed_song = self.parse_api_object(raw_song)
            if not isinstance(parsed_song, Song):
                continue

            # TODO: fetch the album for these songs, because the api doesn't return them
            artist.album_collection.extend(parsed_song.album_collection)

    artist.source_collection.append(source)
    return artist
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
    """Fetch an album page and parse the JSON embedded in its ``page_data`` meta tag.

    :param source: the source pointing at the Genius album page.
    :param stop_at_level: accepted for interface compatibility; not used here.
    :return: the parsed album with its tracklist attached. An empty ``Album``
        is returned when the request yields no response.
    """
    album: Album = Album()

    response = self.connection.get(source.url, name=source.url)
    if response is None:
        return album

    soup = self.get_soup_from_response(response)

    # the page data is stored in the content attribute of a meta tag in the head
    data_container = soup.find("meta", {"itemprop": "page_data"})
    # `.get` so that a meta tag without a content attribute does not raise
    content = data_container.get("content") if data_container is not None else None

    if content is not None:
        dump_to_file("genius_itemprop_album.json", content, is_json=True, exit_after_dump=False)
        data = json.loads(content)

        # Keep the empty fallback album when the payload cannot be parsed into
        # an Album; otherwise the attribute accesses below would hit None.
        parsed_album = self.parse_api_object(data.get("album"))
        if isinstance(parsed_album, Album):
            album = parsed_album

        # `or []`: the key may be present with an explicit null
        for appearance in (data.get("album_appearances") or []):
            parsed_song = self.parse_api_object(appearance.get("song"))
            if isinstance(parsed_song, Song):
                album.song_collection.append(parsed_song)

    album.source_collection.append(source)
    return album
def get_json_content_from_response(self, response, start: str, end: str) -> Optional[str]:
    """Return the part of ``response.text`` enclosed by two markers.

    :param response: any object with a ``text`` attribute holding the body.
    :param start: marker directly preceding the wanted content.
    :param end: marker directly following it; searched only after ``start``.
    :return: the text between the first ``start`` and the next ``end``, or
        ``None`` if either marker is missing.
    """
    body = response.text

    try:
        begin = body.index(start) + len(start)
        stop = body.index(end, begin)
    except ValueError:
        # one of the markers does not occur
        return None

    return body[begin:stop]
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
    """Fetch a song page and collect its lyrics.

    Lyrics are read from the preloaded state JSON embedded in a script tag
    and, in addition, from every ``data-lyrics-container`` div in the HTML.

    :param source: the source pointing at the Genius song page.
    :param stop_at_level: accepted for interface compatibility; not used here.
    :return: a ``Song`` carrying the lyrics that were found. An empty ``Song``
        is returned when the request yields no response.
    """
    # local import, so this method does not depend on the module's import block
    import re

    song: Song = Song()

    response = self.connection.get(source.url, name=source.url)
    if response is None:
        return song

    # get the contents that are between `JSON.parse('` and `');`
    content = self.get_json_content_from_response(response, start="window.__PRELOADED_STATE__ = JSON.parse('", end="');\n window.__APP_CONFIG__ = ")
    if content is not None:
        # Undo the JavaScript string escaping in a single left-to-right pass.
        # Chained str.replace calls are order dependent: an escaped backslash
        # followed by a quote would be unescaped twice and break the JSON.
        content = re.sub(r"\\([\\\"'])", r"\1", content)

        try:
            data = json.loads(content)
        except json.JSONDecodeError:
            # the HTML containers below still provide the lyrics
            self.LOGGER.warning("couldn't parse the preloaded state of %s", source.url)
            data = None

        if data is not None:
            lyrics_html = traverse_json_path(data, "songPage.lyricsData.body.html", default=None)
            if lyrics_html is not None:
                song.lyrics_collection.append(Lyrics(FormattedText(html=lyrics_html)))

            dump_to_file("genius_song_script_json.json", content, is_json=True, exit_after_dump=False)

    soup = self.get_soup_from_response(response)
    for lyrics_container in soup.find_all("div", {"data-lyrics-container": "true"}):
        song.lyrics_collection.append(Lyrics(FormattedText(html=lyrics_container.prettify())))

    song.source_collection.append(source)
    return song

View File

@@ -8,7 +8,7 @@ import pycountry
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from ..connection import Connection from ..connection import Connection
from .abstract import Page from ._abstract import Page
from ..utils.enums import SourceType, ALL_SOURCE_TYPES from ..utils.enums import SourceType, ALL_SOURCE_TYPES
from ..utils.enums.album import AlbumType, AlbumStatus from ..utils.enums.album import AlbumType, AlbumStatus
from ..objects import ( from ..objects import (

View File

@@ -5,7 +5,7 @@ from enum import Enum
import python_sponsorblock import python_sponsorblock
from ..objects import Source, DatabaseObject, Song, Target from ..objects import Source, DatabaseObject, Song, Target
from .abstract import Page from ._abstract import Page
from ..objects import ( from ..objects import (
Artist, Artist,
Source, Source,
@@ -22,7 +22,7 @@ from ..utils.enums import SourceType, ALL_SOURCE_TYPES
from ..utils.support_classes.download_result import DownloadResult from ..utils.support_classes.download_result import DownloadResult
from ..utils.config import youtube_settings, main_settings, logging_settings from ..utils.config import youtube_settings, main_settings, logging_settings
from .youtube_music.super_youtube import SuperYouTube, YouTubeUrl, get_invidious_url, YouTubeUrlType from ._youtube_music.super_youtube import SuperYouTube, YouTubeUrl, get_invidious_url, YouTubeUrlType
""" """
@@ -38,7 +38,7 @@ def get_piped_url(path: str = "", params: str = "", query: str = "", fragment: s
class YouTube(SuperYouTube): class YouTube(SuperYouTube):
# CHANGE REGISTER = youtube_settings["use_youtube_alongside_youtube_music"]
SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):

View File

@@ -3,7 +3,6 @@ from enum import Enum
from ...utils.config import logging_settings from ...utils.config import logging_settings
from ...objects import Source, DatabaseObject from ...objects import Source, DatabaseObject
from ..abstract import Page
from ...objects import ( from ...objects import (
Artist, Artist,
Source, Source,

View File

@@ -6,7 +6,6 @@ from ...utils.string_processing import clean_song_title
from ...utils.enums import SourceType, ALL_SOURCE_TYPES from ...utils.enums import SourceType, ALL_SOURCE_TYPES
from ...objects import Source, DatabaseObject from ...objects import Source, DatabaseObject
from ..abstract import Page
from ...objects import ( from ...objects import (
Artist, Artist,
Source, Source,

View File

@@ -6,7 +6,7 @@ import requests
import python_sponsorblock import python_sponsorblock
from ...objects import Source, DatabaseObject, Song, Target from ...objects import Source, DatabaseObject, Song, Target
from ..abstract import Page from .._abstract import Page
from ...objects import ( from ...objects import (
Artist, Artist,
Source, Source,

View File

@@ -22,7 +22,7 @@ from ...utils import get_current_millis, traverse_json_path
from ...utils import dump_to_file from ...utils import dump_to_file
from ..abstract import Page from .._abstract import Page
from ...objects import ( from ...objects import (
DatabaseObject as DataObject, DatabaseObject as DataObject,
Source, Source,

View File

@@ -17,6 +17,9 @@ class SourceType:
def register_page(self, page: Page): def register_page(self, page: Page):
self.page = page self.page = page
# Clears the page reference stored on this source type by setting self.page to None.
def deregister_page(self):
self.page = None
def __hash__(self): def __hash__(self):
return hash(self.name) return hash(self.name)

View File

@@ -15,11 +15,11 @@ __stage__ = os.getenv("STAGE", "prod")
DEBUG = (__stage__ == "dev") and True DEBUG = (__stage__ == "dev") and True
DEBUG_LOGGING = DEBUG and False DEBUG_LOGGING = DEBUG and False
DEBUG_TRACE = DEBUG and True DEBUG_TRACE = DEBUG and True
DEBUG_OBJECT_TRACE = DEBUG and True DEBUG_OBJECT_TRACE = DEBUG and False
DEBUG_OBJECT_TRACE_CALLSTACK = DEBUG_OBJECT_TRACE and False DEBUG_OBJECT_TRACE_CALLSTACK = DEBUG_OBJECT_TRACE and False
DEBUG_YOUTUBE_INITIALIZING = DEBUG and False DEBUG_YOUTUBE_INITIALIZING = DEBUG and False
DEBUG_PAGES = DEBUG and False DEBUG_PAGES = DEBUG and False
DEBUG_DUMP = DEBUG and False DEBUG_DUMP = DEBUG and True
DEBUG_PRINT_ID = DEBUG and True DEBUG_PRINT_ID = DEBUG and True
if DEBUG: if DEBUG: