fix: runtime errors
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful

This commit is contained in:
Hazel 2024-05-15 13:16:11 +02:00
parent ac6c513d56
commit bedd0fe819
12 changed files with 65 additions and 196 deletions

View File

@ -6,8 +6,9 @@ logging.getLogger().setLevel(logging.DEBUG)
if __name__ == "__main__": if __name__ == "__main__":
commands = [ commands = [
"s: #a Crystal F", "s: #a I'm in a coffin",
"d: 20", "0",
"d: 0",
] ]

View File

@ -317,7 +317,7 @@ class Downloader:
for database_object in data_objects: for database_object in data_objects:
r = self.pages.download( r = self.pages.download(
music_object=database_object, data_object=database_object,
genre=self.genre, genre=self.genre,
**kwargs **kwargs
) )

View File

@ -1,4 +1,5 @@
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Set
from ..utils.config import main_settings from ..utils.config import main_settings
from ..utils.enums.album import AlbumType from ..utils.enums.album import AlbumType

View File

@ -2,6 +2,7 @@ from typing import Tuple, Type, Dict, Set, Optional, List
from collections import defaultdict from collections import defaultdict
from pathlib import Path from pathlib import Path
import re import re
import logging
from . import FetchOptions, DownloadOptions from . import FetchOptions, DownloadOptions
from .results import SearchResults from .results import SearchResults
@ -17,6 +18,7 @@ from ..objects import (
Label, Label,
) )
from ..audio import write_metadata_to_target, correct_codec from ..audio import write_metadata_to_target, correct_codec
from ..utils import output, BColors
from ..utils.string_processing import fit_to_file_system from ..utils.string_processing import fit_to_file_system
from ..utils.config import youtube_settings, main_settings from ..utils.config import youtube_settings, main_settings
from ..utils.path_manager import LOCATIONS from ..utils.path_manager import LOCATIONS
@ -69,6 +71,8 @@ if DEBUG_PAGES:
class Pages: class Pages:
def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False, download_options: DownloadOptions = None, fetch_options: FetchOptions = None): def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
self.LOGGER = logging.getLogger("download")
self.download_options: DownloadOptions = download_options or DownloadOptions() self.download_options: DownloadOptions = download_options or DownloadOptions()
self.fetch_options: FetchOptions = fetch_options or FetchOptions() self.fetch_options: FetchOptions = fetch_options or FetchOptions()
@ -118,7 +122,9 @@ class Pages:
return data_object return data_object
source: Source source: Source
for source in data_object.source_collection.get_sources(): for source in data_object.source_collection.get_sources(source_type_sorting={
"only_with_page": True,
}):
new_data_object = self.fetch_from_source(source=source, stop_at_level=stop_at_level) new_data_object = self.fetch_from_source(source=source, stop_at_level=stop_at_level)
if new_data_object is not None: if new_data_object is not None:
data_object.merge(new_data_object) data_object.merge(new_data_object)
@ -129,10 +135,15 @@ class Pages:
if not source.has_page: if not source.has_page:
return None return None
func = getattr(source.page, fetch_map[source_type])(source=source, **kwargs) source_type = source.page.get_source_type(source=source)
if source_type is None:
self.LOGGER.debug(f"Could not determine source type for {source}.")
return None
func = getattr(source.page, fetch_map[source_type])
# fetching the data object and marking it as fetched # fetching the data object and marking it as fetched
data_object: DataObject = func(source=source) data_object: DataObject = func(source=source, **kwargs)
data_object.mark_as_fetched(source.hash_url) data_object.mark_as_fetched(source.hash_url)
return data_object return data_object
@ -175,7 +186,7 @@ class Pages:
# download all children # download all children
download_result: DownloadResult = DownloadResult() download_result: DownloadResult = DownloadResult()
for c in data_object.get_children(): for c in data_object.get_child_collections():
for d in c: for d in c:
if self._skip_object(d): if self._skip_object(d):
continue continue
@ -209,7 +220,7 @@ class Pages:
path_template = path_template.replace(f"{{{field}}}", naming[field][0]) path_template = path_template.replace(f"{{{field}}}", naming[field][0])
return possible_parts return path_template
def _download_song(self, song: Song, naming: dict) -> DownloadOptions: def _download_song(self, song: Song, naming: dict) -> DownloadOptions:
""" """
@ -235,7 +246,7 @@ class Pages:
# removing duplicates from the naming, and process the strings # removing duplicates from the naming, and process the strings
for key, value in naming.items(): for key, value in naming.items():
# https://stackoverflow.com/a/17016257 # https://stackoverflow.com/a/17016257
naming[key] = list(dict.fromkeys(items)) naming[key] = list(dict.fromkeys(value))
# manage the targets # manage the targets
tmp: Target = Target.temp(file_extension=main_settings["audio_format"]) tmp: Target = Target.temp(file_extension=main_settings["audio_format"])
@ -248,14 +259,14 @@ class Pages:
) )
)) ))
for target in song.target_collection: for target in song.target_collection:
if target.exists(): if target.exists:
output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY) output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY)
r.found_on_disk += 1 r.found_on_disk += 1
if self.download_options.download_again_if_found: if self.download_options.download_again_if_found:
target.copy_content(tmp) target.copy_content(tmp)
else: else:
target.create_parent_directories() target.create_path()
output(f'- {target.file_path}', color=BColors.GREY) output(f'- {target.file_path}', color=BColors.GREY)
# this streams from every available source until something succeeds, setting the skip intervals to the values of the according source # this streams from every available source until something succeeds, setting the skip intervals to the values of the according source
@ -294,7 +305,7 @@ class Pages:
if used_source is not None: if used_source is not None:
used_source.page.post_process_hook(song=song, temp_target=tmp) used_source.page.post_process_hook(song=song, temp_target=tmp)
if not found_on_disc or self.download_options.process_metadata_if_found: if not found_on_disk or self.download_options.process_metadata_if_found:
write_metadata_to_target(metadata=song.metadata, target=tmp, song=song) write_metadata_to_target(metadata=song.metadata, target=tmp, song=song)
tmp.delete() tmp.delete()

View File

@ -2,7 +2,6 @@ from typing import Tuple, Type, Dict, List, Generator, Union
from dataclasses import dataclass from dataclasses import dataclass
from ..objects import DatabaseObject from ..objects import DatabaseObject
from ..utils.enums.source import SourceType
from ..pages import Page, EncyclopaediaMetallum, Musify from ..pages import Page, EncyclopaediaMetallum, Musify

View File

@ -8,6 +8,7 @@ from typing import Optional, Dict, Tuple, List, Type, Generic, Any, TypeVar, Set
from pathlib import Path from pathlib import Path
import inspect import inspect
from .source import SourceCollection
from .metadata import Metadata from .metadata import Metadata
from ..utils import get_unix_time, object_trace, generate_id from ..utils import get_unix_time, object_trace, generate_id
from ..utils.config import logging_settings, main_settings from ..utils.config import logging_settings, main_settings

View File

@ -20,13 +20,11 @@ from dataclasses import dataclass, field
from functools import cached_property from functools import cached_property
from ..utils import generate_id from ..utils import generate_id
from ..utils.enums import SourceType from ..utils.enums import SourceType, ALL_SOURCE_TYPES
from ..utils.config import youtube_settings from ..utils.config import youtube_settings
from ..utils.string_processing import hash_url, shorten_display_url from ..utils.string_processing import hash_url, shorten_display_url
from .metadata import Mapping, Metadata from .metadata import Mapping, Metadata
from .parents import OuterProxy
from .collection import Collection
if TYPE_CHECKING: if TYPE_CHECKING:
from ..pages.abstract import Page from ..pages.abstract import Page
@ -54,38 +52,38 @@ class Source:
url = parsed_url.geturl() url = parsed_url.geturl()
if "musify" in parsed_url.netloc: if "musify" in parsed_url.netloc:
return cls(SourceType.MUSIFY, url, referrer_page=referrer_page) return cls(ALL_SOURCE_TYPES.MUSIFY, url, referrer_page=referrer_page)
if parsed_url.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]: if parsed_url.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]:
return cls(SourceType.YOUTUBE, url, referrer_page=referrer_page) return cls(ALL_SOURCE_TYPES.YOUTUBE, url, referrer_page=referrer_page)
if url.startswith("https://www.deezer"): if url.startswith("https://www.deezer"):
return cls(SourceType.DEEZER, url, referrer_page=referrer_page) return cls(ALL_SOURCE_TYPES.DEEZER, url, referrer_page=referrer_page)
if url.startswith("https://open.spotify.com"): if url.startswith("https://open.spotify.com"):
return cls(SourceType.SPOTIFY, url, referrer_page=referrer_page) return cls(ALL_SOURCE_TYPES.SPOTIFY, url, referrer_page=referrer_page)
if "bandcamp" in url: if "bandcamp" in url:
return cls(SourceType.BANDCAMP, url, referrer_page=referrer_page) return cls(ALL_SOURCE_TYPES.BANDCAMP, url, referrer_page=referrer_page)
if "wikipedia" in parsed_url.netloc: if "wikipedia" in parsed_url.netloc:
return cls(SourceType.WIKIPEDIA, url, referrer_page=referrer_page) return cls(ALL_SOURCE_TYPES.WIKIPEDIA, url, referrer_page=referrer_page)
if url.startswith("https://www.metal-archives.com/"): if url.startswith("https://www.metal-archives.com/"):
return cls(SourceType.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page) return cls(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page)
# the less important once # the less important once
if url.startswith("https://www.facebook"): if url.startswith("https://www.facebook"):
return cls(SourceType.FACEBOOK, url, referrer_page=referrer_page) return cls(ALL_SOURCE_TYPES.FACEBOOK, url, referrer_page=referrer_page)
if url.startswith("https://www.instagram"): if url.startswith("https://www.instagram"):
return cls(SourceType.INSTAGRAM, url, referrer_page=referrer_page) return cls(ALL_SOURCE_TYPES.INSTAGRAM, url, referrer_page=referrer_page)
if url.startswith("https://twitter"): if url.startswith("https://twitter"):
return cls(SourceType.TWITTER, url, referrer_page=referrer_page) return cls(ALL_SOURCE_TYPES.TWITTER, url, referrer_page=referrer_page)
if url.startswith("https://myspace.com"): if url.startswith("https://myspace.com"):
return cls(SourceType.MYSPACE, url, referrer_page=referrer_page) return cls(ALL_SOURCE_TYPES.MYSPACE, url, referrer_page=referrer_page)
@property @property
def has_page(self) -> bool: def has_page(self) -> bool:
@ -134,7 +132,7 @@ class SourceCollection:
_sources_by_type: Dict[SourceType, List[Source]] _sources_by_type: Dict[SourceType, List[Source]]
def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs): def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs):
self._page_to_source_list = defaultdict(list) self._sources_by_type = defaultdict(list)
self._indexed_sources = {} self._indexed_sources = {}
self.extend(data or []) self.extend(data or [])
@ -157,7 +155,7 @@ class SourceCollection:
Iterable[SourceType]: A list of source types. Iterable[SourceType]: A list of source types.
""" """
source_types: List[SourceType] = self._page_to_source_list.keys() source_types: List[SourceType] = self._sources_by_type.keys()
if only_with_page: if only_with_page:
source_types = filter(lambda st: st.has_page, source_types) source_types = filter(lambda st: st.has_page, source_types)
@ -186,7 +184,7 @@ class SourceCollection:
source_types = self.source_types(**source_type_sorting) source_types = self.source_types(**source_type_sorting)
for source_type in source_types: for source_type in source_types:
yield from self._page_to_source_list[source_type] yield from self._sources_by_type[source_type]
def append(self, source: Source): def append(self, source: Source):
if source is None: if source is None:
@ -202,7 +200,7 @@ class SourceCollection:
existing_source.__merge__(source) existing_source.__merge__(source)
source = existing_source source = existing_source
else: else:
self._page_to_source_list[source.source_type].append(source) self._sources_by_type[source.source_type].append(source)
changed = False changed = False
for key in source.indexing_values: for key in source.indexing_values:

View File

@ -49,15 +49,16 @@ class DownloadOptions:
class Page: class Page:
SOURCE_TYPE: SourceType SOURCE_TYPE: SourceType
LOGGER: LOGGER LOGGER: logging.Logger
def __new__(cls, *args, **kwargs): def __new__(cls, *args, **kwargs):
cls.SOURCE_TYPE.register_page(cls)
cls.LOGGER = logging.getLogger(cls.__name__) cls.LOGGER = logging.getLogger(cls.__name__)
return super().__new__(cls) return super().__new__(cls)
def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None): def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
self.SOURCE_TYPE.register_page(self)
self.download_options: DownloadOptions = download_options or DownloadOptions() self.download_options: DownloadOptions = download_options or DownloadOptions()
self.fetch_options: FetchOptions = fetch_options or FetchOptions() self.fetch_options: FetchOptions = fetch_options or FetchOptions()
@ -145,151 +146,7 @@ class Page:
def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:
return Label() return Label()
def download( # to download stuff
self,
music_object: DatabaseObject,
genre: str,
) -> DownloadResult:
naming_dict: NamingDict = NamingDict({"genre": genre})
def fill_naming_objects(naming_music_object: DatabaseObject):
nonlocal naming_dict
for collection_name in naming_music_object.UPWARDS_COLLECTION_STRING_ATTRIBUTES:
collection: Collection = getattr(naming_music_object, collection_name)
if collection.empty:
continue
dom_ordered_music_object: DatabaseObject = collection[0]
naming_dict.add_object(dom_ordered_music_object)
return fill_naming_objects(dom_ordered_music_object)
fill_naming_objects(music_object)
return self._download(music_object, naming_dict)
def _download(
self,
music_object: DatabaseObject,
naming_dict: NamingDict,
**kwargs
) -> DownloadResult:
if isinstance(music_object, Song):
output(f"Downloading {music_object.option_string} to:", color=BColors.BOLD)
else:
output(f"Downloading {music_object.option_string}...", color=BColors.BOLD)
# Skips all releases, that are defined in shared.ALBUM_TYPE_BLACKLIST, if download_all is False
if isinstance(music_object, Album):
if not self.download_options.download_all and music_object.album_type in self.download_options.album_type_blacklist:
return DownloadResult()
if not (isinstance(music_object, Song) and self.NO_ADDITIONAL_DATA_FROM_SONG):
self.fetch_details(music_object=music_object, stop_at_level=1)
if isinstance(music_object, Album):
music_object.update_tracksort()
naming_dict.add_object(music_object)
if isinstance(music_object, Song):
return self._download_song(music_object, naming_dict)
download_result: DownloadResult = DownloadResult()
for collection_name in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
collection: Collection = getattr(music_object, collection_name)
sub_ordered_music_object: DatabaseObject
for sub_ordered_music_object in collection:
download_result.merge(self._download(sub_ordered_music_object, naming_dict.copy()))
return download_result
def _download_song(self, song: Song, naming_dict: NamingDict):
song.compile()
if "genre" not in naming_dict and song.genre is not None:
naming_dict["genre"] = song.genre
if song.genre is None:
song.genre = naming_dict["genre"]
path_parts = Formatter().parse(main_settings["download_path"])
file_parts = Formatter().parse(main_settings["download_file"])
new_target = Target(
relative_to_music_dir=True,
file_path=Path(
main_settings["download_path"].format(**{part[1]: naming_dict[part[1]] for part in path_parts}),
main_settings["download_file"].format(**{part[1]: naming_dict[part[1]] for part in file_parts})
)
)
if song.target_collection.empty:
song.target_collection.append(new_target)
r = DownloadResult(1)
temp_target: Target = Target.temp(file_extension=main_settings["audio_format"])
found_on_disc = False
target: Target
for target in song.target_collection:
current_exists = target.exists
if current_exists:
output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY)
target.copy_content(temp_target)
found_on_disc = True
r.found_on_disk += 1
r.add_target(target)
else:
output(f'- {target.file_path}', color=BColors.GREY)
sources = song.source_collection.get_sources(self.SOURCE_TYPE)
skip_intervals = []
if not found_on_disc:
for source in sources:
r = self.download_song_to_target(source=source, target=temp_target, desc="downloading")
if not r.is_fatal_error:
skip_intervals = self.get_skip_intervals(song, source)
break
if temp_target.exists:
r.merge(self._post_process_targets(
song=song,
temp_target=temp_target,
skip_intervals=skip_intervals,
found_on_disc=found_on_disc,
))
return r
def _post_process_targets(self, song: Song, temp_target: Target, skip_intervals: List, found_on_disc: bool) -> DownloadResult:
if not found_on_disc or self.download_options.process_audio_if_found:
correct_codec(temp_target, skip_intervals=skip_intervals)
self.post_process_hook(song, temp_target)
if not found_on_disc or self.download_options.process_metadata_if_found:
write_metadata_to_target(song.metadata, temp_target, song)
r = DownloadResult()
target: Target
for target in song.target_collection:
if temp_target is not target:
temp_target.copy_content(target)
r.add_target(target)
temp_target.delete()
r.sponsor_segments += len(skip_intervals)
return r
def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]: def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]:
return [] return []

View File

@ -62,8 +62,7 @@ class Bandcamp(Page):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
parsed_url = urlparse(source.url) path = source.parsed_url.path.replace("/", "")
path = parsed_url.path.replace("/", "")
if path == "" or path.startswith("music"): if path == "" or path.startswith("music"):
return Artist return Artist

View File

@ -7,7 +7,7 @@ from urllib.parse import urlparse, urlencode
from ..connection import Connection from ..connection import Connection
from ..utils.config import logging_settings from ..utils.config import logging_settings
from .abstract import Page from .abstract import Page
from ..utils.enums.source import SourceType from ..utils.enums import SourceType, ALL_SOURCE_TYPES
from ..utils.enums.album import AlbumType from ..utils.enums.album import AlbumType
from ..utils.support_classes.query import Query from ..utils.support_classes.query import Query
from ..objects import ( from ..objects import (
@ -59,7 +59,7 @@ def _song_from_json(artist_html=None, album_html=None, release_type=None, title=
_album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html) _album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html)
], ],
source_list=[ source_list=[
Source(SourceType.ENCYCLOPAEDIA_METALLUM, song_id) Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, song_id)
] ]
) )
@ -85,7 +85,7 @@ def _artist_from_json(artist_html=None, genre=None, country=None) -> Artist:
return Artist( return Artist(
name=artist_name, name=artist_name,
source_list=[ source_list=[
Source(SourceType.ENCYCLOPAEDIA_METALLUM, artist_url) Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, artist_url)
] ]
) )
@ -105,7 +105,7 @@ def _album_from_json(album_html=None, release_type=None, artist_html=None) -> Al
title=album_name, title=album_name,
album_type=album_type, album_type=album_type,
source_list=[ source_list=[
Source(SourceType.ENCYCLOPAEDIA_METALLUM, album_url) Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, album_url)
], ],
artist_list=[ artist_list=[
_artist_from_json(artist_html=artist_html) _artist_from_json(artist_html=artist_html)
@ -207,7 +207,7 @@ def create_grid(
class EncyclopaediaMetallum(Page): class EncyclopaediaMetallum(Page):
SOURCE_TYPE = SourceType.ENCYCLOPAEDIA_METALLUM SOURCE_TYPE = ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM
LOGGER = logging_settings["metal_archives_logger"] LOGGER = logging_settings["metal_archives_logger"]
def __init__(self, **kwargs): def __init__(self, **kwargs):

View File

@ -22,20 +22,22 @@ from ...utils import get_current_millis, traverse_json_path
from ...utils import dump_to_file from ...utils import dump_to_file
from ...objects import Source, DatabaseObject, ID3Timestamp, Artwork
from ..abstract import Page from ..abstract import Page
from ...objects import ( from ...objects import (
Artist, DatabaseObject as DataObject,
Source, Source,
SourceType, FormattedText,
ID3Timestamp,
Artwork,
Artist,
Song, Song,
Album, Album,
Label, Label,
Target, Target,
Lyrics, Lyrics,
FormattedText
) )
from ...connection import Connection from ...connection import Connection
from ...utils.enums import SourceType, ALL_SOURCE_TYPES
from ...utils.enums.album import AlbumType from ...utils.enums.album import AlbumType
from ...utils.support_classes.download_result import DownloadResult from ...utils.support_classes.download_result import DownloadResult
@ -176,8 +178,7 @@ ALBUM_TYPE_MAP = {
class YoutubeMusic(SuperYouTube): class YoutubeMusic(SuperYouTube):
# CHANGE # CHANGE
SOURCE_TYPE = SourceType.YOUTUBE_MUSIC SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE
LOGGER = logging_settings["youtube_music_logger"]
def __init__(self, *args, ydl_opts: dict = None, **kwargs): def __init__(self, *args, ydl_opts: dict = None, **kwargs):
self.yt_music_connection: YoutubeMusicConnection = YoutubeMusicConnection( self.yt_music_connection: YoutubeMusicConnection = YoutubeMusicConnection(
@ -348,10 +349,10 @@ class YoutubeMusic(SuperYouTube):
default='{}' default='{}'
)) or {} )) or {}
def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: def get_source_type(self, source: Source) -> Optional[Type[DataObject]]:
return super().get_source_type(source) return super().get_source_type(source)
def general_search(self, search_query: str) -> List[DatabaseObject]: def general_search(self, search_query: str) -> List[DataObject]:
search_query = search_query.strip() search_query = search_query.strip()
urlescaped_query: str = quote(search_query.strip().replace(" ", "+")) urlescaped_query: str = quote(search_query.strip().replace(" ", "+"))

View File

@ -14,10 +14,11 @@ class SourceType:
page_type: Type[Page] = None page_type: Type[Page] = None
page: Page = None page: Page = None
def register_page(self, page: Page):
self.page = page
def register_page(self, page_type: Type[Page]): def __hash__(self):
self.page_type = page return hash(self.name)
self.page = page_type()
@property @property
def has_page(self) -> bool: def has_page(self) -> bool: