feature/move_download_code_to_download #34

Merged
Hazel merged 18 commits from feature/move_download_code_to_download into experimental 2024-05-15 15:24:30 +00:00
12 changed files with 65 additions and 196 deletions
Showing only changes of commit bedd0fe819 - Show all commits

View File

@ -6,8 +6,9 @@ logging.getLogger().setLevel(logging.DEBUG)
if __name__ == "__main__":
commands = [
"s: #a Crystal F",
"d: 20",
"s: #a I'm in a coffin",
"0",
"d: 0",
]

View File

@ -317,7 +317,7 @@ class Downloader:
for database_object in data_objects:
r = self.pages.download(
music_object=database_object,
data_object=database_object,
genre=self.genre,
**kwargs
)

View File

@ -1,4 +1,5 @@
from dataclasses import dataclass, field
from typing import Set
from ..utils.config import main_settings
from ..utils.enums.album import AlbumType

View File

@ -2,6 +2,7 @@ from typing import Tuple, Type, Dict, Set, Optional, List
from collections import defaultdict
from pathlib import Path
import re
import logging
from . import FetchOptions, DownloadOptions
from .results import SearchResults
@ -17,6 +18,7 @@ from ..objects import (
Label,
)
from ..audio import write_metadata_to_target, correct_codec
from ..utils import output, BColors
from ..utils.string_processing import fit_to_file_system
from ..utils.config import youtube_settings, main_settings
from ..utils.path_manager import LOCATIONS
@ -69,6 +71,8 @@ if DEBUG_PAGES:
class Pages:
def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
self.LOGGER = logging.getLogger("download")
self.download_options: DownloadOptions = download_options or DownloadOptions()
self.fetch_options: FetchOptions = fetch_options or FetchOptions()
@ -118,7 +122,9 @@ class Pages:
return data_object
source: Source
for source in data_object.source_collection.get_sources():
for source in data_object.source_collection.get_sources(source_type_sorting={
"only_with_page": True,
}):
new_data_object = self.fetch_from_source(source=source, stop_at_level=stop_at_level)
if new_data_object is not None:
data_object.merge(new_data_object)
@ -129,10 +135,15 @@ class Pages:
if not source.has_page:
return None
func = getattr(source.page, fetch_map[source_type])(source=source, **kwargs)
source_type = source.page.get_source_type(source=source)
if source_type is None:
self.LOGGER.debug(f"Could not determine source type for {source}.")
return None
func = getattr(source.page, fetch_map[source_type])
# fetching the data object and marking it as fetched
data_object: DataObject = func(source=source)
data_object: DataObject = func(source=source, **kwargs)
data_object.mark_as_fetched(source.hash_url)
return data_object
@ -175,7 +186,7 @@ class Pages:
# download all children
download_result: DownloadResult = DownloadResult()
for c in data_object.get_children():
for c in data_object.get_child_collections():
for d in c:
if self._skip_object(d):
continue
@ -209,7 +220,7 @@ class Pages:
path_template = path_template.replace(f"{{{field}}}", naming[field][0])
return possible_parts
return path_template
def _download_song(self, song: Song, naming: dict) -> DownloadOptions:
"""
@ -235,7 +246,7 @@ class Pages:
# removing duplicates from the naming, and process the strings
for key, value in naming.items():
# https://stackoverflow.com/a/17016257
naming[key] = list(dict.fromkeys(items))
naming[key] = list(dict.fromkeys(value))
# manage the targets
tmp: Target = Target.temp(file_extension=main_settings["audio_format"])
@ -248,14 +259,14 @@ class Pages:
)
))
for target in song.target_collection:
if target.exists():
if target.exists:
output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY)
r.found_on_disk += 1
if self.download_options.download_again_if_found:
target.copy_content(tmp)
else:
target.create_parent_directories()
target.create_path()
output(f'- {target.file_path}', color=BColors.GREY)
# this streams from every available source until one succeeds, and sets the skip intervals to the values of the corresponding source
@ -294,7 +305,7 @@ class Pages:
if used_source is not None:
used_source.page.post_process_hook(song=song, temp_target=tmp)
if not found_on_disc or self.download_options.process_metadata_if_found:
if not found_on_disk or self.download_options.process_metadata_if_found:
write_metadata_to_target(metadata=song.metadata, target=tmp, song=song)
tmp.delete()

View File

@ -2,7 +2,6 @@ from typing import Tuple, Type, Dict, List, Generator, Union
from dataclasses import dataclass
from ..objects import DatabaseObject
from ..utils.enums.source import SourceType
from ..pages import Page, EncyclopaediaMetallum, Musify

View File

@ -8,6 +8,7 @@ from typing import Optional, Dict, Tuple, List, Type, Generic, Any, TypeVar, Set
from pathlib import Path
import inspect
from .source import SourceCollection
from .metadata import Metadata
from ..utils import get_unix_time, object_trace, generate_id
from ..utils.config import logging_settings, main_settings

View File

@ -20,13 +20,11 @@ from dataclasses import dataclass, field
from functools import cached_property
from ..utils import generate_id
from ..utils.enums import SourceType
from ..utils.enums import SourceType, ALL_SOURCE_TYPES
from ..utils.config import youtube_settings
from ..utils.string_processing import hash_url, shorten_display_url
from .metadata import Mapping, Metadata
from .parents import OuterProxy
from .collection import Collection
if TYPE_CHECKING:
from ..pages.abstract import Page
@ -54,38 +52,38 @@ class Source:
url = parsed_url.geturl()
if "musify" in parsed_url.netloc:
return cls(SourceType.MUSIFY, url, referrer_page=referrer_page)
return cls(ALL_SOURCE_TYPES.MUSIFY, url, referrer_page=referrer_page)
if parsed_url.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]:
return cls(SourceType.YOUTUBE, url, referrer_page=referrer_page)
return cls(ALL_SOURCE_TYPES.YOUTUBE, url, referrer_page=referrer_page)
if url.startswith("https://www.deezer"):
return cls(SourceType.DEEZER, url, referrer_page=referrer_page)
return cls(ALL_SOURCE_TYPES.DEEZER, url, referrer_page=referrer_page)
if url.startswith("https://open.spotify.com"):
return cls(SourceType.SPOTIFY, url, referrer_page=referrer_page)
return cls(ALL_SOURCE_TYPES.SPOTIFY, url, referrer_page=referrer_page)
if "bandcamp" in url:
return cls(SourceType.BANDCAMP, url, referrer_page=referrer_page)
return cls(ALL_SOURCE_TYPES.BANDCAMP, url, referrer_page=referrer_page)
if "wikipedia" in parsed_url.netloc:
return cls(SourceType.WIKIPEDIA, url, referrer_page=referrer_page)
return cls(ALL_SOURCE_TYPES.WIKIPEDIA, url, referrer_page=referrer_page)
if url.startswith("https://www.metal-archives.com/"):
return cls(SourceType.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page)
return cls(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page)
# the less important ones
if url.startswith("https://www.facebook"):
return cls(SourceType.FACEBOOK, url, referrer_page=referrer_page)
return cls(ALL_SOURCE_TYPES.FACEBOOK, url, referrer_page=referrer_page)
if url.startswith("https://www.instagram"):
return cls(SourceType.INSTAGRAM, url, referrer_page=referrer_page)
return cls(ALL_SOURCE_TYPES.INSTAGRAM, url, referrer_page=referrer_page)
if url.startswith("https://twitter"):
return cls(SourceType.TWITTER, url, referrer_page=referrer_page)
return cls(ALL_SOURCE_TYPES.TWITTER, url, referrer_page=referrer_page)
if url.startswith("https://myspace.com"):
return cls(SourceType.MYSPACE, url, referrer_page=referrer_page)
return cls(ALL_SOURCE_TYPES.MYSPACE, url, referrer_page=referrer_page)
@property
def has_page(self) -> bool:
@ -134,7 +132,7 @@ class SourceCollection:
_sources_by_type: Dict[SourceType, List[Source]]
def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs):
self._page_to_source_list = defaultdict(list)
self._sources_by_type = defaultdict(list)
self._indexed_sources = {}
self.extend(data or [])
@ -157,7 +155,7 @@ class SourceCollection:
Iterable[SourceType]: A list of source types.
"""
source_types: List[SourceType] = self._page_to_source_list.keys()
source_types: List[SourceType] = self._sources_by_type.keys()
if only_with_page:
source_types = filter(lambda st: st.has_page, source_types)
@ -186,7 +184,7 @@ class SourceCollection:
source_types = self.source_types(**source_type_sorting)
for source_type in source_types:
yield from self._page_to_source_list[source_type]
yield from self._sources_by_type[source_type]
def append(self, source: Source):
if source is None:
@ -202,7 +200,7 @@ class SourceCollection:
existing_source.__merge__(source)
source = existing_source
else:
self._page_to_source_list[source.source_type].append(source)
self._sources_by_type[source.source_type].append(source)
changed = False
for key in source.indexing_values:

View File

@ -49,15 +49,16 @@ class DownloadOptions:
class Page:
SOURCE_TYPE: SourceType
LOGGER: LOGGER
LOGGER: logging.Logger
# Runs on every instantiation of a Page (sub)class, before __init__:
# registers the class with its SOURCE_TYPE and gives the class a logger
# named after the class itself.
# NOTE(review): SourceType.register_page appears to instantiate the page type
# it receives; if so, this call re-enters __new__ — confirm this cannot
# recurse endlessly.
def __new__(cls, *args, **kwargs):
cls.SOURCE_TYPE.register_page(cls)
# the logger is stored on the class, so all instances of one class share it
cls.LOGGER = logging.getLogger(cls.__name__)
# *args/**kwargs are not forwarded to the parent __new__
return super().__new__(cls)
def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
self.SOURCE_TYPE.register_page(self)
self.download_options: DownloadOptions = download_options or DownloadOptions()
self.fetch_options: FetchOptions = fetch_options or FetchOptions()
@ -145,151 +146,7 @@ class Page:
# Base implementation: ignores `source` and `stop_at_level` and returns a
# `Label` constructed with no arguments.
# NOTE(review): presumably overridden by concrete pages — confirm.
def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:
return Label()
# Entry point for downloading `music_object`.
# Builds a NamingDict seeded with the given genre, adds the objects reached
# through the upward collections of `music_object`, then delegates the actual
# work to `_download` and returns its DownloadResult.
def download(
self,
music_object: DatabaseObject,
genre: str,
) -> DownloadResult:
naming_dict: NamingDict = NamingDict({"genre": genre})
# Helper: looks at every collection named in UPWARDS_COLLECTION_STRING_ATTRIBUTES,
# skips empty ones, adds the first element of a non-empty collection to the
# naming dict and recurses into that element.
# NOTE(review): indentation is lost in this view; the recursive `return`
# appears to sit inside the loop, so only the first non-empty collection
# would be followed — confirm.
def fill_naming_objects(naming_music_object: DatabaseObject):
nonlocal naming_dict
for collection_name in naming_music_object.UPWARDS_COLLECTION_STRING_ATTRIBUTES:
collection: Collection = getattr(naming_music_object, collection_name)
if collection.empty:
continue
# only the first element of the collection is used for naming
dom_ordered_music_object: DatabaseObject = collection[0]
naming_dict.add_object(dom_ordered_music_object)
return fill_naming_objects(dom_ordered_music_object)
fill_naming_objects(music_object)
return self._download(music_object, naming_dict)
# Recursive worker behind `download`.
# A Song is handed to `_download_song`; any other object has its downward
# collections walked and the results of all children merged into one
# DownloadResult. `**kwargs` is accepted but not used in this body.
def _download(
self,
music_object: DatabaseObject,
naming_dict: NamingDict,
**kwargs
) -> DownloadResult:
# songs announce their target paths next, everything else just a progress line
if isinstance(music_object, Song):
output(f"Downloading {music_object.option_string} to:", color=BColors.BOLD)
else:
output(f"Downloading {music_object.option_string}...", color=BColors.BOLD)
# Skip albums whose album_type is in download_options.album_type_blacklist,
# unless download_options.download_all is set; an empty result is returned.
if isinstance(music_object, Album):
if not self.download_options.download_all and music_object.album_type in self.download_options.album_type_blacklist:
return DownloadResult()
# fetch details for everything except songs of pages that set NO_ADDITIONAL_DATA_FROM_SONG
if not (isinstance(music_object, Song) and self.NO_ADDITIONAL_DATA_FROM_SONG):
self.fetch_details(music_object=music_object, stop_at_level=1)
if isinstance(music_object, Album):
music_object.update_tracksort()
naming_dict.add_object(music_object)
if isinstance(music_object, Song):
return self._download_song(music_object, naming_dict)
download_result: DownloadResult = DownloadResult()
for collection_name in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
collection: Collection = getattr(music_object, collection_name)
sub_ordered_music_object: DatabaseObject
for sub_ordered_music_object in collection:
# each child gets its own copy of the naming dict
download_result.merge(self._download(sub_ordered_music_object, naming_dict.copy()))
return download_result
# Downloads a single song into a temporary target and hands it to
# `_post_process_targets`, which distributes it to the song's targets.
# Returns a DownloadResult describing what happened.
def _download_song(self, song: Song, naming_dict: NamingDict):
song.compile()
# keep the genre of the song and of the naming dict in sync:
# the song's genre fills a missing "genre" key, and vice versa
if "genre" not in naming_dict and song.genre is not None:
naming_dict["genre"] = song.genre
if song.genre is None:
song.genre = naming_dict["genre"]
# NOTE(review): Formatter is presumably string.Formatter, whose parse()
# yields tuples with the field name at index 1 — confirm.
path_parts = Formatter().parse(main_settings["download_path"])
file_parts = Formatter().parse(main_settings["download_file"])
# default target, built by filling the configured path and file templates
# with the values from the naming dict
new_target = Target(
relative_to_music_dir=True,
file_path=Path(
main_settings["download_path"].format(**{part[1]: naming_dict[part[1]] for part in path_parts}),
main_settings["download_file"].format(**{part[1]: naming_dict[part[1]] for part in file_parts})
)
)
# the default target is only used if the song has no targets yet
if song.target_collection.empty:
song.target_collection.append(new_target)
r = DownloadResult(1)
temp_target: Target = Target.temp(file_extension=main_settings["audio_format"])
found_on_disc = False
target: Target
for target in song.target_collection:
current_exists = target.exists
if current_exists:
output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY)
# reuse the file that is already on disk as the content of the temp target
target.copy_content(temp_target)
found_on_disc = True
r.found_on_disk += 1
r.add_target(target)
else:
output(f'- {target.file_path}', color=BColors.GREY)
# only the sources belonging to this page's SOURCE_TYPE are requested
sources = song.source_collection.get_sources(self.SOURCE_TYPE)
skip_intervals = []
if not found_on_disc:
for source in sources:
# NOTE(review): this rebinds `r`, replacing the DownloadResult(1) created above — confirm intended.
r = self.download_song_to_target(source=source, target=temp_target, desc="downloading")
# stop at the first source whose result is not a fatal error
if not r.is_fatal_error:
skip_intervals = self.get_skip_intervals(song, source)
break
# post-process only if the temp target has content, either copied or downloaded
if temp_target.exists:
r.merge(self._post_process_targets(
song=song,
temp_target=temp_target,
skip_intervals=skip_intervals,
found_on_disc=found_on_disc,
))
return r
# Post-processes the temporary target and copies it to the targets of the song.
# Audio and metadata processing each run when the file was not found on disk,
# or when the matching download option (process_audio_if_found /
# process_metadata_if_found) is set. The temp target is deleted at the end.
# Returns a DownloadResult with the targets and the number of skip intervals.
def _post_process_targets(self, song: Song, temp_target: Target, skip_intervals: List, found_on_disc: bool) -> DownloadResult:
if not found_on_disc or self.download_options.process_audio_if_found:
correct_codec(temp_target, skip_intervals=skip_intervals)
self.post_process_hook(song, temp_target)
if not found_on_disc or self.download_options.process_metadata_if_found:
write_metadata_to_target(song.metadata, temp_target, song)
r = DownloadResult()
target: Target
for target in song.target_collection:
# never copy the temp target onto itself
if temp_target is not target:
temp_target.copy_content(target)
r.add_target(target)
temp_target.delete()
# every skip interval is counted as one sponsor segment
r.sponsor_segments += len(skip_intervals)
return r
# to download stuff
# Default implementation: ignores `song` and `source` and returns no intervals.
# NOTE(review): presumably overridden by pages that know which
# (start, end) sections of a track to skip — confirm.
def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]:
return []

View File

@ -62,8 +62,7 @@ class Bandcamp(Page):
super().__init__(*args, **kwargs)
def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
parsed_url = urlparse(source.url)
path = parsed_url.path.replace("/", "")
path = source.parsed_url.path.replace("/", "")
if path == "" or path.startswith("music"):
return Artist

View File

@ -7,7 +7,7 @@ from urllib.parse import urlparse, urlencode
from ..connection import Connection
from ..utils.config import logging_settings
from .abstract import Page
from ..utils.enums.source import SourceType
from ..utils.enums import SourceType, ALL_SOURCE_TYPES
from ..utils.enums.album import AlbumType
from ..utils.support_classes.query import Query
from ..objects import (
@ -59,7 +59,7 @@ def _song_from_json(artist_html=None, album_html=None, release_type=None, title=
_album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html)
],
source_list=[
Source(SourceType.ENCYCLOPAEDIA_METALLUM, song_id)
Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, song_id)
]
)
@ -85,7 +85,7 @@ def _artist_from_json(artist_html=None, genre=None, country=None) -> Artist:
return Artist(
name=artist_name,
source_list=[
Source(SourceType.ENCYCLOPAEDIA_METALLUM, artist_url)
Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, artist_url)
]
)
@ -105,7 +105,7 @@ def _album_from_json(album_html=None, release_type=None, artist_html=None) -> Al
title=album_name,
album_type=album_type,
source_list=[
Source(SourceType.ENCYCLOPAEDIA_METALLUM, album_url)
Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, album_url)
],
artist_list=[
_artist_from_json(artist_html=artist_html)
@ -207,7 +207,7 @@ def create_grid(
class EncyclopaediaMetallum(Page):
SOURCE_TYPE = SourceType.ENCYCLOPAEDIA_METALLUM
SOURCE_TYPE = ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM
LOGGER = logging_settings["metal_archives_logger"]
def __init__(self, **kwargs):

View File

@ -22,20 +22,22 @@ from ...utils import get_current_millis, traverse_json_path
from ...utils import dump_to_file
from ...objects import Source, DatabaseObject, ID3Timestamp, Artwork
from ..abstract import Page
from ...objects import (
Artist,
DatabaseObject as DataObject,
Source,
SourceType,
FormattedText,
ID3Timestamp,
Artwork,
Artist,
Song,
Album,
Label,
Target,
Lyrics,
FormattedText
)
from ...connection import Connection
from ...utils.enums import SourceType, ALL_SOURCE_TYPES
from ...utils.enums.album import AlbumType
from ...utils.support_classes.download_result import DownloadResult
@ -176,8 +178,7 @@ ALBUM_TYPE_MAP = {
class YoutubeMusic(SuperYouTube):
# CHANGE
SOURCE_TYPE = SourceType.YOUTUBE_MUSIC
LOGGER = logging_settings["youtube_music_logger"]
SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE
def __init__(self, *args, ydl_opts: dict = None, **kwargs):
self.yt_music_connection: YoutubeMusicConnection = YoutubeMusicConnection(
@ -348,10 +349,10 @@ class YoutubeMusic(SuperYouTube):
default='{}'
)) or {}
def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
def get_source_type(self, source: Source) -> Optional[Type[DataObject]]:
return super().get_source_type(source)
def general_search(self, search_query: str) -> List[DatabaseObject]:
def general_search(self, search_query: str) -> List[DataObject]:
search_query = search_query.strip()
urlescaped_query: str = quote(search_query.strip().replace(" ", "+"))

View File

@ -14,10 +14,11 @@ class SourceType:
page_type: Type[Page] = None
page: Page = None
def register_page(self, page: Page):
self.page = page
def register_page(self, page_type: Type[Page]):
    """Register the page class that serves this source type.

    Stores the class itself in ``page_type`` and an instance of it,
    constructed without arguments, in ``page``.

    Args:
        page_type: The ``Page`` subclass to associate with this source type.
    """
    # Fixed: the parameter is named ``page_type``; the previous body read the
    # undefined name ``page`` and raised NameError on every call.
    self.page_type = page_type
    # NOTE(review): Page.__new__ appears to call register_page(cls) as well —
    # confirm that instantiating here cannot re-enter this method endlessly.
    self.page = page_type()
def __hash__(self):
return hash(self.name)
@property
def has_page(self) -> bool: