Compare commits
12 Commits
da8887b279
...
feature/mo
| Author | SHA1 | Date | |
|---|---|---|---|
| 80ad2727de | |||
| 19b83ce880 | |||
| 1bf04439f0 | |||
| bab6aeb45d | |||
| 98afe5047d | |||
| 017752c4d0 | |||
| ea4c73158e | |||
| 0096dfe5cb | |||
| bedd0fe819 | |||
| ac6c513d56 | |||
| cc14253239 | |||
| 14f986a497 |
1
.vscode/settings.json
vendored
1
.vscode/settings.json
vendored
@@ -25,6 +25,7 @@
|
||||
"encyclopaedia",
|
||||
"ENDC",
|
||||
"Gitea",
|
||||
"iframe",
|
||||
"isrc",
|
||||
"levenshtein",
|
||||
"metallum",
|
||||
|
||||
@@ -6,8 +6,9 @@ logging.getLogger().setLevel(logging.DEBUG)
|
||||
|
||||
if __name__ == "__main__":
|
||||
commands = [
|
||||
"s: #a Crystal F",
|
||||
"d: 20",
|
||||
"s: #a I'm in a coffin",
|
||||
"0",
|
||||
"d: 0",
|
||||
]
|
||||
|
||||
|
||||
|
||||
@@ -10,12 +10,12 @@ from ..objects import Target
|
||||
LOGGER = logging_settings["codex_logger"]
|
||||
|
||||
|
||||
def correct_codec(target: Target, bitrate_kb: int = main_settings["bitrate"], audio_format: str = main_settings["audio_format"], interval_list: List[Tuple[float, float]] = None):
|
||||
def correct_codec(target: Target, bitrate_kb: int = main_settings["bitrate"], audio_format: str = main_settings["audio_format"], skip_intervals: List[Tuple[float, float]] = None):
|
||||
if not target.exists:
|
||||
LOGGER.warning(f"Target doesn't exist: {target.file_path}")
|
||||
return
|
||||
|
||||
interval_list = interval_list or []
|
||||
skip_intervals = skip_intervals or []
|
||||
|
||||
bitrate_b = int(bitrate_kb / 1024)
|
||||
|
||||
@@ -29,7 +29,7 @@ def correct_codec(target: Target, bitrate_kb: int = main_settings["bitrate"], au
|
||||
|
||||
start = 0
|
||||
next_start = 0
|
||||
for end, next_start in interval_list:
|
||||
for end, next_start in skip_intervals:
|
||||
aselect_list.append(f"between(t,{start},{end})")
|
||||
start = next_start
|
||||
aselect_list.append(f"gte(t,{next_start})")
|
||||
|
||||
@@ -178,8 +178,6 @@ class Downloader:
|
||||
page_count = 0
|
||||
for option in self.current_results.formatted_generator():
|
||||
if isinstance(option, Option):
|
||||
_downloadable = self.pages.is_downloadable(option.music_object)
|
||||
|
||||
r = f"{BColors.GREY.value}{option.index:0{self.option_digits}}{BColors.ENDC.value} {option.music_object.option_string}"
|
||||
print(r)
|
||||
else:
|
||||
@@ -319,7 +317,7 @@ class Downloader:
|
||||
|
||||
for database_object in data_objects:
|
||||
r = self.pages.download(
|
||||
music_object=database_object,
|
||||
data_object=database_object,
|
||||
genre=self.genre,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
@@ -317,7 +317,7 @@ class Connection:
|
||||
name = kwargs.pop("description")
|
||||
|
||||
if progress > 0:
|
||||
headers = dict() if headers is None else headers
|
||||
headers = kwargs.get("headers", dict())
|
||||
headers["Range"] = f"bytes={target.size}-"
|
||||
|
||||
r = self.request(
|
||||
@@ -366,6 +366,7 @@ class Connection:
|
||||
if retry:
|
||||
self.LOGGER.warning(f"Retrying stream...")
|
||||
accepted_response_codes.add(206)
|
||||
stream_kwargs["progress"] = progress
|
||||
return Connection.stream_into(**stream_kwargs)
|
||||
|
||||
return DownloadResult()
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Set
|
||||
|
||||
from ..utils.config import main_settings
|
||||
from ..utils.enums.album import AlbumType
|
||||
|
||||
@@ -2,6 +2,7 @@ from typing import Tuple, Type, Dict, Set, Optional, List
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
import re
|
||||
import logging
|
||||
|
||||
from . import FetchOptions, DownloadOptions
|
||||
from .results import SearchResults
|
||||
@@ -16,10 +17,12 @@ from ..objects import (
|
||||
Artist,
|
||||
Label,
|
||||
)
|
||||
from ..audio import write_metadata_to_target, correct_codec
|
||||
from ..utils import output, BColors
|
||||
from ..utils.string_processing import fit_to_file_system
|
||||
from ..utils.config import youtube_settings, main_settings
|
||||
from ..utils.path_manager import LOCATIONS
|
||||
from ..utils.enums import SourceType
|
||||
from ..utils.enums import SourceType, ALL_SOURCE_TYPES
|
||||
from ..utils.support_classes.download_result import DownloadResult
|
||||
from ..utils.support_classes.query import Query
|
||||
from ..utils.support_classes.download_result import DownloadResult
|
||||
@@ -68,6 +71,8 @@ if DEBUG_PAGES:
|
||||
|
||||
class Pages:
|
||||
def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
|
||||
self.LOGGER = logging.getLogger("download")
|
||||
|
||||
self.download_options: DownloadOptions = download_options or DownloadOptions()
|
||||
self.fetch_options: FetchOptions = fetch_options or FetchOptions()
|
||||
|
||||
@@ -117,7 +122,9 @@ class Pages:
|
||||
return data_object
|
||||
|
||||
source: Source
|
||||
for source in data_object.source_collection.get_sources():
|
||||
for source in data_object.source_collection.get_sources(source_type_sorting={
|
||||
"only_with_page": True,
|
||||
}):
|
||||
new_data_object = self.fetch_from_source(source=source, stop_at_level=stop_at_level)
|
||||
if new_data_object is not None:
|
||||
data_object.merge(new_data_object)
|
||||
@@ -125,36 +132,27 @@ class Pages:
|
||||
return data_object
|
||||
|
||||
def fetch_from_source(self, source: Source, **kwargs) -> Optional[DataObject]:
|
||||
page: Page = self._get_page_from_enum(source.source_type)
|
||||
if page is None:
|
||||
if not source.has_page:
|
||||
return None
|
||||
|
||||
# getting the appropriate function for the page and the object type
|
||||
source_type = page.get_source_type(source)
|
||||
if not hasattr(page, fetch_map[source_type]):
|
||||
source_type = source.page.get_source_type(source=source)
|
||||
if source_type is None:
|
||||
self.LOGGER.debug(f"Could not determine source type for {source}.")
|
||||
return None
|
||||
func = getattr(page, fetch_map[source_type])(source=source, **kwargs)
|
||||
|
||||
func = getattr(source.page, fetch_map[source_type])
|
||||
|
||||
# fetching the data object and marking it as fetched
|
||||
data_object: DataObject = func(source=source)
|
||||
data_object: DataObject = func(source=source, **kwargs)
|
||||
data_object.mark_as_fetched(source.hash_url)
|
||||
return data_object
|
||||
|
||||
def fetch_from_url(self, url: str) -> Optional[DataObject]:
|
||||
source = Source.match_url(url, SourceType.MANUAL)
|
||||
source = Source.match_url(url, ALL_SOURCE_TYPES.MANUAL)
|
||||
if source is None:
|
||||
return None
|
||||
|
||||
return self.fetch_from_source(source=source)
|
||||
|
||||
def is_downloadable(self, music_object: DataObject) -> bool:
|
||||
_page_types = set(self._source_to_page)
|
||||
for src in music_object.source_collection.source_pages:
|
||||
if src in self._source_to_page:
|
||||
_page_types.add(self._source_to_page[src])
|
||||
|
||||
audio_pages = self._audio_pages_set.intersection(_page_types)
|
||||
return len(audio_pages) > 0
|
||||
|
||||
def _skip_object(self, data_object: DataObject) -> bool:
|
||||
if isinstance(data_object, Album):
|
||||
@@ -166,6 +164,7 @@ class Pages:
|
||||
def download(self, data_object: DataObject, genre: str, **kwargs) -> DownloadResult:
|
||||
# fetch the given object
|
||||
self.fetch_details(data_object)
|
||||
output(f"\nDownloading {data_object.option_string}...", color=BColors.BOLD)
|
||||
|
||||
# fetching all parent objects (e.g. if you only download a song)
|
||||
if not kwargs.get("fetched_upwards", False):
|
||||
@@ -188,7 +187,7 @@ class Pages:
|
||||
|
||||
# download all children
|
||||
download_result: DownloadResult = DownloadResult()
|
||||
for c in data_object.get_children():
|
||||
for c in data_object.get_child_collections():
|
||||
for d in c:
|
||||
if self._skip_object(d):
|
||||
continue
|
||||
@@ -205,7 +204,7 @@ class Pages:
|
||||
|
||||
self._download_song(data_object, naming={
|
||||
"genre": [genre],
|
||||
"audio_format": main_settings["audio_format"],
|
||||
"audio_format": [main_settings["audio_format"]],
|
||||
})
|
||||
|
||||
return download_result
|
||||
@@ -222,12 +221,7 @@ class Pages:
|
||||
|
||||
path_template = path_template.replace(f"{{{field}}}", naming[field][0])
|
||||
|
||||
return possible_parts
|
||||
|
||||
def _get_pages_with_source(self, data_object: DataObject, sort_by_attribute: str = "DOWNLOAD_PRIORITY") -> List[Page]:
|
||||
pages = [self._get_page_from_enum(s.source_type) for s in data_object.source_collection.get_sources()]
|
||||
pages.sort(key=lambda p: getattr(p, sort_by_attribute), reverse=True)
|
||||
return list(pages)
|
||||
return path_template
|
||||
|
||||
def _download_song(self, song: Song, naming: dict) -> DownloadOptions:
|
||||
"""
|
||||
@@ -242,7 +236,6 @@ class Pages:
|
||||
# manage the naming
|
||||
naming: Dict[str, List[str]] = defaultdict(list, naming)
|
||||
naming["song"].append(song.title_string)
|
||||
naming["genre"].append(song.genre)
|
||||
naming["isrc"].append(song.isrc)
|
||||
naming["album"].extend(a.title_string for a in song.album_collection)
|
||||
naming["album_type"].extend(a.album_type.value for a in song.album_collection)
|
||||
@@ -253,11 +246,11 @@ class Pages:
|
||||
# removing duplicates from the naming, and process the strings
|
||||
for key, value in naming.items():
|
||||
# https://stackoverflow.com/a/17016257
|
||||
naming[key] = list(dict.fromkeys(items))
|
||||
naming[key] = list(dict.fromkeys(value))
|
||||
song.genre = naming["genre"][0]
|
||||
|
||||
# manage the targets
|
||||
tmp: Target = Target.temp(file_extension=main_settings["audio_format"])
|
||||
found_on_disc = False
|
||||
|
||||
song.target_collection.append(Target(
|
||||
relative_to_music_dir=True,
|
||||
@@ -267,24 +260,64 @@ class Pages:
|
||||
)
|
||||
))
|
||||
for target in song.target_collection:
|
||||
if target.exists():
|
||||
output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY)
|
||||
|
||||
found_on_disc = True
|
||||
if target.exists:
|
||||
output(f'{target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY)
|
||||
r.found_on_disk += 1
|
||||
target.copy_content(tmp)
|
||||
|
||||
if not self.download_options.download_again_if_found:
|
||||
target.copy_content(tmp)
|
||||
else:
|
||||
target.create_parent_directories()
|
||||
output(f'- {target.file_path}', color=BColors.GREY)
|
||||
target.create_path()
|
||||
output(f'{target.file_path}', color=BColors.GREY)
|
||||
|
||||
# actually download
|
||||
for page in self._get_pages_with_source(song, sort_by_attribute="DOWNLOAD_PRIORITY"):
|
||||
r = page.download_song_to_target(song, tmp, r)
|
||||
# this streams from every available source until something succeeds, setting the skip intervals to the values of the according source
|
||||
used_source: Optional[Source] = None
|
||||
skip_intervals: List[Tuple[float, float]] = []
|
||||
for source in song.source_collection.get_sources(source_type_sorting={
|
||||
"only_with_page": True,
|
||||
"sort_key": lambda page: page.download_priority,
|
||||
"reverse": True,
|
||||
}):
|
||||
if tmp.exists:
|
||||
break
|
||||
|
||||
used_source = source
|
||||
streaming_results = source.page.download_song_to_target(source=source, target=tmp, desc="download")
|
||||
skip_intervals = source.page.get_skip_intervals(song=song, source=source)
|
||||
|
||||
# if something has been downloaded but it somehow failed, delete the file
|
||||
if streaming_results.is_fatal_error and tmp.exists:
|
||||
tmp.delete()
|
||||
|
||||
# if everything went right, the file should exist now
|
||||
if not tmp.exists:
|
||||
if used_source is None:
|
||||
r.error_message = f"No source found for {song.option_string}."
|
||||
else:
|
||||
r.error_message = f"Something went wrong downloading {song.option_string}."
|
||||
return r
|
||||
|
||||
# post process the audio
|
||||
found_on_disk = used_source is None
|
||||
if not found_on_disk or self.download_options.process_audio_if_found:
|
||||
correct_codec(target=tmp, skip_intervals=skip_intervals)
|
||||
r.sponsor_segments = len(skip_intervals)
|
||||
|
||||
if used_source is not None:
|
||||
used_source.page.post_process_hook(song=song, temp_target=tmp)
|
||||
|
||||
if not found_on_disk or self.download_options.process_metadata_if_found:
|
||||
write_metadata_to_target(metadata=song.metadata, target=tmp, song=song)
|
||||
|
||||
# copy the tmp target to the final locations
|
||||
for target in song.target_collection:
|
||||
tmp.copy_content(target)
|
||||
|
||||
tmp.delete()
|
||||
return r
|
||||
|
||||
def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]:
|
||||
source = Source.match_url(url, SourceType.MANUAL)
|
||||
source = Source.match_url(url, ALL_SOURCE_TYPES.MANUAL)
|
||||
|
||||
if source is None:
|
||||
raise UrlNotFoundException(url=url)
|
||||
|
||||
@@ -2,7 +2,6 @@ from typing import Tuple, Type, Dict, List, Generator, Union
|
||||
from dataclasses import dataclass
|
||||
|
||||
from ..objects import DatabaseObject
|
||||
from ..utils.enums.source import SourceType
|
||||
from ..pages import Page, EncyclopaediaMetallum, Musify
|
||||
|
||||
|
||||
|
||||
@@ -38,8 +38,13 @@ class FormattedText:
|
||||
def markdown(self) -> str:
|
||||
return md(self.html).strip()
|
||||
|
||||
@property
|
||||
def plain(self) -> str:
|
||||
md = self.markdown
|
||||
return md.replace("\n\n", "\n")
|
||||
|
||||
def __str__(self) -> str:
|
||||
return self.markdown
|
||||
|
||||
plaintext = markdown
|
||||
plaintext = plain
|
||||
|
||||
|
||||
@@ -34,6 +34,6 @@ class Lyrics(OuterProxy):
|
||||
@property
|
||||
def metadata(self) -> Metadata:
|
||||
return Metadata({
|
||||
id3Mapping.UNSYNCED_LYRICS: [self.text.markdown]
|
||||
id3Mapping.UNSYNCED_LYRICS: [self.text.plaintext]
|
||||
})
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ from typing import Optional, Dict, Tuple, List, Type, Generic, Any, TypeVar, Set
|
||||
from pathlib import Path
|
||||
import inspect
|
||||
|
||||
from .source import SourceCollection
|
||||
from .metadata import Metadata
|
||||
from ..utils import get_unix_time, object_trace, generate_id
|
||||
from ..utils.config import logging_settings, main_settings
|
||||
|
||||
@@ -155,9 +155,6 @@ class Song(Base):
|
||||
self.main_artist_collection.extend_object_to_attribute = {
|
||||
"main_album_collection": self.album_collection
|
||||
}
|
||||
self.feature_artist_collection.append_object_to_attribute = {
|
||||
"feature_song_collection": self
|
||||
}
|
||||
|
||||
self.feature_artist_collection.push_to = [self.main_artist_collection]
|
||||
self.main_artist_collection.pull_from = [self.feature_artist_collection]
|
||||
@@ -464,7 +461,6 @@ class Artist(Base):
|
||||
source_collection: SourceCollection
|
||||
contact_collection: Collection[Contact]
|
||||
|
||||
feature_song_collection: Collection[Song]
|
||||
main_album_collection: Collection[Album]
|
||||
label_collection: Collection[Label]
|
||||
|
||||
@@ -479,7 +475,6 @@ class Artist(Base):
|
||||
"general_genre": lambda: "",
|
||||
|
||||
"source_collection": SourceCollection,
|
||||
"feature_song_collection": Collection,
|
||||
"main_album_collection": Collection,
|
||||
"contact_collection": Collection,
|
||||
"label_collection": Collection,
|
||||
@@ -511,14 +506,10 @@ class Artist(Base):
|
||||
Base.__init__(**real_kwargs)
|
||||
|
||||
|
||||
DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("main_album_collection", "feature_song_collection")
|
||||
DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("main_album_collection",)
|
||||
UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection",)
|
||||
|
||||
def __init_collections__(self):
|
||||
self.feature_song_collection.append_object_to_attribute = {
|
||||
"feature_artist_collection": self
|
||||
}
|
||||
|
||||
self.main_album_collection.append_object_to_attribute = {
|
||||
"artist_collection": self
|
||||
}
|
||||
@@ -530,7 +521,6 @@ class Artist(Base):
|
||||
def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]):
|
||||
if object_type is Song:
|
||||
# this doesn't really make sense
|
||||
# self.feature_song_collection.extend(object_list)
|
||||
return
|
||||
|
||||
if object_type is Artist:
|
||||
@@ -628,8 +618,6 @@ class Artist(Base):
|
||||
if len(self.main_album_collection) > 0:
|
||||
r += f" with {len(self.main_album_collection)} albums"
|
||||
|
||||
if len(self.feature_song_collection) > 0:
|
||||
r += f" featured in {len(self.feature_song_collection)} songs"
|
||||
r += BColors.ENDC.value
|
||||
|
||||
return r
|
||||
|
||||
@@ -2,19 +2,31 @@ from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from enum import Enum
|
||||
from typing import List, Dict, Set, Tuple, Optional, Iterable, Generator
|
||||
from typing import (
|
||||
List,
|
||||
Dict,
|
||||
Set,
|
||||
Tuple,
|
||||
Optional,
|
||||
Iterable,
|
||||
Generator,
|
||||
TypedDict,
|
||||
Callable,
|
||||
Any,
|
||||
TYPE_CHECKING
|
||||
)
|
||||
from urllib.parse import urlparse, ParseResult
|
||||
from dataclasses import dataclass, field
|
||||
from functools import cached_property
|
||||
|
||||
from ..utils import generate_id
|
||||
from ..utils.enums import SourceType
|
||||
from ..utils.enums import SourceType, ALL_SOURCE_TYPES
|
||||
from ..utils.config import youtube_settings
|
||||
from ..utils.string_processing import hash_url, shorten_display_url
|
||||
|
||||
from .metadata import Mapping, Metadata
|
||||
from .parents import OuterProxy
|
||||
from .collection import Collection
|
||||
if TYPE_CHECKING:
|
||||
from ..pages.abstract import Page
|
||||
|
||||
|
||||
|
||||
@@ -29,10 +41,6 @@ class Source:
|
||||
|
||||
def __post_init__(self):
|
||||
self.referrer_page = self.referrer_page or self.source_type
|
||||
|
||||
@property
|
||||
def parsed_url(self) -> ParseResult:
|
||||
return urlparse(self.url)
|
||||
|
||||
@classmethod
|
||||
def match_url(cls, url: str, referrer_page: SourceType) -> Optional[Source]:
|
||||
@@ -44,38 +52,50 @@ class Source:
|
||||
url = parsed_url.geturl()
|
||||
|
||||
if "musify" in parsed_url.netloc:
|
||||
return cls(SourceType.MUSIFY, url, referrer_page=referrer_page)
|
||||
return cls(ALL_SOURCE_TYPES.MUSIFY, url, referrer_page=referrer_page)
|
||||
|
||||
if parsed_url.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]:
|
||||
return cls(SourceType.YOUTUBE, url, referrer_page=referrer_page)
|
||||
return cls(ALL_SOURCE_TYPES.YOUTUBE, url, referrer_page=referrer_page)
|
||||
|
||||
if url.startswith("https://www.deezer"):
|
||||
return cls(SourceType.DEEZER, url, referrer_page=referrer_page)
|
||||
return cls(ALL_SOURCE_TYPES.DEEZER, url, referrer_page=referrer_page)
|
||||
|
||||
if url.startswith("https://open.spotify.com"):
|
||||
return cls(SourceType.SPOTIFY, url, referrer_page=referrer_page)
|
||||
return cls(ALL_SOURCE_TYPES.SPOTIFY, url, referrer_page=referrer_page)
|
||||
|
||||
if "bandcamp" in url:
|
||||
return cls(SourceType.BANDCAMP, url, referrer_page=referrer_page)
|
||||
return cls(ALL_SOURCE_TYPES.BANDCAMP, url, referrer_page=referrer_page)
|
||||
|
||||
if "wikipedia" in parsed_url.netloc:
|
||||
return cls(SourceType.WIKIPEDIA, url, referrer_page=referrer_page)
|
||||
return cls(ALL_SOURCE_TYPES.WIKIPEDIA, url, referrer_page=referrer_page)
|
||||
|
||||
if url.startswith("https://www.metal-archives.com/"):
|
||||
return cls(SourceType.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page)
|
||||
return cls(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page)
|
||||
|
||||
# the less important once
|
||||
if url.startswith("https://www.facebook"):
|
||||
return cls(SourceType.FACEBOOK, url, referrer_page=referrer_page)
|
||||
return cls(ALL_SOURCE_TYPES.FACEBOOK, url, referrer_page=referrer_page)
|
||||
|
||||
if url.startswith("https://www.instagram"):
|
||||
return cls(SourceType.INSTAGRAM, url, referrer_page=referrer_page)
|
||||
return cls(ALL_SOURCE_TYPES.INSTAGRAM, url, referrer_page=referrer_page)
|
||||
|
||||
if url.startswith("https://twitter"):
|
||||
return cls(SourceType.TWITTER, url, referrer_page=referrer_page)
|
||||
return cls(ALL_SOURCE_TYPES.TWITTER, url, referrer_page=referrer_page)
|
||||
|
||||
if url.startswith("https://myspace.com"):
|
||||
return cls(SourceType.MYSPACE, url, referrer_page=referrer_page)
|
||||
return cls(ALL_SOURCE_TYPES.MYSPACE, url, referrer_page=referrer_page)
|
||||
|
||||
@property
|
||||
def has_page(self) -> bool:
|
||||
return self.source_type.page is not None
|
||||
|
||||
@property
|
||||
def page(self) -> Page:
|
||||
return self.source_type.page
|
||||
|
||||
@property
|
||||
def parsed_url(self) -> ParseResult:
|
||||
return urlparse(self.url)
|
||||
|
||||
@property
|
||||
def hash_url(self) -> str:
|
||||
@@ -99,27 +119,72 @@ class Source:
|
||||
page_str = property(fget=lambda self: self.source_type.value)
|
||||
|
||||
|
||||
class SourceTypeSorting(TypedDict):
|
||||
sort_key: Callable[[SourceType], Any]
|
||||
reverse: bool
|
||||
only_with_page: bool
|
||||
|
||||
|
||||
class SourceCollection:
|
||||
__change_version__ = generate_id()
|
||||
|
||||
_indexed_sources: Dict[str, Source]
|
||||
_page_to_source_list: Dict[SourceType, List[Source]]
|
||||
_sources_by_type: Dict[SourceType, List[Source]]
|
||||
|
||||
def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs):
|
||||
self._page_to_source_list = defaultdict(list)
|
||||
self._sources_by_type = defaultdict(list)
|
||||
self._indexed_sources = {}
|
||||
|
||||
self.extend(data or [])
|
||||
|
||||
def has_source_page(self, *source_pages: SourceType) -> bool:
|
||||
return any(source_page in self._page_to_source_list for source_page in source_pages)
|
||||
def source_types(
|
||||
self,
|
||||
only_with_page: bool = False,
|
||||
sort_key = lambda page: page.name,
|
||||
reverse: bool = False
|
||||
) -> Iterable[SourceType]:
|
||||
"""
|
||||
Returns a list of all source types contained in this source collection.
|
||||
|
||||
def get_sources(self, *source_pages: List[Source]) -> Generator[Source]:
|
||||
if not len(source_pages):
|
||||
source_pages = self.source_pages
|
||||
Args:
|
||||
only_with_page (bool, optional): If True, only returns source types that have a page, meaning you can download from them.
|
||||
sort_key (function, optional): A function that defines the sorting key for the source types. Defaults to lambda page: page.name.
|
||||
reverse (bool, optional): If True, sorts the source types in reverse order. Defaults to False.
|
||||
|
||||
for page in source_pages:
|
||||
yield from self._page_to_source_list[page]
|
||||
Returns:
|
||||
Iterable[SourceType]: A list of source types.
|
||||
"""
|
||||
|
||||
source_types: List[SourceType] = self._sources_by_type.keys()
|
||||
if only_with_page:
|
||||
source_types = filter(lambda st: st.has_page, source_types)
|
||||
|
||||
return sorted(
|
||||
source_types,
|
||||
key=sort_key,
|
||||
reverse=reverse
|
||||
)
|
||||
|
||||
def get_sources(self, *source_types: List[SourceType], source_type_sorting: SourceTypeSorting = None) -> Generator[Source]:
|
||||
"""
|
||||
Retrieves sources based on the provided source types and source type sorting.
|
||||
|
||||
Args:
|
||||
*source_types (List[Source]): Variable number of source types to filter the sources.
|
||||
source_type_sorting (SourceTypeSorting): Sorting criteria for the source types. This is only relevant if no source types are provided.
|
||||
|
||||
Yields:
|
||||
Generator[Source]: A generator that yields the sources based on the provided filters.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
if not len(source_types):
|
||||
source_type_sorting = source_type_sorting or {}
|
||||
source_types = self.source_types(**source_type_sorting)
|
||||
|
||||
for source_type in source_types:
|
||||
yield from self._sources_by_type[source_type]
|
||||
|
||||
def append(self, source: Source):
|
||||
if source is None:
|
||||
@@ -135,7 +200,7 @@ class SourceCollection:
|
||||
existing_source.__merge__(source)
|
||||
source = existing_source
|
||||
else:
|
||||
self._page_to_source_list[source.source_type].append(source)
|
||||
self._sources_by_type[source.source_type].append(source)
|
||||
|
||||
changed = False
|
||||
for key in source.indexing_values:
|
||||
@@ -156,10 +221,6 @@ class SourceCollection:
|
||||
def __merge__(self, other: SourceCollection, **kwargs):
|
||||
self.extend(other)
|
||||
|
||||
@property
|
||||
def source_pages(self) -> Iterable[SourceType]:
|
||||
return sorted(self._page_to_source_list.keys(), key=lambda page: page.value)
|
||||
|
||||
@property
|
||||
def hash_url_list(self) -> List[str]:
|
||||
return [hash_url(source.url) for source in self.get_sources()]
|
||||
@@ -170,7 +231,7 @@ class SourceCollection:
|
||||
|
||||
@property
|
||||
def homepage_list(self) -> List[str]:
|
||||
return [source.homepage for source in self.source_pages]
|
||||
return [source_type.homepage for source_type in self._sources_by_type.keys()]
|
||||
|
||||
def indexing_values(self) -> Generator[Tuple[str, str], None, None]:
|
||||
for index in self._indexed_sources:
|
||||
|
||||
@@ -49,15 +49,16 @@ class DownloadOptions:
|
||||
|
||||
class Page:
|
||||
SOURCE_TYPE: SourceType
|
||||
LOGGER: LOGGER
|
||||
LOGGER: logging.Logger
|
||||
|
||||
def __new__(cls, *args, **kwargs):
|
||||
cls.SOURCE_TYPE.register_page(cls)
|
||||
cls.LOGGER = logging.getLogger(cls.__name__)
|
||||
|
||||
return super().__new__(cls)
|
||||
|
||||
def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
|
||||
self.SOURCE_TYPE.register_page(self)
|
||||
|
||||
self.download_options: DownloadOptions = download_options or DownloadOptions()
|
||||
self.fetch_options: FetchOptions = fetch_options or FetchOptions()
|
||||
|
||||
@@ -145,153 +146,7 @@ class Page:
|
||||
def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:
|
||||
return Label()
|
||||
|
||||
def download(
|
||||
self,
|
||||
music_object: DatabaseObject,
|
||||
genre: str,
|
||||
) -> DownloadResult:
|
||||
naming_dict: NamingDict = NamingDict({"genre": genre})
|
||||
|
||||
def fill_naming_objects(naming_music_object: DatabaseObject):
|
||||
nonlocal naming_dict
|
||||
|
||||
for collection_name in naming_music_object.UPWARDS_COLLECTION_STRING_ATTRIBUTES:
|
||||
collection: Collection = getattr(naming_music_object, collection_name)
|
||||
|
||||
if collection.empty:
|
||||
continue
|
||||
|
||||
dom_ordered_music_object: DatabaseObject = collection[0]
|
||||
naming_dict.add_object(dom_ordered_music_object)
|
||||
return fill_naming_objects(dom_ordered_music_object)
|
||||
|
||||
fill_naming_objects(music_object)
|
||||
|
||||
return self._download(music_object, naming_dict)
|
||||
|
||||
def _download(
|
||||
self,
|
||||
music_object: DatabaseObject,
|
||||
naming_dict: NamingDict,
|
||||
**kwargs
|
||||
) -> DownloadResult:
|
||||
if isinstance(music_object, Song):
|
||||
output(f"Downloading {music_object.option_string} to:", color=BColors.BOLD)
|
||||
else:
|
||||
output(f"Downloading {music_object.option_string}...", color=BColors.BOLD)
|
||||
|
||||
# Skips all releases, that are defined in shared.ALBUM_TYPE_BLACKLIST, if download_all is False
|
||||
if isinstance(music_object, Album):
|
||||
if not self.download_options.download_all and music_object.album_type in self.download_options.album_type_blacklist:
|
||||
return DownloadResult()
|
||||
|
||||
if not (isinstance(music_object, Song) and self.NO_ADDITIONAL_DATA_FROM_SONG):
|
||||
self.fetch_details(music_object=music_object, stop_at_level=1)
|
||||
|
||||
if isinstance(music_object, Album):
|
||||
music_object.update_tracksort()
|
||||
|
||||
naming_dict.add_object(music_object)
|
||||
|
||||
if isinstance(music_object, Song):
|
||||
return self._download_song(music_object, naming_dict)
|
||||
|
||||
download_result: DownloadResult = DownloadResult()
|
||||
|
||||
for collection_name in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
|
||||
collection: Collection = getattr(music_object, collection_name)
|
||||
|
||||
sub_ordered_music_object: DatabaseObject
|
||||
for sub_ordered_music_object in collection:
|
||||
download_result.merge(self._download(sub_ordered_music_object, naming_dict.copy()))
|
||||
|
||||
return download_result
|
||||
|
||||
def _download_song(self, song: Song, naming_dict: NamingDict):
|
||||
song.compile()
|
||||
if "genre" not in naming_dict and song.genre is not None:
|
||||
naming_dict["genre"] = song.genre
|
||||
|
||||
if song.genre is None:
|
||||
song.genre = naming_dict["genre"]
|
||||
|
||||
path_parts = Formatter().parse(main_settings["download_path"])
|
||||
file_parts = Formatter().parse(main_settings["download_file"])
|
||||
new_target = Target(
|
||||
relative_to_music_dir=True,
|
||||
file_path=Path(
|
||||
main_settings["download_path"].format(**{part[1]: naming_dict[part[1]] for part in path_parts}),
|
||||
main_settings["download_file"].format(**{part[1]: naming_dict[part[1]] for part in file_parts})
|
||||
)
|
||||
)
|
||||
|
||||
if song.target_collection.empty:
|
||||
song.target_collection.append(new_target)
|
||||
|
||||
r = DownloadResult(1)
|
||||
temp_target: Target = Target.temp(file_extension=main_settings["audio_format"])
|
||||
|
||||
found_on_disc = False
|
||||
target: Target
|
||||
for target in song.target_collection:
|
||||
current_exists = target.exists
|
||||
|
||||
if current_exists:
|
||||
output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY)
|
||||
target.copy_content(temp_target)
|
||||
found_on_disc = True
|
||||
|
||||
r.found_on_disk += 1
|
||||
r.add_target(target)
|
||||
else:
|
||||
output(f'- {target.file_path}', color=BColors.GREY)
|
||||
|
||||
if not song.source_collection.has_source_page(self.SOURCE_TYPE):
|
||||
return DownloadResult(error_message=f"No {self.__class__.__name__} source found for {song.option_string}.")
|
||||
|
||||
sources = song.source_collection.get_sources(self.SOURCE_TYPE)
|
||||
|
||||
skip_intervals = []
|
||||
if not found_on_disc:
|
||||
for source in sources:
|
||||
r = self.download_song_to_target(source=source, target=temp_target, desc="downloading")
|
||||
|
||||
if not r.is_fatal_error:
|
||||
skip_intervals = self.get_skip_intervals(song, source)
|
||||
break
|
||||
|
||||
if temp_target.exists:
|
||||
r.merge(self._post_process_targets(
|
||||
song=song,
|
||||
temp_target=temp_target,
|
||||
interval_list=skip_intervals,
|
||||
found_on_disc=found_on_disc,
|
||||
))
|
||||
|
||||
return r
|
||||
|
||||
def _post_process_targets(self, song: Song, temp_target: Target, interval_list: List, found_on_disc: bool) -> DownloadResult:
|
||||
if not found_on_disc or self.download_options.process_audio_if_found:
|
||||
correct_codec(temp_target, interval_list=interval_list)
|
||||
|
||||
self.post_process_hook(song, temp_target)
|
||||
|
||||
if not found_on_disc or self.download_options.process_metadata_if_found:
|
||||
write_metadata_to_target(song.metadata, temp_target, song)
|
||||
|
||||
r = DownloadResult()
|
||||
|
||||
target: Target
|
||||
for target in song.target_collection:
|
||||
if temp_target is not target:
|
||||
temp_target.copy_content(target)
|
||||
r.add_target(target)
|
||||
|
||||
temp_target.delete()
|
||||
r.sponsor_segments += len(interval_list)
|
||||
|
||||
return r
|
||||
|
||||
# to download stuff
|
||||
def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]:
|
||||
return []
|
||||
|
||||
|
||||
@@ -51,7 +51,6 @@ class BandcampTypes(Enum):
|
||||
|
||||
class Bandcamp(Page):
|
||||
SOURCE_TYPE = ALL_SOURCE_TYPES.BANDCAMP
|
||||
LOGGER = logging_settings["bandcamp_logger"]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.connection: Connection = Connection(
|
||||
@@ -63,8 +62,7 @@ class Bandcamp(Page):
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
|
||||
parsed_url = urlparse(source.url)
|
||||
path = parsed_url.path.replace("/", "")
|
||||
path = source.parsed_url.path.replace("/", "")
|
||||
|
||||
if path == "" or path.startswith("music"):
|
||||
return Artist
|
||||
|
||||
@@ -7,7 +7,7 @@ from urllib.parse import urlparse, urlencode
|
||||
from ..connection import Connection
|
||||
from ..utils.config import logging_settings
|
||||
from .abstract import Page
|
||||
from ..utils.enums.source import SourceType
|
||||
from ..utils.enums import SourceType, ALL_SOURCE_TYPES
|
||||
from ..utils.enums.album import AlbumType
|
||||
from ..utils.support_classes.query import Query
|
||||
from ..objects import (
|
||||
@@ -59,7 +59,7 @@ def _song_from_json(artist_html=None, album_html=None, release_type=None, title=
|
||||
_album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html)
|
||||
],
|
||||
source_list=[
|
||||
Source(SourceType.ENCYCLOPAEDIA_METALLUM, song_id)
|
||||
Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, song_id)
|
||||
]
|
||||
)
|
||||
|
||||
@@ -85,7 +85,7 @@ def _artist_from_json(artist_html=None, genre=None, country=None) -> Artist:
|
||||
return Artist(
|
||||
name=artist_name,
|
||||
source_list=[
|
||||
Source(SourceType.ENCYCLOPAEDIA_METALLUM, artist_url)
|
||||
Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, artist_url)
|
||||
]
|
||||
)
|
||||
|
||||
@@ -105,7 +105,7 @@ def _album_from_json(album_html=None, release_type=None, artist_html=None) -> Al
|
||||
title=album_name,
|
||||
album_type=album_type,
|
||||
source_list=[
|
||||
Source(SourceType.ENCYCLOPAEDIA_METALLUM, album_url)
|
||||
Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, album_url)
|
||||
],
|
||||
artist_list=[
|
||||
_artist_from_json(artist_html=artist_html)
|
||||
@@ -207,7 +207,7 @@ def create_grid(
|
||||
|
||||
|
||||
class EncyclopaediaMetallum(Page):
|
||||
SOURCE_TYPE = SourceType.ENCYCLOPAEDIA_METALLUM
|
||||
SOURCE_TYPE = ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM
|
||||
LOGGER = logging_settings["metal_archives_logger"]
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
|
||||
@@ -502,9 +502,18 @@ class Musify(Page):
|
||||
for video_container in video_container_list:
|
||||
iframe_list: List[BeautifulSoup] = video_container.findAll("iframe")
|
||||
for iframe in iframe_list:
|
||||
"""
|
||||
the url could look like this
|
||||
https://www.youtube.com/embed/sNObCkhzOYA?si=dNVgnZMBNVlNb0P_
|
||||
"""
|
||||
parsed_url = urlparse(iframe["src"])
|
||||
path_parts = parsed_url.path.strip("/").split("/")
|
||||
if path_parts[0] != "embed" or len(path_parts) < 2:
|
||||
continue
|
||||
|
||||
source_list.append(Source(
|
||||
SourceType.YOUTUBE,
|
||||
iframe["src"],
|
||||
ALL_SOURCE_TYPES.YOUTUBE,
|
||||
f"https://music.youtube.com/watch?v={path_parts[1]}",
|
||||
referrer_page=self.SOURCE_TYPE
|
||||
))
|
||||
|
||||
|
||||
@@ -41,8 +41,6 @@ class YouTube(SuperYouTube):
|
||||
# CHANGE
|
||||
SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE
|
||||
|
||||
NO_ADDITIONAL_DATA_FROM_SONG = False
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.connection: Connection = Connection(
|
||||
host=get_invidious_url(),
|
||||
|
||||
@@ -22,20 +22,22 @@ from ...utils import get_current_millis, traverse_json_path
|
||||
|
||||
from ...utils import dump_to_file
|
||||
|
||||
from ...objects import Source, DatabaseObject, ID3Timestamp, Artwork
|
||||
from ..abstract import Page
|
||||
from ...objects import (
|
||||
Artist,
|
||||
DatabaseObject as DataObject,
|
||||
Source,
|
||||
SourceType,
|
||||
FormattedText,
|
||||
ID3Timestamp,
|
||||
Artwork,
|
||||
Artist,
|
||||
Song,
|
||||
Album,
|
||||
Label,
|
||||
Target,
|
||||
Lyrics,
|
||||
FormattedText
|
||||
)
|
||||
from ...connection import Connection
|
||||
from ...utils.enums import SourceType, ALL_SOURCE_TYPES
|
||||
from ...utils.enums.album import AlbumType
|
||||
from ...utils.support_classes.download_result import DownloadResult
|
||||
|
||||
@@ -176,8 +178,7 @@ ALBUM_TYPE_MAP = {
|
||||
|
||||
class YoutubeMusic(SuperYouTube):
|
||||
# CHANGE
|
||||
SOURCE_TYPE = SourceType.YOUTUBE_MUSIC
|
||||
LOGGER = logging_settings["youtube_music_logger"]
|
||||
SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE
|
||||
|
||||
def __init__(self, *args, ydl_opts: dict = None, **kwargs):
|
||||
self.yt_music_connection: YoutubeMusicConnection = YoutubeMusicConnection(
|
||||
@@ -348,10 +349,10 @@ class YoutubeMusic(SuperYouTube):
|
||||
default='{}'
|
||||
)) or {}
|
||||
|
||||
def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
|
||||
def get_source_type(self, source: Source) -> Optional[Type[DataObject]]:
|
||||
return super().get_source_type(source)
|
||||
|
||||
def general_search(self, search_query: str) -> List[DatabaseObject]:
|
||||
def general_search(self, search_query: str) -> List[DataObject]:
|
||||
search_query = search_query.strip()
|
||||
|
||||
urlescaped_query: str = quote(search_query.strip().replace(" ", "+"))
|
||||
@@ -619,7 +620,7 @@ class YoutubeMusic(SuperYouTube):
|
||||
Artist(
|
||||
name=name,
|
||||
source_list=[Source(
|
||||
SourceType.YOUTUBE_MUSIC,
|
||||
self.SOURCE_TYPE,
|
||||
f"https://music.youtube.com/channel/{ydl_res.get('channel_id', ydl_res.get('uploader_id', ''))}"
|
||||
)]
|
||||
) for name in artist_names]
|
||||
@@ -640,7 +641,7 @@ class YoutubeMusic(SuperYouTube):
|
||||
artwork=Artwork(*ydl_res.get("thumbnails", [])),
|
||||
main_artist_list=artist_list,
|
||||
source_list=[Source(
|
||||
SourceType.YOUTUBE_MUSIC,
|
||||
self.SOURCE_TYPE,
|
||||
f"https://music.youtube.com/watch?v={ydl_res.get('id')}"
|
||||
), source],
|
||||
)
|
||||
|
||||
@@ -14,10 +14,11 @@ class SourceType:
|
||||
page_type: Type[Page] = None
|
||||
page: Page = None
|
||||
|
||||
def register_page(self, page: Page):
    """
    Store the given page on this source type.

    :param page: the page object that is assigned to ``self.page``
    """
    self.page = page
|
||||
|
||||
def register_page(self, page_type: Type[Page]):
    """
    Register the page class of this source type and create its instance.

    :param page_type: the page class; it is stored as ``self.page_type`` and
        called without arguments to build ``self.page``
    """
    # The parameter is named `page_type`; assigning the undefined name `page`
    # raised a NameError on every call.
    self.page_type = page_type
    self.page = page_type()
|
||||
def __hash__(self):
    """Hash the object by its ``name`` attribute."""
    name_hash = hash(self.name)
    return name_hash
|
||||
|
||||
@property
|
||||
def has_page(self) -> bool:
|
||||
|
||||
Reference in New Issue
Block a user