18 Commits

Author SHA1 Message Date
acd183c90e fix: bandcamp
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
ci/woodpecker/pr/woodpecker Pipeline was successful
ci/woodpecker/pull_request_closed/woodpecker Pipeline was successful
2024-05-10 17:39:30 +02:00
7186f06ce6 feat: improved interface
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-10 17:33:07 +02:00
6e354af0d1 feat: added proper settings
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-10 17:06:40 +02:00
155f239c8a feat: changed ids for audio tempfiles to random id instead of increment id
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-10 15:32:14 +02:00
36db651dfa fix: cleaning the song name deleted the song if the song name was the same as the artist name
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-10 15:25:11 +02:00
8426f6e2ea fix: filtered another year
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-10 15:20:22 +02:00
75d0a83d14 fix: changed dependency
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-09 10:57:55 +02:00
Hellow
2af577c0cd fix: removed empty objects
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-08 21:06:40 +02:00
Hellow
3780f05e58 feat: added launch.json 2024-05-08 16:48:27 +02:00
Hellow
a0305a7a6e fix: don't add year as artist 2024-05-08 16:47:56 +02:00
e3d7ed8837 Merge pull request 'fix/musify_artist_spam' (#27) from fix/musify_artist_spam into experimental
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
Reviewed-on: #27
2024-05-08 10:31:23 +00:00
9d4e3e8545 fix: bounds get respected
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
ci/woodpecker/pr/woodpecker Pipeline was successful
ci/woodpecker/pull_request_closed/woodpecker Pipeline was successful
2024-05-08 12:23:16 +02:00
9c63e8e55a fix: correct collections
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
ci/woodpecker/pr/woodpecker Pipeline was successful
2024-05-08 12:09:41 +02:00
a97f8872c8 fix: refetching release title from album card
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-08 09:57:11 +02:00
a5f8057b82 feat: improved initialization of data objects 2024-05-08 09:44:18 +02:00
e3e547c232 feat: improved musify 2024-05-08 09:15:41 +02:00
12c0bf6b83 Merge pull request 'ci: make tags release to the music-kraken pypi package instead of music-kraken-stable' (#24) from ci/remove-stable-package into experimental
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
Reviewed-on: #24
2024-05-07 21:17:11 +00:00
ac9a74138c ci: make tags release to the music-kraken pypi package instead of music-kraken-stable
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
ci/woodpecker/pr/woodpecker Pipeline was successful
ci/woodpecker/pull_request_closed/woodpecker Pipeline was successful
2024-05-07 16:07:45 +00:00
19 changed files with 376 additions and 338 deletions

22
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,22 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal"
},
{
"name": "Python Debugger: Download script",
"type": "debugpy",
"request": "launch",
"program": "development/actual_donwload.py",
"console": "integratedTerminal"
}
]
}

View File

@@ -19,6 +19,7 @@
"albumsort",
"APIC",
"Bandcamp",
"bitrate",
"dotenv",
"encyclopaedia",
"ENDC",
@@ -27,6 +28,7 @@
"metallum",
"musify",
"OKBLUE",
"OKGREEN",
"pathvalidate",
"Referer",
"sponsorblock",

View File

@@ -11,7 +11,6 @@ steps:
build-stable:
image: python
commands:
- sed -i 's/name = "music-kraken"/name = "music-kraken-stable"/' pyproject.toml
- python -m pip install -r requirements-dev.txt
- python3 -m build
environment:

View File

@@ -6,8 +6,8 @@ logging.getLogger().setLevel(logging.DEBUG)
if __name__ == "__main__":
commands = [
"s: #a Crystal F",
"10"
"s: #a Psychonaut 4",
"d: 0",
]

View File

@@ -47,7 +47,7 @@ def correct_codec(target: Target, bitrate_kb: int = main_settings["bitrate"], au
# run the ffmpeg command with a progressbar
ff = FfmpegProgress(ffmpeg_command)
with tqdm(total=100, desc=f"removing {len(interval_list)} segments") as pbar:
with tqdm(total=100, desc=f"processing") as pbar:
for progress in ff.run_command_with_progress():
pbar.update(progress-pbar.n)

View File

@@ -166,9 +166,9 @@ class Downloader:
self.genre = genre or get_genre()
self.process_metadata_anyway = process_metadata_anyway
print()
print(f"Downloading to: \"{self.genre}\"")
print()
output()
output(f"Downloading to: \"{self.genre}\"", color=BColors.HEADER)
output()
def print_current_options(self):
self.page_dict = dict()
@@ -304,20 +304,16 @@ class Downloader:
def goto(self, data_object: DatabaseObject):
page: Type[Page]
self.pages.fetch_details(data_object)
self.pages.fetch_details(data_object, stop_at_level=1)
print(data_object)
print(data_object.options)
self.set_current_options(GoToResults(data_object.options, max_items_per_page=self.max_displayed_options))
self.print_current_options()
def download(self, data_objects: List[DatabaseObject], **kwargs) -> bool:
output()
if len(data_objects) == 1:
output(f"Downloading {data_objects[0].option_string}...", color=BColors.BOLD)
else:
output(f"Downloading {len(data_objects)} objects...", *("- " + o.option_string for o in data_objects), color=BColors.BOLD, sep="\n")
if len(data_objects) > 1:
output(f"Downloading {len(data_objects)} objects...", *("- " + o.option_string for o in data_objects), color=BColors.BOLD, sep="\n")
_result_map: Dict[DatabaseObject, DownloadResult] = dict()
@@ -380,13 +376,13 @@ class Downloader:
continue
i = 0
if possible_index.isdigit():
try:
i = int(possible_index)
else:
except ValueError:
raise MKInvalidInputException(message=f"The index \"{possible_index}\" is not a number.")
if i < 0 and i >= len(self.current_results):
raise MKInvalidInputException(message=f"The index \"{i}\" is not within the bounds of 0-{len(self.current_results)}.")
if i < 0 or i >= len(self.current_results):
raise MKInvalidInputException(message=f"The index \"{i}\" is not within the bounds of 0-{len(self.current_results) - 1}.")
indices.append(i)

View File

@@ -118,7 +118,7 @@ class Pages:
audio_pages = self._audio_pages_set.intersection(_page_types)
for download_page in audio_pages:
return self._page_instances[download_page].download(music_object=music_object, genre=genre, download_all=download_all, process_metadata_anyway=process_metadata_anyway)
return self._page_instances[download_page].download(music_object=music_object, genre=genre)
return DownloadResult(error_message=f"No audio source has been found for {music_object}.")

View File

@@ -28,6 +28,9 @@ class Results:
self._by_index = dict()
self._page_by_index = dict()
def __len__(self) -> int:
    """
    Number of results that are currently addressable by index.

    Returning the highest key instead would be one short of the real
    count for a 0-based index and would raise a ValueError while no
    result has been indexed yet.

    :return: amount of entries in the index mapping, 0 if it is empty
    """
    return len(self._by_index)
def __getitem__(self, index: int):
return self._by_index[index]

View File

@@ -2,6 +2,8 @@ from __future__ import annotations
from collections import defaultdict
from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Generator, Union, Any, Set
import copy
from .parents import OuterProxy
from ..utils import object_trace
from ..utils import output, BColors
@@ -47,8 +49,15 @@ class Collection(Generic[T]):
self.extend(data)
def __hash__(self) -> int:
return id(self)
@property
def collection_names(self) -> List[str]:
    """
    The distinct attribute names this collection is registered under.

    Duplicates are dropped while the first-seen order is kept, so the
    result (and everything built from it, like the repr) is stable
    between runs instead of depending on set iteration order.

    :return: unique names in the order they were registered
    """
    return list(dict.fromkeys(self._collection_for.values()))
def __repr__(self) -> str:
return f"Collection({' | '.join(self._collection_for.values())} {id(self)})"
return f"Collection({' | '.join(self.collection_names)} {id(self)})"
def _map_element(self, __object: T, no_unmap: bool = False, **kwargs):
if not no_unmap:
@@ -104,8 +113,9 @@ class Collection(Generic[T]):
"""
self._data.append(other)
other._inner._is_in_collection.add(self)
# all of the existing hooks to get the defined datastructure
# all of the existing hooks to get the defined datastructures
for collection_attribute, generator in self.extend_object_to_attribute.items():
other.__getattribute__(collection_attribute).extend(generator, **kwargs)
@@ -143,35 +153,35 @@ class Collection(Generic[T]):
if other is None:
return
if not other._inner._has_data:
return
if other.id in self._indexed_from_id:
return
object_trace(f"Appending {other.option_string} to {self}")
for c in self.pull_from:
r = c._find_object(other)
if r is not None:
output("found pull from", r, other, self, color=BColors.RED, sep="\t")
other.merge(r, **kwargs)
c.remove(r, existing=r, **kwargs)
break
existing_object = self._find_object(other)
# switching collection in the case of push to
for c in self.push_to:
r = c._find_object(other)
if r is not None:
output("found push to", r, other, self, color=BColors.RED, sep="\t")
# output("found push to", r, other, c, self, color=BColors.RED, sep="\t")
return c.append(other, **kwargs)
for c in self.pull_from:
r = c._find_object(other)
if r is not None:
# output("found pull from", r, other, c, self, color=BColors.RED, sep="\t")
c.remove(r, existing=r, **kwargs)
if existing_object is None:
existing = self._find_object(other)
if existing is None:
self._append_new_object(other, **kwargs)
else:
existing_object.merge(other, **kwargs)
existing.merge(other, **kwargs)
def remove(self, *other_list: List[T], silent: bool = False, existing: Optional[T] = None, **kwargs):
def remove(self, *other_list: List[T], silent: bool = False, existing: Optional[T] = None, remove_from_other_collection=True, **kwargs):
other: T
for other in other_list:
existing: Optional[T] = existing or self._indexed_values["id"].get(other.id, None)
if existing is None:
@@ -179,16 +189,13 @@ class Collection(Generic[T]):
raise ValueError(f"Object {other} not found in {self}")
return other
"""
for collection_attribute, generator in self.extend_object_to_attribute.items():
other.__getattribute__(collection_attribute).remove(*generator, silent=silent, **kwargs)
for attribute, new_object in self.append_object_to_attribute.items():
other.__getattribute__(attribute).remove(new_object, silent=silent, **kwargs)
"""
self._data.remove(existing)
self._unmap_element(existing)
if remove_from_other_collection:
for c in copy.copy(other._inner._is_in_collection):
c.remove(other, silent=True, remove_from_other_collection=False, **kwargs)
other._inner._is_in_collection = set()
else:
self._data.remove(existing)
self._unmap_element(existing)
def contains(self, __object: T) -> bool:
return self._find_object(__object) is not None

View File

@@ -32,7 +32,7 @@ class FormattedText:
if self.is_empty and other.is_empty:
return True
return self.doc == other.doc
return self.html == other.html
@property
def markdown(self) -> str:

View File

@@ -29,12 +29,17 @@ class InnerData:
"""
_refers_to_instances: set = None
_is_in_collection: set = None
_has_data: bool = False
"""
Attribute versions keep track of whether the attribute has been changed.
"""
def __init__(self, object_type, **kwargs):
self._refers_to_instances = set()
self._is_in_collection = set()
self._fetched_from: dict = {}
# initialize the default values
@@ -45,9 +50,19 @@ class InnerData:
for key, value in kwargs.items():
if hasattr(value, "__is_collection__"):
value._collection_for[self] = key
self.__setattr__(key, value)
if self._has_data:
continue
def __setattr__(self, key: str, value):
    """
    Set the attribute and keep the ``_has_data`` flag up to date.

    The flag is only evaluated while it is still False and the default
    values are already available. It becomes True as soon as a value is
    assigned that is not equal to the default recorded for its key, or
    whose key has no recorded default at all.
    """
    tracking_active = not self._has_data and hasattr(self, "_default_values")

    if tracking_active:
        matches_default = key in self._default_values and self._default_values[key] == value
        super().__setattr__("_has_data", not matches_default)

    return super().__setattr__(key, value)
def __hash__(self):
return self.id
@@ -58,6 +73,7 @@ class InnerData:
"""
self._fetched_from.update(__other._fetched_from)
self._is_in_collection.update(__other._is_in_collection)
for key, value in __other.__dict__.copy().items():
if key.startswith("_"):

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
import random
from collections import defaultdict
from typing import List, Optional, Dict, Tuple, Type, Union
import copy
import pycountry
@@ -110,7 +111,7 @@ class Song(Base):
"album_collection": Collection,
"feature_artist_collection": Collection,
"title": lambda: "",
"title": lambda: None,
"unified_title": lambda: None,
"isrc": lambda: None,
"genre": lambda: None,
@@ -118,13 +119,27 @@ class Song(Base):
"tracksort": lambda: 0,
}
def __init__(self, title: str = "", unified_title: str = None, isrc: str = None, length: int = None,
genre: str = None, note: FormattedText = None, source_list: List[Source] = None,
target_list: List[Target] = None, lyrics_list: List[Lyrics] = None,
main_artist_list: List[Artist] = None, feature_artist_list: List[Artist] = None,
album_list: List[Album] = None, tracksort: int = 0, artwork: Optional[Artwork] = None, **kwargs) -> None:
def __init__(
self,
title: str = None,
isrc: str = None,
length: int = None,
genre: str = None,
note: FormattedText = None,
source_list: List[Source] = None,
target_list: List[Target] = None,
lyrics_list: List[Lyrics] = None,
main_artist_list: List[Artist] = None,
feature_artist_list: List[Artist] = None,
album_list: List[Album] = None,
tracksort: int = 0,
artwork: Optional[Artwork] = None,
**kwargs
) -> None:
real_kwargs = copy.copy(locals())
real_kwargs.update(real_kwargs.pop("kwargs", {}))
Base.__init__(**locals())
Base.__init__(**real_kwargs)
UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("main_artist_collection", "feature_artist_collection", "album_collection")
TITEL = "title"
@@ -210,14 +225,6 @@ class Song(Base):
r += get_collection_string(self.feature_artist_collection, " feat. {}")
return r
@property
def options(self) -> List[P]:
options = self.main_artist_collection.shallow_list
options.extend(self.feature_artist_collection)
options.extend(self.album_collection)
options.append(self)
return options
@property
def tracksort_str(self) -> str:
"""
@@ -273,15 +280,27 @@ class Album(Base):
TITEL = "title"
# This is automatically generated
def __init__(self, title: str = None, unified_title: str = None, album_status: AlbumStatus = None,
album_type: AlbumType = None, language: Language = None, date: ID3Timestamp = None,
barcode: str = None, albumsort: int = None, notes: FormattedText = None,
source_list: List[Source] = None, artist_list: List[Artist] = None, song_list: List[Song] = None,
label_list: List[Label] = None, **kwargs) -> None:
super().__init__(title=title, unified_title=unified_title, album_status=album_status, album_type=album_type,
language=language, date=date, barcode=barcode, albumsort=albumsort, notes=notes,
source_list=source_list, artist_list=artist_list, song_list=song_list, label_list=label_list,
**kwargs)
def __init__(
self,
title: str = None,
unified_title: str = None,
album_status: AlbumStatus = None,
album_type: AlbumType = None,
language: Language = None,
date: ID3Timestamp = None,
barcode: str = None,
albumsort: int = None,
notes: FormattedText = None,
source_list: List[Source] = None,
artist_list: List[Artist] = None,
song_list: List[Song] = None,
label_list: List[Label] = None,
**kwargs
) -> None:
real_kwargs = copy.copy(locals())
real_kwargs.update(real_kwargs.pop("kwargs", {}))
Base.__init__(**real_kwargs)
DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("song_collection",)
UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection", "artist_collection")
@@ -413,14 +432,8 @@ class Album(Base):
return self.album_type.value
"""
All objects dependent on Artist
"""
class Artist(Base):
name: str
unified_name: str
country: Country
formed_in: ID3Timestamp
notes: FormattedText
@@ -437,8 +450,7 @@ class Artist(Base):
label_collection: Collection[Label]
_default_factories = {
"name": str,
"unified_name": lambda: None,
"name": lambda: None,
"country": lambda: None,
"unformatted_location": lambda: None,
@@ -457,17 +469,28 @@ class Artist(Base):
TITEL = "name"
# This is automatically generated
def __init__(self, name: str = "", unified_name: str = None, country: Country = None,
formed_in: ID3Timestamp = None, notes: FormattedText = None, lyrical_themes: List[str] = None,
general_genre: str = None, unformatted_location: str = None, source_list: List[Source] = None,
contact_list: List[Contact] = None, feature_song_list: List[Song] = None,
main_album_list: List[Album] = None, label_list: List[Label] = None, **kwargs) -> None:
super().__init__(name=name, unified_name=unified_name, country=country, formed_in=formed_in, notes=notes,
lyrical_themes=lyrical_themes, general_genre=general_genre,
unformatted_location=unformatted_location, source_list=source_list, contact_list=contact_list,
feature_song_list=feature_song_list, main_album_list=main_album_list, label_list=label_list,
**kwargs)
def __init__(
self,
name: str = None,
unified_name: str = None,
country: Country = None,
formed_in: ID3Timestamp = None,
notes: FormattedText = None,
lyrical_themes: List[str] = None,
general_genre: str = None,
unformatted_location: str = None,
source_list: List[Source] = None,
contact_list: List[Contact] = None,
feature_song_list: List[Song] = None,
main_album_list: List[Album] = None,
label_list: List[Label] = None,
**kwargs
) -> None:
real_kwargs = copy.copy(locals())
real_kwargs.update(real_kwargs.pop("kwargs", {}))
Base.__init__(**real_kwargs)
DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("main_album_collection", "feature_song_collection")
UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection",)
@@ -593,11 +616,6 @@ class Artist(Base):
return r
"""
Label
"""
class Label(Base):
COLLECTION_STRING_ATTRIBUTES = ("album_collection", "current_artist_collection")
@@ -625,12 +643,21 @@ class Label(Base):
TITEL = "name"
def __init__(self, name: str = None, unified_name: str = None, notes: FormattedText = None,
source_list: List[Source] = None, contact_list: List[Contact] = None,
album_list: List[Album] = None, current_artist_list: List[Artist] = None, **kwargs) -> None:
super().__init__(name=name, unified_name=unified_name, notes=notes, source_list=source_list,
contact_list=contact_list, album_list=album_list, current_artist_list=current_artist_list,
**kwargs)
def __init__(
self,
name: str = None,
unified_name: str = None,
notes: FormattedText = None,
source_list: List[Source] = None,
contact_list: List[Contact] = None,
album_list: List[Album] = None,
current_artist_list: List[Artist] = None,
**kwargs
) -> None:
real_kwargs = copy.copy(locals())
real_kwargs.update(real_kwargs.pop("kwargs", {}))
Base.__init__(**real_kwargs)
def __init_collections__(self):
self.album_collection.append_object_to_attribute = {

View File

@@ -3,8 +3,9 @@ import random
import re
from copy import copy
from pathlib import Path
from typing import Optional, Union, Type, Dict, Set, List, Tuple
from typing import Optional, Union, Type, Dict, Set, List, Tuple, TypedDict
from string import Formatter
from dataclasses import dataclass, field
import requests
from bs4 import BeautifulSoup
@@ -28,11 +29,23 @@ from ..utils.config import main_settings
from ..utils.support_classes.query import Query
from ..utils.support_classes.download_result import DownloadResult
from ..utils.string_processing import fit_to_file_system
from ..utils import trace
from ..utils import trace, output, BColors
INDEPENDENT_DB_OBJECTS = Union[Label, Album, Artist, Song]
INDEPENDENT_DB_TYPES = Union[Type[Song], Type[Album], Type[Artist], Type[Label]]
@dataclass
class FetchOptions:
    """
    Options a Page keeps as ``fetch_options``.

    The blacklist defaults to the album types configured in
    main_settings["album_type_blacklist"], resolved when an instance is created.
    """
    download_all: bool = False
    album_type_blacklist: Set[AlbumType] = field(
        default_factory=lambda: {AlbumType(entry) for entry in main_settings["album_type_blacklist"]},
    )
@dataclass
class DownloadOptions:
    """
    Options a Page keeps as ``download_options``.

    The blacklist defaults to the album types configured in
    main_settings["album_type_blacklist"], resolved when an instance is created.
    """
    # albums with a blacklisted type are only downloaded if this is set
    download_all: bool = False
    album_type_blacklist: Set[AlbumType] = field(
        default_factory=lambda: {AlbumType(entry) for entry in main_settings["album_type_blacklist"]},
    )

    # whether a file that was already found on disk still gets its audio / metadata processed
    process_audio_if_found: bool = False
    process_metadata_if_found: bool = True
class NamingDict(dict):
CUSTOM_KEYS: Dict[str, str] = {
@@ -101,6 +114,10 @@ class Page:
# set this to true, if all song details can also be fetched by fetching album details
NO_ADDITIONAL_DATA_FROM_SONG = False
def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
    """
    Store the options of this page, falling back to freshly created
    default options for every argument that was not passed in.
    """
    if not download_options:
        download_options = DownloadOptions()
    if not fetch_options:
        fetch_options = FetchOptions()

    self.download_options: DownloadOptions = download_options
    self.fetch_options: FetchOptions = fetch_options
def _search_regex(self, pattern, string, default=None, fatal=True, flags=0, group=None):
"""
Perform a regex search on the given string, using a single or a list of
@@ -176,7 +193,6 @@ class Page:
self,
music_object: DatabaseObject,
stop_at_level: int = 1,
post_process: bool = True
) -> DatabaseObject:
"""
when a music object with lacking data is passed in, it returns
@@ -208,7 +224,6 @@ class Page:
source=source,
enforce_type=type(music_object),
stop_at_level=stop_at_level,
post_process=False,
type_string=type(music_object).__name__,
entity_string=music_object.option_string,
)
@@ -230,7 +245,6 @@ class Page:
source: Source,
stop_at_level: int = 2,
enforce_type: Type[DatabaseObject] = None,
post_process: bool = True,
type_string: str = "",
entity_string: str = "",
) -> Optional[DatabaseObject]:
@@ -254,7 +268,7 @@ class Page:
}
if obj_type in fetch_map:
music_object = fetch_map[obj_type](source, stop_at_level)
music_object = fetch_map[obj_type](source, stop_at_level=stop_at_level)
else:
self.LOGGER.warning(f"Can't fetch details of type: {obj_type}")
return None
@@ -268,7 +282,7 @@ class Page:
for sub_element in collection:
sub_element.merge(
self.fetch_details(sub_element, stop_at_level=stop_at_level - 1, post_process=False))
self.fetch_details(sub_element, stop_at_level=stop_at_level - 1))
return music_object
@@ -288,8 +302,6 @@ class Page:
self,
music_object: DatabaseObject,
genre: str,
download_all: bool = False,
process_metadata_anyway: bool = True
) -> DownloadResult:
naming_dict: NamingDict = NamingDict({"genre": genre})
@@ -308,25 +320,22 @@ class Page:
fill_naming_objects(music_object)
return self._download(music_object, naming_dict, download_all, process_metadata_anyway=process_metadata_anyway)
return self._download(music_object, naming_dict)
def _download(
self,
music_object: DatabaseObject,
naming_dict: NamingDict,
download_all: bool = False,
skip_details: bool = False,
process_metadata_anyway: bool = True
**kwargs
) -> DownloadResult:
trace(f"downloading {type(music_object).__name__} [{music_object.option_string}]")
skip_next_details = skip_details
if isinstance(music_object, Song):
output(f"Downloading {music_object.option_string} to:", color=BColors.BOLD)
else:
output(f"Downloading {music_object.option_string}...", color=BColors.BOLD)
# Skips all releases, that are defined in shared.ALBUM_TYPE_BLACKLIST, if download_all is False
if isinstance(music_object, Album):
if self.NO_ADDITIONAL_DATA_FROM_SONG:
skip_next_details = True
if not download_all and music_object.album_type.value in main_settings["album_type_blacklist"]:
if not self.download_options.download_all and music_object.album_type in self.download_options.album_type_blacklist:
return DownloadResult()
if not (isinstance(music_object, Song) and self.NO_ADDITIONAL_DATA_FROM_SONG):
@@ -338,7 +347,7 @@ class Page:
naming_dict.add_object(music_object)
if isinstance(music_object, Song):
return self._download_song(music_object, naming_dict, process_metadata_anyway=process_metadata_anyway)
return self._download_song(music_object, naming_dict)
download_result: DownloadResult = DownloadResult()
@@ -347,13 +356,11 @@ class Page:
sub_ordered_music_object: DatabaseObject
for sub_ordered_music_object in collection:
download_result.merge(self._download(sub_ordered_music_object, naming_dict.copy(), download_all,
skip_details=skip_next_details,
process_metadata_anyway=process_metadata_anyway))
download_result.merge(self._download(sub_ordered_music_object, naming_dict.copy()))
return download_result
def _download_song(self, song: Song, naming_dict: NamingDict, process_metadata_anyway: bool = True):
def _download_song(self, song: Song, naming_dict: NamingDict):
if "genre" not in naming_dict and song.genre is not None:
naming_dict["genre"] = song.genre
@@ -373,40 +380,33 @@ class Page:
if song.target_collection.empty:
song.target_collection.append(new_target)
r = DownloadResult(1)
temp_target: Target = Target.temp()
found_on_disc = False
target: Target
for target in song.target_collection:
current_exists = target.exists
if current_exists:
output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY)
target.copy_content(temp_target)
found_on_disc = True
r.found_on_disk += 1
r.add_target(target)
else:
output(f'- {target.file_path}', color=BColors.GREY)
if not song.source_collection.has_source_page(self.SOURCE_TYPE):
return DownloadResult(error_message=f"No {self.__class__.__name__} source found for {song.option_string}.")
sources = song.source_collection.get_sources(self.SOURCE_TYPE)
temp_target: Target = Target(
relative_to_music_dir=False,
file_path=Path(
main_settings["temp_directory"],
str(song.id)
)
)
r = DownloadResult(1)
found_on_disc = False
target: Target
for target in song.target_collection:
if target.exists:
if process_metadata_anyway:
target.copy_content(temp_target)
found_on_disc = True
r.found_on_disk += 1
r.add_target(target)
if found_on_disc and not process_metadata_anyway:
self.LOGGER.info(f"{song.option_string} already exists, thus not downloading again.")
return r
skip_intervals = []
if not found_on_disc:
for source in sources:
r = self.download_song_to_target(source=source, target=temp_target, desc=song.option_string)
r = self.download_song_to_target(source=source, target=temp_target, desc="downloading")
if not r.is_fatal_error:
skip_intervals = self.get_skip_intervals(song, source)
@@ -417,16 +417,19 @@ class Page:
song=song,
temp_target=temp_target,
interval_list=skip_intervals,
found_on_disc=found_on_disc,
))
return r
def _post_process_targets(self, song: Song, temp_target: Target, interval_list: List) -> DownloadResult:
correct_codec(temp_target, interval_list=interval_list)
def _post_process_targets(self, song: Song, temp_target: Target, interval_list: List, found_on_disc: bool) -> DownloadResult:
if not found_on_disc or self.download_options.process_audio_if_found:
correct_codec(temp_target, interval_list=interval_list)
self.post_process_hook(song, temp_target)
write_metadata_to_target(song.metadata, temp_target, song)
if not found_on_disc or self.download_options.process_metadata_if_found:
write_metadata_to_target(song.metadata, temp_target, song)
r = DownloadResult()

View File

@@ -22,6 +22,7 @@ from ..objects import (
Artwork,
)
from ..connection import Connection
from ..utils import dump_to_file
from ..utils.support_classes.download_result import DownloadResult
from ..utils.string_processing import clean_song_title
from ..utils.config import main_settings, logging_settings

View File

@@ -1,7 +1,7 @@
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from typing import List, Optional, Type, Union, Generator
from typing import List, Optional, Type, Union, Generator, Dict, Any
from urllib.parse import urlparse
import pycountry
@@ -24,7 +24,7 @@ from ..objects import (
Lyrics,
Artwork
)
from ..utils.config import logging_settings
from ..utils.config import logging_settings, main_settings
from ..utils import string_processing, shared
from ..utils.string_processing import clean_song_title
from ..utils.support_classes.query import Query
@@ -361,7 +361,7 @@ class Musify(Page):
return Song(
title=clean_song_title(song_title, artist_name=artist_list[0].name if len(artist_list) > 0 else None),
main_artist_list=artist_list,
feature_artist_list=artist_list,
source_list=source_list
)
@@ -418,6 +418,10 @@ class Musify(Page):
href = artist_soup["href"]
if href is not None:
href_parts = href.split("/")
if len(href_parts) <= 1 or href_parts[-2] != "artist":
return
artist_src_list.append(Source(self.SOURCE_TYPE, self.HOST + href))
name_elem: BeautifulSoup = artist_soup.find("span", {"itemprop": "name"})
@@ -510,7 +514,7 @@ class Musify(Page):
title=clean_song_title(track_name, artist_name=artist_list[0].name if len(artist_list) > 0 else None),
source_list=source_list,
lyrics_list=lyrics_list,
main_artist_list=artist_list,
feature_artist_list=artist_list,
album_list=album_list,
artwork=artwork,
)
@@ -652,10 +656,104 @@ class Musify(Page):
return Song(
title=clean_song_title(song_name, artist_name=artist_list[0].name if len(artist_list) > 0 else None),
tracksort=tracksort,
main_artist_list=artist_list,
feature_artist_list=artist_list,
source_list=source_list
)
def _parse_album(self, soup: BeautifulSoup) -> Album:
    """
    Build an Album from the soup of a release page.

    The title is taken from the last breadcrumb item and overwritten by
    the ``itemprop="name"`` meta tag if that one is present. Artists are
    collected from the breadcrumb as well as from the album info list,
    the release date from the album info list.

    Every lookup is optional: elements missing from the page are skipped
    instead of raising.

    :param soup: parsed html of the release page
    :return: the album with whatever could be parsed
    """
    name: str = None
    source_list: List[Source] = []
    artist_list: List[Artist] = []
    date: ID3Timestamp = None

    # breadcrumb
    # if the breadcrumb list has 4 elements, then
    # the -2 is the artist link,
    # the -1 is the album
    # the attribute filter has to be a dict, a set literal isn't one
    breadcrumb_soup: BeautifulSoup = soup.find("ol", {"class": "breadcrumb"})
    breadcrumb_elements: List[BeautifulSoup] = []
    if breadcrumb_soup is not None:
        breadcrumb_elements = breadcrumb_soup.find_all("li", {"class": "breadcrumb-item"})

    if len(breadcrumb_elements) == 4:
        # album
        album_crumb: BeautifulSoup = breadcrumb_elements[-1]
        name = album_crumb.text.strip()

        # artist
        artist_crumb: BeautifulSoup = breadcrumb_elements[-2]
        anchor: BeautifulSoup = artist_crumb.find("a")
        # an anchor without href can't be an artist link, so check before splitting
        href = anchor.get("href") if anchor is not None else None
        if href is not None:
            href_parts = href.split("/")
            if len(href_parts) > 1 and href_parts[-2] == "artist":
                artist_source_list: List[Source] = [Source(self.SOURCE_TYPE, self.HOST + href.strip())]

                span: BeautifulSoup = anchor.find("span")
                if span is not None:
                    artist_list.append(Artist(
                        name=span.get_text(strip=True),
                        source_list=artist_source_list
                    ))
    else:
        self.LOGGER.debug("there are not 4 breadcrumb items, which shouldn't be the case")

    # meta
    meta_url: BeautifulSoup = soup.find("meta", {"itemprop": "url"})
    if meta_url is not None:
        url = meta_url.get("content")
        if url is not None:
            source_list.append(Source(self.SOURCE_TYPE, self.HOST + url))

    meta_name: BeautifulSoup = soup.find("meta", {"itemprop": "name"})
    if meta_name is not None:
        _name = meta_name.get("content")
        if _name is not None:
            # the meta tag takes precedence over the breadcrumb title
            name = _name

    # album info
    album_info_ul: BeautifulSoup = soup.find("ul", {"class": "album-info"})
    if album_info_ul is not None:
        artist_anchor: BeautifulSoup
        for artist_anchor in album_info_ul.find_all("a", {"itemprop": "byArtist"}):
            artist_source_list: List[Source] = []

            artist_url_meta = artist_anchor.find("meta", {"itemprop": "url"})
            if artist_url_meta is not None:
                artist_href = artist_url_meta.get("content")
                if artist_href is not None:
                    artist_source_list.append(Source(self.SOURCE_TYPE, url=self.HOST + artist_href))

            artist_meta_name = artist_anchor.find("meta", {"itemprop": "name"})
            if artist_meta_name is not None:
                artist_name = artist_meta_name.get("content")
                if artist_name is not None:
                    artist_list.append(Artist(
                        name=artist_name,
                        source_list=artist_source_list
                    ))

        time_soup: BeautifulSoup = album_info_ul.find("time", {"itemprop": "datePublished"})
        if time_soup is not None:
            raw_datetime = time_soup.get("datetime")
            if raw_datetime is not None:
                try:
                    date = ID3Timestamp.strptime(raw_datetime, "%Y-%m-%d")
                except ValueError:
                    # a date in an unexpected format is not fatal, the album just has no date
                    self.LOGGER.debug(f"Raw datetime doesn't match time format %Y-%m-%d: {raw_datetime}")

    return Album(
        title=name,
        source_list=source_list,
        artist_list=artist_list,
        date=date
    )
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
"""
fetches album from source:
@@ -694,19 +792,14 @@ class Musify(Page):
return album
def _get_artist_attributes(self, url: MusifyUrl) -> Artist:
def _fetch_initial_artist(self, url: MusifyUrl, source: Source, **kwargs) -> Artist:
"""
fetches the main Artist attributes from this endpoint
https://musify.club/artist/ghost-bath-280348?_pjax=#bodyContent
it needs to parse html
:param url:
:return:
"""
r = self.connection.get(f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent", name="artist_attributes_" + url.name_with_id)
if r is None:
return Artist()
return Artist(source_list=[source])
soup = self.get_soup_from_response(r)
@@ -821,7 +914,7 @@ class Musify(Page):
notes=notes
)
def _parse_album_card(self, album_card: BeautifulSoup, artist_name: str = None) -> Album:
def _parse_album_card(self, album_card: BeautifulSoup, artist_name: str = None, **kwargs) -> Album:
"""
<div class="card release-thumbnail" data-type="2">
<a href="/release/ghost-bath-self-loather-2021-1554266">
@@ -845,46 +938,20 @@ class Musify(Page):
</div>
"""
_id: Optional[str] = None
name: str = None
source_list: List[Source] = []
timestamp: Optional[ID3Timestamp] = None
album_status = None
def set_name(new_name: str):
nonlocal name
nonlocal artist_name
# example of just setting not working:
# https://musify.club/release/unjoy-eurythmie-psychonaut-4-tired-numb-still-alive-2012-324067
if new_name.count(" - ") != 1:
name = new_name
return
potential_artist_list, potential_name = new_name.split(" - ")
unified_artist_list = string_processing.unify(potential_artist_list)
if artist_name is not None:
if string_processing.unify(artist_name) not in unified_artist_list:
name = new_name
return
name = potential_name
return
name = new_name
album_kwargs: Dict[str, Any] = {
"source_list": [],
}
album_status_id = album_card.get("data-type")
if album_status_id.isdigit():
album_status_id = int(album_status_id)
album_type = ALBUM_TYPE_MAP[album_status_id]
album_kwargs["album_type"] = ALBUM_TYPE_MAP[album_status_id]
if album_status_id == 5:
album_status = AlbumStatus.BOOTLEG
album_kwargs["album_status"] = AlbumStatus.BOOTLEG
def parse_release_anchor(_anchor: BeautifulSoup, text_is_name=False):
nonlocal _id
nonlocal name
nonlocal source_list
nonlocal album_kwargs
if _anchor is None:
return
@@ -892,20 +959,13 @@ class Musify(Page):
href = _anchor.get("href")
if href is not None:
# add url to sources
source_list.append(Source(
album_kwargs["source_list"].append(Source(
self.SOURCE_TYPE,
self.HOST + href
))
# split id from url
split_href = href.split("-")
if len(split_href) > 1:
_id = split_href[-1]
if not text_is_name:
return
set_name(_anchor.text)
if text_is_name:
album_kwargs["title"] = clean_song_title(_anchor.text, artist_name)
anchor_list = album_card.find_all("a", recursive=False)
if len(anchor_list) > 0:
@@ -916,7 +976,7 @@ class Musify(Page):
if thumbnail is not None:
alt = thumbnail.get("alt")
if alt is not None:
set_name(alt)
album_kwargs["title"] = clean_song_title(alt, artist_name)
image_url = thumbnail.get("src")
else:
@@ -933,7 +993,7 @@ class Musify(Page):
13.11.2021
</small>
"""
nonlocal timestamp
nonlocal album_kwargs
italic_tagging_soup: BeautifulSoup = small_soup.find("i")
if italic_tagging_soup is None:
@@ -943,7 +1003,7 @@ class Musify(Page):
return
raw_time = small_soup.text.strip()
timestamp = ID3Timestamp.strptime(raw_time, "%d.%m.%Y")
album_kwargs["date"] = ID3Timestamp.strptime(raw_time, "%d.%m.%Y")
# parse small date
card_footer_list = album_card.find_all("div", {"class": "card-footer"})
@@ -956,112 +1016,18 @@ class Musify(Page):
else:
self.LOGGER.debug("there is not even 1 footer in the album card")
return Album(
title=name,
source_list=source_list,
date=timestamp,
album_type=album_type,
album_status=album_status,
)
return Album(**album_kwargs)
def _parse_album(self, soup: BeautifulSoup) -> Album:
    """
    Parses the html of an album page into an Album.

    Three parts of the page are read, each of them is optional:

    1. the breadcrumb list: if it has exactly 4 elements, the -1 is the
       album (its text is used as name) and the -2 is the artist link
    2. the ``<meta itemprop="url">`` and ``<meta itemprop="name">`` tags:
       the album source and the album name (a meta name overrides the
       name taken from the breadcrumb)
    3. the ``<ul class="album-info">`` list: one artist per ``byArtist``
       anchor and the release date from the ``datePublished`` time tag

    :param soup: the parsed html of the album page
    :return: an Album built from the title, sources, artists and date found
    """
    name: Optional[str] = None
    source_list: List[Source] = []
    artist_list: List[Artist] = []
    date: Optional[ID3Timestamp] = None

    # breadcrumb
    # the attribute filter has to be a dict; the previous set literal
    # {"class", "breadcrumb"} was a typo
    breadcrumb_soup: BeautifulSoup = soup.find("ol", {"class": "breadcrumb"})

    # a page without a breadcrumb list is handled like one with too few items
    breadcrumb_elements: List[BeautifulSoup] = []
    if breadcrumb_soup is not None:
        breadcrumb_elements = breadcrumb_soup.find_all("li", {"class": "breadcrumb-item"})

    if len(breadcrumb_elements) == 4:
        # album
        album_crumb: BeautifulSoup = breadcrumb_elements[-1]
        name = album_crumb.text.strip()

        # artist
        artist_crumb: BeautifulSoup = breadcrumb_elements[-2]
        anchor: BeautifulSoup = artist_crumb.find("a")
        if anchor is not None:
            href = anchor.get("href")
            artist_source_list: List[Source] = []

            if href is not None:
                artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + href.strip()))

            # the artist is only added if the anchor contains a span with its name
            span: BeautifulSoup = anchor.find("span")
            if span is not None:
                artist_list.append(Artist(
                    name=span.get_text(strip=True),
                    source_list=artist_source_list
                ))
    else:
        self.LOGGER.debug("there are not 4 breadcrumb items, which shouldn't be the case")

    # meta
    meta_url: BeautifulSoup = soup.find("meta", {"itemprop": "url"})
    if meta_url is not None:
        url = meta_url.get("content")
        if url is not None:
            source_list.append(Source(self.SOURCE_TYPE, self.HOST + url))

    meta_name: BeautifulSoup = soup.find("meta", {"itemprop": "name"})
    if meta_name is not None:
        _name = meta_name.get("content")
        if _name is not None:
            # overrides the name from the breadcrumb
            name = _name

    # album info
    album_info_ul: BeautifulSoup = soup.find("ul", {"class": "album-info"})
    if album_info_ul is not None:
        artist_anchor: BeautifulSoup
        for artist_anchor in album_info_ul.find_all("a", {"itemprop": "byArtist"}):
            artist_source_list: List[Source] = []

            artist_url_meta = artist_anchor.find("meta", {"itemprop": "url"})
            if artist_url_meta is not None:
                artist_href = artist_url_meta.get("content")
                if artist_href is not None:
                    artist_source_list.append(Source(self.SOURCE_TYPE, url=self.HOST + artist_href))

            # an anchor without a name doesn't produce an artist
            artist_meta_name = artist_anchor.find("meta", {"itemprop": "name"})
            if artist_meta_name is not None:
                artist_name = artist_meta_name.get("content")
                if artist_name is not None:
                    artist_list.append(Artist(
                        name=artist_name,
                        source_list=artist_source_list
                    ))

        time_soup: BeautifulSoup = album_info_ul.find("time", {"itemprop": "datePublished"})
        if time_soup is not None:
            raw_datetime = time_soup.get("datetime")
            if raw_datetime is not None:
                try:
                    date = ID3Timestamp.strptime(raw_datetime, "%Y-%m-%d")
                except ValueError:
                    # date stays None if the format doesn't match
                    self.LOGGER.debug(f"Raw datetime doesn't match time format %Y-%m-%d: {raw_datetime}")

    return Album(
        title=name,
        source_list=source_list,
        artist_list=artist_list,
        date=date
    )
def _get_discography(self, artist: Artist, url: MusifyUrl, artist_name: str = None, stop_at_level: int = 1) -> Generator[Album, None, None]:
def _fetch_artist_discography(self, artist: Artist, url: MusifyUrl, artist_name: str = None, **kwargs):
"""
POST https://musify.club/artist/filteralbums
ArtistID: 280348
SortOrder.Property: dateCreated
SortOrder.IsAscending: false
X-Requested-With: XMLHttpRequest
ArtistID: 280348
SortOrder.Property: dateCreated
SortOrder.IsAscending: false
X-Requested-With: XMLHttpRequest
"""
_download_all = kwargs.get("download_all", False)
_album_type_blacklist = kwargs.get("album_type_blacklist", main_settings["album_type_blacklist"])
endpoint = self.HOST + "/" + url.source_type.value + "/filteralbums"
@@ -1072,36 +1038,29 @@ class Musify(Page):
"X-Requested-With": "XMLHttpRequest"
}, name="discography_" + url.name_with_id)
if r is None:
return []
soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")
return
soup: BeautifulSoup = self.get_soup_from_response(r)
for card_soup in soup.find_all("div", {"class": "card"}):
album = self._parse_album_card(card_soup, artist_name)
if album.album_type is AlbumType.COMPILATION_ALBUM or album.album_type is AlbumType.MIXTAPE:
album = self._parse_album_card(card_soup, artist_name, **kwargs)
if not self.fetch_options.download_all and album.album_type in self.fetch_options.album_type_blacklist:
continue
artist.main_album_collection.append(album)
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
def fetch_artist(self, source: Source, **kwargs) -> Artist:
"""
fetches artist from source
TODO
[x] discography
[x] attributes
[] picture gallery
Args:
source (Source): the source to fetch
stop_at_level: int = 1: if it is false, every album from discography will be fetched. Defaults to False.
Returns:
Artist: the artist fetched
"""
url = parse_url(source.url)
artist = self._get_artist_attributes(url)
self._get_discography(artist, url, artist.name)
artist = self._fetch_initial_artist(url, source=source, **kwargs)
self._fetch_artist_discography(artist, url, artist.name, **kwargs)
return artist

View File

@@ -42,7 +42,7 @@ class YouTube(SuperYouTube):
SOURCE_TYPE = SourcePages.YOUTUBE
LOGGER = logging_settings["youtube_logger"]
NO_ADDITIONAL_DATA_FROM_SONG = True
NO_ADDITIONAL_DATA_FROM_SONG = False
def __init__(self, *args, **kwargs):
self.connection: Connection = Connection(

View File

@@ -12,7 +12,7 @@ if not load_dotenv(Path(__file__).parent.parent.parent / ".env"):
__stage__ = os.getenv("STAGE", "prod")
DEBUG = (__stage__ == "dev") and True
DEBUG = (__stage__ == "dev") and False
DEBUG_LOGGING = DEBUG and False
DEBUG_TRACE = DEBUG and True
DEBUG_OBJECT_TRACE = DEBUG and False

View File

@@ -116,10 +116,13 @@ def clean_song_title(raw_song_title: str, artist_name: Optional[str] = None) ->
# Remove artist from the start of the title
if raw_song_title.lower().startswith(artist_name.lower()):
raw_song_title = raw_song_title[len(artist_name):].strip()
if raw_song_title.startswith("-"):
raw_song_title = raw_song_title[1:].strip()
possible_new_name = raw_song_title[len(artist_name):].strip()
for char in ("-", "", ":", "|"):
if possible_new_name.startswith(char):
raw_song_title = possible_new_name[1:].strip()
break
return raw_song_title.strip()

View File

@@ -69,7 +69,7 @@ dependencies = [
"toml~=0.10.2",
"typing_extensions~=4.7.1",
"python-sponsorblock~=0.0.0",
"python-sponsorblock~=0.0.dev1",
"youtube_dl",
]
dynamic = [