Compare commits

...

12 Commits

Author SHA1 Message Date
5179c64161 Merge branch 'experimental' of ssh://gitea.elara.ws:2222/music-kraken/music-kraken-core into experimental
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-10 17:53:39 +02:00
04405f88eb Merge branch 'fix/musify_scrapes_year_as_artist' into experimental 2024-05-10 17:52:11 +02:00
acd183c90e fix: bandcamp
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
ci/woodpecker/pr/woodpecker Pipeline was successful
ci/woodpecker/pull_request_closed/woodpecker Pipeline was successful
2024-05-10 17:39:30 +02:00
7186f06ce6 feat: improved interface
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-10 17:33:07 +02:00
6e354af0d1 feat: added proper settings
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-10 17:06:40 +02:00
155f239c8a feat: changed ids for audio tempfiles to random id instead of increment id
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-10 15:32:14 +02:00
36db651dfa fix: cleaning the song name deleted the song if the song name was the same as the artist name
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-10 15:25:11 +02:00
8426f6e2ea fix: filtered another year
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-10 15:20:22 +02:00
75d0a83d14 fix: changed dependency
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-09 10:57:55 +02:00
Hellow
2af577c0cd fix: removed empty objects
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-08 21:06:40 +02:00
Hellow
3780f05e58 feat: added launch.json 2024-05-08 16:48:27 +02:00
Hellow
a0305a7a6e fix: don't add year as artist 2024-05-08 16:47:56 +02:00
15 changed files with 132 additions and 83 deletions

22
.vscode/launch.json vendored Normal file
View File

@ -0,0 +1,22 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal"
},
{
"name": "Python Debugger: Download script",
"type": "debugpy",
"request": "launch",
"program": "development/actual_donwload.py",
"console": "integratedTerminal"
}
]
}

View File

@ -19,6 +19,7 @@
"albumsort",
"APIC",
"Bandcamp",
"bitrate",
"dotenv",
"encyclopaedia",
"ENDC",
@ -27,6 +28,7 @@
"metallum",
"musify",
"OKBLUE",
"OKGREEN",
"pathvalidate",
"Referer",
"sponsorblock",

View File

@ -6,9 +6,8 @@ logging.getLogger().setLevel(logging.DEBUG)
if __name__ == "__main__":
commands = [
"s: #a Crystal F",
"10",
"2",
"s: #a Psychonaut 4",
"d: 0",
]

View File

@ -47,7 +47,7 @@ def correct_codec(target: Target, bitrate_kb: int = main_settings["bitrate"], au
# run the ffmpeg command with a progressbar
ff = FfmpegProgress(ffmpeg_command)
with tqdm(total=100, desc=f"removing {len(interval_list)} segments") as pbar:
with tqdm(total=100, desc=f"processing") as pbar:
for progress in ff.run_command_with_progress():
pbar.update(progress-pbar.n)

View File

@ -166,9 +166,9 @@ class Downloader:
self.genre = genre or get_genre()
self.process_metadata_anyway = process_metadata_anyway
print()
print(f"Downloading to: \"{self.genre}\"")
print()
output()
output(f"Downloading to: \"{self.genre}\"", color=BColors.HEADER)
output()
def print_current_options(self):
self.page_dict = dict()
@ -312,10 +312,8 @@ class Downloader:
def download(self, data_objects: List[DatabaseObject], **kwargs) -> bool:
output()
if len(data_objects) == 1:
output(f"Downloading {data_objects[0].option_string}...", color=BColors.BOLD)
else:
output(f"Downloading {len(data_objects)} objects...", *("- " + o.option_string for o in data_objects), color=BColors.BOLD, sep="\n")
if len(data_objects) > 1:
output(f"Downloading {len(data_objects)} objects...", *("- " + o.option_string for o in data_objects), color=BColors.BOLD, sep="\n")
_result_map: Dict[DatabaseObject, DownloadResult] = dict()

View File

@ -118,7 +118,7 @@ class Pages:
audio_pages = self._audio_pages_set.intersection(_page_types)
for download_page in audio_pages:
return self._page_instances[download_page].download(music_object=music_object, genre=genre, download_all=download_all, process_metadata_anyway=process_metadata_anyway)
return self._page_instances[download_page].download(music_object=music_object, genre=genre)
return DownloadResult(error_message=f"No audio source has been found for {music_object}.")

View File

@ -153,6 +153,8 @@ class Collection(Generic[T]):
if other is None:
return
if not other._inner._has_data:
return
if other.id in self._indexed_from_id:
return

View File

@ -30,6 +30,8 @@ class InnerData:
_refers_to_instances: set = None
_is_in_collection: set = None
_has_data: bool = False
"""
Attribute versions keep track of whether the attribute has been changed.
"""
@ -48,9 +50,19 @@ class InnerData:
for key, value in kwargs.items():
if hasattr(value, "__is_collection__"):
value._collection_for[self] = key
self.__setattr__(key, value)
if self._has_data:
continue
def __setattr__(self, key: str, value):
if self._has_data or not hasattr(self, "_default_values"):
return super().__setattr__(key, value)
super().__setattr__("_has_data", not (key in self._default_values and self._default_values[key] == value))
return super().__setattr__(key, value)
def __hash__(self):
return self.id

View File

@ -111,7 +111,7 @@ class Song(Base):
"album_collection": Collection,
"feature_artist_collection": Collection,
"title": lambda: "",
"title": lambda: None,
"unified_title": lambda: None,
"isrc": lambda: None,
"genre": lambda: None,

View File

@ -3,8 +3,9 @@ import random
import re
from copy import copy
from pathlib import Path
from typing import Optional, Union, Type, Dict, Set, List, Tuple
from typing import Optional, Union, Type, Dict, Set, List, Tuple, TypedDict
from string import Formatter
from dataclasses import dataclass, field
import requests
from bs4 import BeautifulSoup
@ -28,11 +29,23 @@ from ..utils.config import main_settings
from ..utils.support_classes.query import Query
from ..utils.support_classes.download_result import DownloadResult
from ..utils.string_processing import fit_to_file_system
from ..utils import trace
from ..utils import trace, output, BColors
INDEPENDENT_DB_OBJECTS = Union[Label, Album, Artist, Song]
INDEPENDENT_DB_TYPES = Union[Type[Song], Type[Album], Type[Artist], Type[Label]]
@dataclass
class FetchOptions:
download_all: bool = False
album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"]))
@dataclass
class DownloadOptions:
download_all: bool = False
album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"]))
process_audio_if_found: bool = False
process_metadata_if_found: bool = True
class NamingDict(dict):
CUSTOM_KEYS: Dict[str, str] = {
@ -101,6 +114,10 @@ class Page:
# set this to true, if all song details can also be fetched by fetching album details
NO_ADDITIONAL_DATA_FROM_SONG = False
def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
self.download_options: DownloadOptions = download_options or DownloadOptions()
self.fetch_options: FetchOptions = fetch_options or FetchOptions()
def _search_regex(self, pattern, string, default=None, fatal=True, flags=0, group=None):
"""
Perform a regex search on the given string, using a single or a list of
@ -176,7 +193,6 @@ class Page:
self,
music_object: DatabaseObject,
stop_at_level: int = 1,
post_process: bool = True
) -> DatabaseObject:
"""
when a music object with lacking data is passed in, it returns
@ -208,7 +224,6 @@ class Page:
source=source,
enforce_type=type(music_object),
stop_at_level=stop_at_level,
post_process=False,
type_string=type(music_object).__name__,
entity_string=music_object.option_string,
)
@ -230,7 +245,6 @@ class Page:
source: Source,
stop_at_level: int = 2,
enforce_type: Type[DatabaseObject] = None,
post_process: bool = True,
type_string: str = "",
entity_string: str = "",
) -> Optional[DatabaseObject]:
@ -268,7 +282,7 @@ class Page:
for sub_element in collection:
sub_element.merge(
self.fetch_details(sub_element, stop_at_level=stop_at_level - 1, post_process=False))
self.fetch_details(sub_element, stop_at_level=stop_at_level - 1))
return music_object
@ -288,8 +302,6 @@ class Page:
self,
music_object: DatabaseObject,
genre: str,
download_all: bool = False,
process_metadata_anyway: bool = True
) -> DownloadResult:
naming_dict: NamingDict = NamingDict({"genre": genre})
@ -308,25 +320,22 @@ class Page:
fill_naming_objects(music_object)
return self._download(music_object, naming_dict, download_all, process_metadata_anyway=process_metadata_anyway)
return self._download(music_object, naming_dict)
def _download(
self,
music_object: DatabaseObject,
naming_dict: NamingDict,
download_all: bool = False,
skip_details: bool = False,
process_metadata_anyway: bool = True
**kwargs
) -> DownloadResult:
trace(f"downloading {type(music_object).__name__} [{music_object.option_string}]")
skip_next_details = skip_details
if isinstance(music_object, Song):
output(f"Downloading {music_object.option_string} to:", color=BColors.BOLD)
else:
output(f"Downloading {music_object.option_string}...", color=BColors.BOLD)
# Skip every release whose album type is on the album type blacklist, unless download_all is set
if isinstance(music_object, Album):
if self.NO_ADDITIONAL_DATA_FROM_SONG:
skip_next_details = True
if not download_all and music_object.album_type.value in main_settings["album_type_blacklist"]:
if not self.download_options.download_all and music_object.album_type in self.download_options.album_type_blacklist:
return DownloadResult()
if not (isinstance(music_object, Song) and self.NO_ADDITIONAL_DATA_FROM_SONG):
@ -338,7 +347,7 @@ class Page:
naming_dict.add_object(music_object)
if isinstance(music_object, Song):
return self._download_song(music_object, naming_dict, process_metadata_anyway=process_metadata_anyway)
return self._download_song(music_object, naming_dict)
download_result: DownloadResult = DownloadResult()
@ -347,13 +356,11 @@ class Page:
sub_ordered_music_object: DatabaseObject
for sub_ordered_music_object in collection:
download_result.merge(self._download(sub_ordered_music_object, naming_dict.copy(), download_all,
skip_details=skip_next_details,
process_metadata_anyway=process_metadata_anyway))
download_result.merge(self._download(sub_ordered_music_object, naming_dict.copy()))
return download_result
def _download_song(self, song: Song, naming_dict: NamingDict, process_metadata_anyway: bool = True):
def _download_song(self, song: Song, naming_dict: NamingDict):
if "genre" not in naming_dict and song.genre is not None:
naming_dict["genre"] = song.genre
@ -373,40 +380,33 @@ class Page:
if song.target_collection.empty:
song.target_collection.append(new_target)
r = DownloadResult(1)
temp_target: Target = Target.temp()
found_on_disc = False
target: Target
for target in song.target_collection:
current_exists = target.exists
if current_exists:
output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY)
target.copy_content(temp_target)
found_on_disc = True
r.found_on_disk += 1
r.add_target(target)
else:
output(f'- {target.file_path}', color=BColors.GREY)
if not song.source_collection.has_source_page(self.SOURCE_TYPE):
return DownloadResult(error_message=f"No {self.__class__.__name__} source found for {song.option_string}.")
sources = song.source_collection.get_sources(self.SOURCE_TYPE)
temp_target: Target = Target(
relative_to_music_dir=False,
file_path=Path(
main_settings["temp_directory"],
str(song.id)
)
)
r = DownloadResult(1)
found_on_disc = False
target: Target
for target in song.target_collection:
if target.exists:
if process_metadata_anyway:
target.copy_content(temp_target)
found_on_disc = True
r.found_on_disk += 1
r.add_target(target)
if found_on_disc and not process_metadata_anyway:
self.LOGGER.info(f"{song.option_string} already exists, thus not downloading again.")
return r
skip_intervals = []
if not found_on_disc:
for source in sources:
r = self.download_song_to_target(source=source, target=temp_target, desc=song.option_string)
r = self.download_song_to_target(source=source, target=temp_target, desc="downloading")
if not r.is_fatal_error:
skip_intervals = self.get_skip_intervals(song, source)
@ -417,16 +417,19 @@ class Page:
song=song,
temp_target=temp_target,
interval_list=skip_intervals,
found_on_disc=found_on_disc,
))
return r
def _post_process_targets(self, song: Song, temp_target: Target, interval_list: List) -> DownloadResult:
correct_codec(temp_target, interval_list=interval_list)
def _post_process_targets(self, song: Song, temp_target: Target, interval_list: List, found_on_disc: bool) -> DownloadResult:
if not found_on_disc or self.download_options.process_audio_if_found:
correct_codec(temp_target, interval_list=interval_list)
self.post_process_hook(song, temp_target)
write_metadata_to_target(song.metadata, temp_target, song)
if not found_on_disc or self.download_options.process_metadata_if_found:
write_metadata_to_target(song.metadata, temp_target, song)
r = DownloadResult()

View File

@ -22,6 +22,7 @@ from ..objects import (
Artwork,
)
from ..connection import Connection
from ..utils import dump_to_file
from ..utils.support_classes.download_result import DownloadResult
from ..utils.string_processing import clean_song_title
from ..utils.config import main_settings, logging_settings

View File

@ -418,6 +418,10 @@ class Musify(Page):
href = artist_soup["href"]
if href is not None:
href_parts = href.split("/")
if len(href_parts) <= 1 or href_parts[-2] != "artist":
return
artist_src_list.append(Source(self.SOURCE_TYPE, self.HOST + href))
name_elem: BeautifulSoup = artist_soup.find("span", {"itemprop": "name"})
@ -681,17 +685,20 @@ class Musify(Page):
anchor: BeautifulSoup = artist_crumb.find("a")
if anchor is not None:
href = anchor.get("href")
artist_source_list: List[Source] = []
if href is not None:
artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + href.strip()))
href_parts = href.split("/")
if not(len(href_parts) <= 1 or href_parts[-2] != "artist"):
artist_source_list: List[Source] = []
span: BeautifulSoup = anchor.find("span")
if span is not None:
artist_list.append(Artist(
name=span.get_text(strip=True),
source_list=artist_source_list
))
if href is not None:
artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + href.strip()))
span: BeautifulSoup = anchor.find("span")
if span is not None:
artist_list.append(Artist(
name=span.get_text(strip=True),
source_list=artist_source_list
))
else:
self.LOGGER.debug("there are not 4 breadcrumb items, which shouldn't be the case")
@ -938,10 +945,10 @@ class Musify(Page):
album_status_id = album_card.get("data-type")
if album_status_id.isdigit():
album_status_id = int(album_status_id)
album_type = ALBUM_TYPE_MAP[album_status_id]
album_kwargs["album_type"] = ALBUM_TYPE_MAP[album_status_id]
if album_status_id == 5:
album_status = AlbumStatus.BOOTLEG
album_kwargs["album_status"] = AlbumStatus.BOOTLEG
def parse_release_anchor(_anchor: BeautifulSoup, text_is_name=False):
nonlocal album_kwargs
@ -1037,7 +1044,7 @@ class Musify(Page):
for card_soup in soup.find_all("div", {"class": "card"}):
album = self._parse_album_card(card_soup, artist_name, **kwargs)
if album.album_type in _album_type_blacklist:
if not self.fetch_options.download_all and album.album_type in self.fetch_options.album_type_blacklist:
continue
artist.main_album_collection.append(album)

View File

@ -42,7 +42,7 @@ class YouTube(SuperYouTube):
SOURCE_TYPE = SourcePages.YOUTUBE
LOGGER = logging_settings["youtube_logger"]
NO_ADDITIONAL_DATA_FROM_SONG = True
NO_ADDITIONAL_DATA_FROM_SONG = False
def __init__(self, *args, **kwargs):
self.connection: Connection = Connection(

View File

@ -12,7 +12,7 @@ if not load_dotenv(Path(__file__).parent.parent.parent / ".env"):
__stage__ = os.getenv("STAGE", "prod")
DEBUG = (__stage__ == "dev") and True
DEBUG = (__stage__ == "dev") and False
DEBUG_LOGGING = DEBUG and False
DEBUG_TRACE = DEBUG and True
DEBUG_OBJECT_TRACE = DEBUG and False

View File

@ -116,10 +116,13 @@ def clean_song_title(raw_song_title: str, artist_name: Optional[str] = None) ->
# Remove artist from the start of the title
if raw_song_title.lower().startswith(artist_name.lower()):
raw_song_title = raw_song_title[len(artist_name):].strip()
if raw_song_title.startswith("-"):
raw_song_title = raw_song_title[1:].strip()
possible_new_name = raw_song_title[len(artist_name):].strip()
for char in ("-", "–", ":", "|"):
if possible_new_name.startswith(char):
raw_song_title = possible_new_name[1:].strip()
break
return raw_song_title.strip()