26 Commits

Author SHA1 Message Date
0e6fe8187a feat: fetch_from_url
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-13 18:09:11 +02:00
0343c11a62 feat: migrated fetch details and from source
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-13 18:03:20 +02:00
9769cf4033 Merge pull request 'fix/caching_signatures' (#32) from fix/caching_signatures into experimental
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
Reviewed-on: #32
2024-05-13 15:15:37 +00:00
55024bd987 fix: key error
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
ci/woodpecker/pr/woodpecker Pipeline was successful
ci/woodpecker/pull_request_closed/woodpecker Pipeline was successful
2024-05-13 17:15:15 +02:00
d85498869d feat: tracksort and albumsort + some other stuff
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-13 14:22:33 +02:00
c3350b016d fix: timeout for yt music stream
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-13 13:39:57 +02:00
788103a68e fix: removed invalid stuff
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-13 13:28:54 +02:00
5179c64161 Merge branch 'experimental' of ssh://gitea.elara.ws:2222/music-kraken/music-kraken-core into experimental
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-10 17:53:39 +02:00
04405f88eb Merge branch 'fix/musify_scrapes_year_as_artist' into experimental 2024-05-10 17:52:11 +02:00
acd183c90e fix: bandcamp
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
ci/woodpecker/pr/woodpecker Pipeline was successful
ci/woodpecker/pull_request_closed/woodpecker Pipeline was successful
2024-05-10 17:39:30 +02:00
7186f06ce6 feat: improved interface
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-10 17:33:07 +02:00
6e354af0d1 feat: added proper settings
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-10 17:06:40 +02:00
155f239c8a feat: changed ids for audio tempfiles to random id instead of increment id
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-10 15:32:14 +02:00
36db651dfa fix: cleaning the song name deleted the song if the song name was the same as the artist name
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-10 15:25:11 +02:00
8426f6e2ea fix: filtered another year
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-10 15:20:22 +02:00
75d0a83d14 fix: changed dependency
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-09 10:57:55 +02:00
Hellow
2af577c0cd fix: removed empty objects
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-08 21:06:40 +02:00
Hellow
3780f05e58 feat: added launch.json 2024-05-08 16:48:27 +02:00
Hellow
a0305a7a6e fix: don't add year as artist 2024-05-08 16:47:56 +02:00
949583225a Merge pull request 'Correct duplicate values' (#22) from issue16 into experimental
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
Reviewed-on: #22
2024-05-08 12:33:34 +00:00
4e0b005170 Merge branch 'experimental' into issue16
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
ci/woodpecker/pr/woodpecker Pipeline was successful
ci/woodpecker/pull_request_closed/woodpecker Pipeline was successful
2024-05-08 12:33:56 +02:00
e3d7ed8837 Merge pull request 'fix/musify_artist_spam' (#27) from fix/musify_artist_spam into experimental
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
Reviewed-on: #27
2024-05-08 10:31:23 +00:00
e3e7aea959 fix for lyrics
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
ci/woodpecker/pr/woodpecker Pipeline was successful
2024-05-08 12:27:56 +02:00
12c0bf6b83 Merge pull request 'ci: make tags release to the music-kraken pypi package instead of music-kraken-stable' (#24) from ci/remove-stable-package into experimental
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
Reviewed-on: #24
2024-05-07 21:17:11 +00:00
ac9a74138c ci: make tags release to the music-kraken pypi package instead of music-kraken-stable
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
ci/woodpecker/pr/woodpecker Pipeline was successful
ci/woodpecker/pull_request_closed/woodpecker Pipeline was successful
2024-05-07 16:07:45 +00:00
709c5ebaa8 Correct duplicate values
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
ci/woodpecker/pr/woodpecker Pipeline was successful
2024-05-07 12:34:24 +02:00
24 changed files with 266 additions and 217 deletions

22
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,22 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal"
},
{
"name": "Python Debugger: Download script",
"type": "debugpy",
"request": "launch",
"program": "development/actual_donwload.py",
"console": "integratedTerminal"
}
]
}

View File

@@ -19,14 +19,17 @@
"albumsort",
"APIC",
"Bandcamp",
"bitrate",
"dotenv",
"encyclopaedia",
"ENDC",
"Gitea",
"isrc",
"levenshtein",
"metallum",
"musify",
"OKBLUE",
"OKGREEN",
"pathvalidate",
"Referer",
"sponsorblock",

View File

@@ -11,7 +11,6 @@ steps:
build-stable:
image: python
commands:
- sed -i 's/name = "music-kraken"/name = "music-kraken-stable"/' pyproject.toml
- python -m pip install -r requirements-dev.txt
- python3 -m build
environment:

View File

@@ -7,8 +7,7 @@ logging.getLogger().setLevel(logging.DEBUG)
if __name__ == "__main__":
commands = [
"s: #a Crystal F",
"10",
"2",
"d: 20",
]

View File

@@ -47,7 +47,7 @@ def correct_codec(target: Target, bitrate_kb: int = main_settings["bitrate"], au
# run the ffmpeg command with a progressbar
ff = FfmpegProgress(ffmpeg_command)
with tqdm(total=100, desc=f"removing {len(interval_list)} segments") as pbar:
with tqdm(total=100, desc=f"processing") as pbar:
for progress in ff.run_command_with_progress():
pbar.update(progress-pbar.n)

View File

@@ -1,5 +1,5 @@
import mutagen
from mutagen.id3 import ID3, Frame, APIC
from mutagen.id3 import ID3, Frame, APIC, USLT
from pathlib import Path
from typing import List
import logging
@@ -7,6 +7,7 @@ from PIL import Image
from ..utils.config import logging_settings, main_settings
from ..objects import Song, Target, Metadata
from ..objects.metadata import Mapping
from ..connection import Connection
LOGGER = logging_settings["tagging_logger"]
@@ -105,8 +106,11 @@ def write_metadata_to_target(metadata: Metadata, target: Target, song: Song):
data=converted_target.read_bytes(),
)
)
mutagen_file = mutagen.File(target.file_path)
id3_object.frames.delall("USLT")
uslt_val = metadata.get_id3_value(Mapping.UNSYNCED_LYRICS)
id3_object.frames.add(
USLT(encoding=3, lang=u'eng', desc=u'desc', text=uslt_val)
)
id3_object.add_metadata(metadata)
id3_object.save()

View File

@@ -166,9 +166,9 @@ class Downloader:
self.genre = genre or get_genre()
self.process_metadata_anyway = process_metadata_anyway
print()
print(f"Downloading to: \"{self.genre}\"")
print()
output()
output(f"Downloading to: \"{self.genre}\"", color=BColors.HEADER)
output()
def print_current_options(self):
self.page_dict = dict()
@@ -312,10 +312,8 @@ class Downloader:
def download(self, data_objects: List[DatabaseObject], **kwargs) -> bool:
output()
if len(data_objects) == 1:
output(f"Downloading {data_objects[0].option_string}...", color=BColors.BOLD)
else:
output(f"Downloading {len(data_objects)} objects...", *("- " + o.option_string for o in data_objects), color=BColors.BOLD, sep="\n")
if len(data_objects) > 1:
output(f"Downloading {len(data_objects)} objects...", *("- " + o.option_string for o in data_objects), color=BColors.BOLD, sep="\n")
_result_map: Dict[DatabaseObject, DownloadResult] = dict()

View File

@@ -1,7 +1,7 @@
from typing import Tuple, Type, Dict, Set
from typing import Tuple, Type, Dict, Set, Optional
from .results import SearchResults
from ..objects import DatabaseObject, Source
from ..objects import DatabaseObject as DataObject, Source
from ..utils.config import youtube_settings
from ..utils.enums.source import SourcePages
@@ -34,6 +34,13 @@ SHADY_PAGES: Set[Type[Page]] = {
Musify,
}
fetch_map = {
Song: "fetch_song",
Album: "fetch_album",
Artist: "fetch_artist",
Label: "fetch_label",
}
if DEBUG_PAGES:
DEBUGGING_PAGE = Bandcamp
print(f"Only downloading from page {DEBUGGING_PAGE}.")
@@ -68,7 +75,12 @@ class Pages:
for page_type in self.pages:
self._page_instances[page_type] = page_type()
self._source_to_page[page_type.SOURCE_TYPE] = page_type
def _get_page_from_enum(self, source_page: SourcePages) -> Page:
if source_page not in self._source_to_page:
return None
return self._page_instances[self._source_to_page[source_page]]
def search(self, query: Query) -> SearchResults:
result = SearchResults()
@@ -80,22 +92,42 @@ class Pages:
return result
def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1) -> DatabaseObject:
if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
return music_object
def fetch_details(self, data_object: DataObject, stop_at_level: int = 1) -> DataObject:
if not isinstance(data_object, INDEPENDENT_DB_OBJECTS):
return data_object
for source_page in music_object.source_collection.source_pages:
if source_page not in self._source_to_page:
continue
source: Source
for source in data_object.source_collection.get_sources():
new_data_object = self.fetch_from_source(source=source, stop_at_level=stop_at_level)
if new_data_object is not None:
data_object.merge(new_data_object)
page_type = self._source_to_page[source_page]
if page_type in self._pages_set:
music_object.merge(self._page_instances[page_type].fetch_details(music_object=music_object, stop_at_level=stop_at_level))
return data_object
def fetch_from_source(self, source: Source, **kwargs) -> Optional[DataObject]:
page: Page = self._get_page_from_enum(source.page_enum)
if page is None:
return None
return music_object
# getting the appropriate function for the page and the object type
source_type = page.get_source_type(source)
if not hasattr(page, fetch_map[source_type]):
return None
func = getattr(page, fetch_map[source_type])(source=source, **kwargs)
# fetching the data object and marking it as fetched
data_object: DataObject = func(source=source)
data_object.mark_as_fetched(source.hash_url)
return data_object
def is_downloadable(self, music_object: DatabaseObject) -> bool:
def fetch_from_url(self, url: str) -> Optional[DataObject]:
source = Source.match_url(url, SourcePages.MANUAL)
if source is None:
return None
return self.fetch_from_source(source=source)
def is_downloadable(self, music_object: DataObject) -> bool:
_page_types = set(self._source_to_page)
for src in music_object.source_collection.source_pages:
if src in self._source_to_page:
@@ -104,7 +136,7 @@ class Pages:
audio_pages = self._audio_pages_set.intersection(_page_types)
return len(audio_pages) > 0
def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult:
def download(self, music_object: DataObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult:
if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
return DownloadResult(error_message=f"{type(music_object).__name__} can't be downloaded.")
@@ -118,11 +150,11 @@ class Pages:
audio_pages = self._audio_pages_set.intersection(_page_types)
for download_page in audio_pages:
return self._page_instances[download_page].download(music_object=music_object, genre=genre, download_all=download_all, process_metadata_anyway=process_metadata_anyway)
return self._page_instances[download_page].download(music_object=music_object, genre=genre)
return DownloadResult(error_message=f"No audio source has been found for {music_object}.")
def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DatabaseObject]:
def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]:
source = Source.match_url(url, SourcePages.MANUAL)
if source is None:

View File

@@ -24,4 +24,4 @@ from .parents import OuterProxy
from .artwork import Artwork
DatabaseObject = TypeVar('T', bound=OuterProxy)
DatabaseObject = OuterProxy

View File

@@ -153,6 +153,8 @@ class Collection(Generic[T]):
if other is None:
return
if not other._inner._has_data:
return
if other.id in self._indexed_from_id:
return

View File

@@ -32,7 +32,7 @@ class FormattedText:
if self.is_empty and other.is_empty:
return True
return self.doc == other.doc
return self.html == other.html
@property
def markdown(self) -> str:

View File

@@ -92,7 +92,7 @@ class Mapping(Enum):
key = attribute.value
if key[0] == 'T':
# a text fiel
# a text field
return cls.get_text_instance(key, value)
if key[0] == "W":
# an url field
@@ -355,7 +355,12 @@ class Metadata:
return None
list_data = self.id3_dict[field]
#correct duplications
correct_list_data = list()
for data in list_data:
if data not in correct_list_data:
correct_list_data.append(data)
list_data = correct_list_data
# convert for example the time objects to timestamps
for i, element in enumerate(list_data):
# for performances sake I don't do other checks if it is already the right type
@@ -368,7 +373,7 @@ class Metadata:
if type(element) == ID3Timestamp:
list_data[i] = element.timestamp
continue
"""
Version 2.4 of the specification prescribes that all text fields (the fields that start with a T, except for TXXX) can contain multiple values separated by a null character.
Thus if above conditions are met, I concatenate the list,
@@ -376,7 +381,7 @@ class Metadata:
"""
if field.value[0].upper() == "T" and field.value.upper() != "TXXX":
return self.NULL_BYTE.join(list_data)
return list_data[0]
def get_mutagen_object(self, field):
@@ -395,6 +400,5 @@ class Metadata:
"""
# set the tagging timestamp to the current time
self.__setitem__(Mapping.TAGGING_TIME, [ID3Timestamp.now()])
for field in self.id3_dict:
yield self.get_mutagen_object(field)

View File

@@ -30,6 +30,8 @@ class InnerData:
_refers_to_instances: set = None
_is_in_collection: set = None
_has_data: bool = False
"""
Attribute versions keep track, of if the attribute has been changed.
"""
@@ -48,9 +50,19 @@ class InnerData:
for key, value in kwargs.items():
if hasattr(value, "__is_collection__"):
value._collection_for[self] = key
self.__setattr__(key, value)
if self._has_data:
continue
def __setattr__(self, key: str, value):
if self._has_data or not hasattr(self, "_default_values"):
return super().__setattr__(key, value)
super().__setattr__("_has_data", not (key in self._default_values and self._default_values[key] == value))
return super().__setattr__(key, value)
def __hash__(self):
return self.id
@@ -87,7 +99,9 @@ class OuterProxy:
Wraps the inner data, and provides apis, to naturally access those values.
"""
_default_factories: dict = {}
source_collection: SourceCollection
_default_factories: dict = {"source_collection": SourceCollection}
_outer_attribute: Set[str] = {"options", "metadata", "indexing_values", "option_string"}
DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = tuple()
@@ -289,6 +303,33 @@ class OuterProxy:
return r
@property
def root_collections(self) -> List[Collection]:
if len(self.UPWARDS_COLLECTION_STRING_ATTRIBUTES) == 0:
return [self]
r = []
for collection_string_attribute in self.UPWARDS_COLLECTION_STRING_ATTRIBUTES:
r.extend(self.__getattribute__(collection_string_attribute))
return r
def _compile(self, **kwargs):
pass
def compile(self, from_root=False, **kwargs):
# compile from the root
if not from_root:
for c in self.root_collections:
c.compile(from_root=True, **kwargs)
return
self._compile(**kwargs)
for c_attribute in self.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
for c in self.__getattribute__(c_attribute):
c.compile(from_root=True, **kwargs)
TITEL = "id"
@property
def title_string(self) -> str:

View File

@@ -111,7 +111,7 @@ class Song(Base):
"album_collection": Collection,
"feature_artist_collection": Collection,
"title": lambda: "",
"title": lambda: None,
"unified_title": lambda: None,
"isrc": lambda: None,
"genre": lambda: None,
@@ -376,6 +376,25 @@ class Album(Base):
r += f" with {len(self.song_collection)} songs"
return r
def _compile(self):
self.analyze_implied_album_type()
self.update_tracksort()
def analyze_implied_album_type(self):
# if the song collection has only one song, it is reasonable to assume that it is a single
if len(self.song_collection) == 1:
self.album_type = AlbumType.SINGLE
return
# if the album already has an album type, we don't need to do anything
if self.album_type is not AlbumType.OTHER:
return
# for information on EP's I looked at https://www.reddit.com/r/WeAreTheMusicMakers/comments/a354ql/whats_the_cutoff_length_between_ep_and_album/
if len(self.song_collection) < 9:
self.album_type = AlbumType.EP
return
def update_tracksort(self):
"""
This updates the tracksort attributes, of the songs in
@@ -525,6 +544,9 @@ class Artist(Base):
self.label_collection.extend(object_list)
return
def _compile(self):
self.update_albumsort()
def update_albumsort(self):
"""
This updates the albumsort attributes, of the albums in
@@ -535,9 +557,6 @@ class Artist(Base):
:return:
"""
if len(self.main_album_collection) <= 0:
return
type_section: Dict[AlbumType, int] = defaultdict(lambda: 2, {
AlbumType.OTHER: 0, # if I don't know it, I add it to the first section
AlbumType.STUDIO_ALBUM: 0,
@@ -580,7 +599,7 @@ class Artist(Base):
album_list.extend(sections[section_index])
# replace the old collection with the new one
self.main_album_collection: Collection = Collection(data=album_list, element_type=Album)
self.main_album_collection._data = album_list
INDEX_DEPENDS_ON = ("name", "source_collection", "contact_collection")
@property

View File

@@ -1,7 +1,7 @@
from __future__ import annotations
from pathlib import Path
from typing import List, Tuple, TextIO, Union
from typing import List, Tuple, TextIO, Union, Optional
import logging
import random
import requests
@@ -31,7 +31,10 @@ class Target(OuterProxy):
}
@classmethod
def temp(cls, name: str = str(random.randint(0, HIGHEST_ID))) -> P:
def temp(cls, name: str = str(random.randint(0, HIGHEST_ID)), file_extension: Optional[str] = None) -> P:
if file_extension is not None:
name = f"{name}.{file_extension}"
return cls(main_settings["temp_directory"] / name)
# This is automatically generated

View File

@@ -3,8 +3,9 @@ import random
import re
from copy import copy
from pathlib import Path
from typing import Optional, Union, Type, Dict, Set, List, Tuple
from typing import Optional, Union, Type, Dict, Set, List, Tuple, TypedDict
from string import Formatter
from dataclasses import dataclass, field
import requests
from bs4 import BeautifulSoup
@@ -28,11 +29,23 @@ from ..utils.config import main_settings
from ..utils.support_classes.query import Query
from ..utils.support_classes.download_result import DownloadResult
from ..utils.string_processing import fit_to_file_system
from ..utils import trace
from ..utils import trace, output, BColors
INDEPENDENT_DB_OBJECTS = Union[Label, Album, Artist, Song]
INDEPENDENT_DB_TYPES = Union[Type[Song], Type[Album], Type[Artist], Type[Label]]
@dataclass
class FetchOptions:
download_all: bool = False
album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"]))
@dataclass
class DownloadOptions:
download_all: bool = False
album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"]))
process_audio_if_found: bool = False
process_metadata_if_found: bool = True
class NamingDict(dict):
CUSTOM_KEYS: Dict[str, str] = {
@@ -101,6 +114,10 @@ class Page:
# set this to true, if all song details can also be fetched by fetching album details
NO_ADDITIONAL_DATA_FROM_SONG = False
def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
self.download_options: DownloadOptions = download_options or DownloadOptions()
self.fetch_options: FetchOptions = fetch_options or FetchOptions()
def _search_regex(self, pattern, string, default=None, fatal=True, flags=0, group=None):
"""
Perform a regex search on the given string, using a single or a list of
@@ -172,106 +189,7 @@ class Page:
def song_search(self, song: Song) -> List[Song]:
return []
def fetch_details(
self,
music_object: DatabaseObject,
stop_at_level: int = 1,
post_process: bool = True
) -> DatabaseObject:
"""
when a music object with lacking data is passed in, it returns
the SAME object **(no copy)** with more detailed data.
If you for example put in, an album, it fetches the tracklist
:param music_object:
:param stop_at_level:
This says the depth of the level the scraper will recurse to.
If this is for example set to 2, then the levels could be:
1. Level: the album
2. Level: every song of the album + every artist of the album
If no additional requests are needed to get the data one level below the supposed stop level
this gets ignored
:return detailed_music_object: IT MODIFIES THE INPUT OBJ
"""
# creating a new object, of the same type
new_music_object: Optional[DatabaseObject] = None
fetched_from_url: List[str] = []
# only certain database objects, have a source list
if isinstance(music_object, INDEPENDENT_DB_OBJECTS):
source: Source
for source in music_object.source_collection.get_sources(self.SOURCE_TYPE):
if music_object.already_fetched_from(source.hash_url):
continue
tmp = self.fetch_object_from_source(
source=source,
enforce_type=type(music_object),
stop_at_level=stop_at_level,
post_process=False,
type_string=type(music_object).__name__,
entity_string=music_object.option_string,
)
if new_music_object is None:
new_music_object = tmp
else:
new_music_object.merge(tmp)
fetched_from_url.append(source.hash_url)
if new_music_object is not None:
music_object.merge(new_music_object)
music_object.mark_as_fetched(*fetched_from_url)
return music_object
def fetch_object_from_source(
self,
source: Source,
stop_at_level: int = 2,
enforce_type: Type[DatabaseObject] = None,
post_process: bool = True,
type_string: str = "",
entity_string: str = "",
) -> Optional[DatabaseObject]:
obj_type = self.get_source_type(source)
if obj_type is None:
return None
if enforce_type != obj_type and enforce_type is not None:
self.LOGGER.warning(f"Object type isn't type to enforce: {enforce_type}, {obj_type}")
return None
music_object: DatabaseObject = None
fetch_map = {
Song: self.fetch_song,
Album: self.fetch_album,
Artist: self.fetch_artist,
Label: self.fetch_label
}
if obj_type in fetch_map:
music_object = fetch_map[obj_type](source, stop_at_level=stop_at_level)
else:
self.LOGGER.warning(f"Can't fetch details of type: {obj_type}")
return None
if stop_at_level > 0:
trace(f"fetching {type_string} [{entity_string}] [stop_at_level={stop_at_level}]")
collection: Collection
for collection_str in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
collection = music_object.__getattribute__(collection_str)
for sub_element in collection:
sub_element.merge(
self.fetch_details(sub_element, stop_at_level=stop_at_level - 1, post_process=False))
return music_object
# to fetch stuff
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
return Song()
@@ -288,8 +206,6 @@ class Page:
self,
music_object: DatabaseObject,
genre: str,
download_all: bool = False,
process_metadata_anyway: bool = True
) -> DownloadResult:
naming_dict: NamingDict = NamingDict({"genre": genre})
@@ -308,25 +224,22 @@ class Page:
fill_naming_objects(music_object)
return self._download(music_object, naming_dict, download_all, process_metadata_anyway=process_metadata_anyway)
return self._download(music_object, naming_dict)
def _download(
self,
music_object: DatabaseObject,
naming_dict: NamingDict,
download_all: bool = False,
skip_details: bool = False,
process_metadata_anyway: bool = True
**kwargs
) -> DownloadResult:
trace(f"downloading {type(music_object).__name__} [{music_object.option_string}]")
skip_next_details = skip_details
if isinstance(music_object, Song):
output(f"Downloading {music_object.option_string} to:", color=BColors.BOLD)
else:
output(f"Downloading {music_object.option_string}...", color=BColors.BOLD)
# Skips all releases, that are defined in shared.ALBUM_TYPE_BLACKLIST, if download_all is False
if isinstance(music_object, Album):
if self.NO_ADDITIONAL_DATA_FROM_SONG:
skip_next_details = True
if not download_all and music_object.album_type.value in main_settings["album_type_blacklist"]:
if not self.download_options.download_all and music_object.album_type in self.download_options.album_type_blacklist:
return DownloadResult()
if not (isinstance(music_object, Song) and self.NO_ADDITIONAL_DATA_FROM_SONG):
@@ -338,7 +251,7 @@ class Page:
naming_dict.add_object(music_object)
if isinstance(music_object, Song):
return self._download_song(music_object, naming_dict, process_metadata_anyway=process_metadata_anyway)
return self._download_song(music_object, naming_dict)
download_result: DownloadResult = DownloadResult()
@@ -347,13 +260,12 @@ class Page:
sub_ordered_music_object: DatabaseObject
for sub_ordered_music_object in collection:
download_result.merge(self._download(sub_ordered_music_object, naming_dict.copy(), download_all,
skip_details=skip_next_details,
process_metadata_anyway=process_metadata_anyway))
download_result.merge(self._download(sub_ordered_music_object, naming_dict.copy()))
return download_result
def _download_song(self, song: Song, naming_dict: NamingDict, process_metadata_anyway: bool = True):
def _download_song(self, song: Song, naming_dict: NamingDict):
song.compile()
if "genre" not in naming_dict and song.genre is not None:
naming_dict["genre"] = song.genre
@@ -373,40 +285,33 @@ class Page:
if song.target_collection.empty:
song.target_collection.append(new_target)
r = DownloadResult(1)
temp_target: Target = Target.temp(file_extension=main_settings["audio_format"])
found_on_disc = False
target: Target
for target in song.target_collection:
current_exists = target.exists
if current_exists:
output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY)
target.copy_content(temp_target)
found_on_disc = True
r.found_on_disk += 1
r.add_target(target)
else:
output(f'- {target.file_path}', color=BColors.GREY)
if not song.source_collection.has_source_page(self.SOURCE_TYPE):
return DownloadResult(error_message=f"No {self.__class__.__name__} source found for {song.option_string}.")
sources = song.source_collection.get_sources(self.SOURCE_TYPE)
temp_target: Target = Target(
relative_to_music_dir=False,
file_path=Path(
main_settings["temp_directory"],
str(song.id)
)
)
r = DownloadResult(1)
found_on_disc = False
target: Target
for target in song.target_collection:
if target.exists:
if process_metadata_anyway:
target.copy_content(temp_target)
found_on_disc = True
r.found_on_disk += 1
r.add_target(target)
if found_on_disc and not process_metadata_anyway:
self.LOGGER.info(f"{song.option_string} already exists, thus not downloading again.")
return r
skip_intervals = []
if not found_on_disc:
for source in sources:
r = self.download_song_to_target(source=source, target=temp_target, desc=song.option_string)
r = self.download_song_to_target(source=source, target=temp_target, desc="downloading")
if not r.is_fatal_error:
skip_intervals = self.get_skip_intervals(song, source)
@@ -417,16 +322,19 @@ class Page:
song=song,
temp_target=temp_target,
interval_list=skip_intervals,
found_on_disc=found_on_disc,
))
return r
def _post_process_targets(self, song: Song, temp_target: Target, interval_list: List) -> DownloadResult:
correct_codec(temp_target, interval_list=interval_list)
def _post_process_targets(self, song: Song, temp_target: Target, interval_list: List, found_on_disc: bool) -> DownloadResult:
if not found_on_disc or self.download_options.process_audio_if_found:
correct_codec(temp_target, interval_list=interval_list)
self.post_process_hook(song, temp_target)
write_metadata_to_target(song.metadata, temp_target, song)
if not found_on_disc or self.download_options.process_metadata_if_found:
write_metadata_to_target(song.metadata, temp_target, song)
r = DownloadResult()

View File

@@ -22,6 +22,7 @@ from ..objects import (
Artwork,
)
from ..connection import Connection
from ..utils import dump_to_file
from ..utils.support_classes.download_result import DownloadResult
from ..utils.string_processing import clean_song_title
from ..utils.config import main_settings, logging_settings

View File

@@ -418,6 +418,10 @@ class Musify(Page):
href = artist_soup["href"]
if href is not None:
href_parts = href.split("/")
if len(href_parts) <= 1 or href_parts[-2] != "artist":
return
artist_src_list.append(Source(self.SOURCE_TYPE, self.HOST + href))
name_elem: BeautifulSoup = artist_soup.find("span", {"itemprop": "name"})
@@ -681,17 +685,20 @@ class Musify(Page):
anchor: BeautifulSoup = artist_crumb.find("a")
if anchor is not None:
href = anchor.get("href")
artist_source_list: List[Source] = []
if href is not None:
artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + href.strip()))
href_parts = href.split("/")
if not(len(href_parts) <= 1 or href_parts[-2] != "artist"):
artist_source_list: List[Source] = []
span: BeautifulSoup = anchor.find("span")
if span is not None:
artist_list.append(Artist(
name=span.get_text(strip=True),
source_list=artist_source_list
))
if href is not None:
artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + href.strip()))
span: BeautifulSoup = anchor.find("span")
if span is not None:
artist_list.append(Artist(
name=span.get_text(strip=True),
source_list=artist_source_list
))
else:
self.LOGGER.debug("there are not 4 breadcrumb items, which shouldn't be the case")
@@ -938,10 +945,10 @@ class Musify(Page):
album_status_id = album_card.get("data-type")
if album_status_id.isdigit():
album_status_id = int(album_status_id)
album_type = ALBUM_TYPE_MAP[album_status_id]
album_kwargs["album_type"] = ALBUM_TYPE_MAP[album_status_id]
if album_status_id == 5:
album_status = AlbumStatus.BOOTLEG
album_kwargs["album_status"] = AlbumStatus.BOOTLEG
def parse_release_anchor(_anchor: BeautifulSoup, text_is_name=False):
nonlocal album_kwargs
@@ -1037,7 +1044,7 @@ class Musify(Page):
for card_soup in soup.find_all("div", {"class": "card"}):
album = self._parse_album_card(card_soup, artist_name, **kwargs)
if album.album_type in _album_type_blacklist:
if not self.fetch_options.download_all and album.album_type in self.fetch_options.album_type_blacklist:
continue
artist.main_album_collection.append(album)

View File

@@ -42,7 +42,7 @@ class YouTube(SuperYouTube):
SOURCE_TYPE = SourcePages.YOUTUBE
LOGGER = logging_settings["youtube_logger"]
NO_ADDITIONAL_DATA_FROM_SONG = True
NO_ADDITIONAL_DATA_FROM_SONG = False
def __init__(self, *args, **kwargs):
self.connection: Connection = Connection(

View File

@@ -145,6 +145,8 @@ class SuperYouTube(Page):
_sponsorblock_connection: Connection = Connection()
self.sponsorblock = python_sponsorblock.SponsorBlock(silent=True, session=_sponsorblock_connection.session)
super().__init__(*args, **kwargs)
def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
_url_type = {
YouTubeUrlType.CHANNEL: Artist,

View File

@@ -193,8 +193,7 @@ class YoutubeMusic(SuperYouTube):
self.start_millis = get_current_millis()
if self.credentials.api_key == "" or DEBUG_YOUTUBE_INITIALIZING:
self._fetch_from_main_page()
self._fetch_from_main_page()
SuperYouTube.__init__(self, *args, **kwargs)
@@ -215,6 +214,8 @@ class YoutubeMusic(SuperYouTube):
self.download_values_by_url: dict = {}
self.not_download: Dict[str, DownloadError] = {}
super().__init__(*args, **kwargs)
def _fetch_from_main_page(self):
"""
===API=KEY===
@@ -736,8 +737,9 @@ class YoutubeMusic(SuperYouTube):
raw_headers=True,
disable_cache=True,
headers=media.get("headers", {}),
# chunk_size=media.get("chunk_size", main_settings["chunk_size"]),
chunk_size=main_settings["chunk_size"],
method="GET",
timeout=5,
)
else:
result = DownloadResult(error_message=str(media.get("error") or self.not_download[source.hash_url]))

View File

@@ -12,7 +12,7 @@ if not load_dotenv(Path(__file__).parent.parent.parent / ".env"):
__stage__ = os.getenv("STAGE", "prod")
DEBUG = (__stage__ == "dev") and True
DEBUG = (__stage__ == "dev") and False
DEBUG_LOGGING = DEBUG and False
DEBUG_TRACE = DEBUG and True
DEBUG_OBJECT_TRACE = DEBUG and False

View File

@@ -116,10 +116,13 @@ def clean_song_title(raw_song_title: str, artist_name: Optional[str] = None) ->
# Remove artist from the start of the title
if raw_song_title.lower().startswith(artist_name.lower()):
raw_song_title = raw_song_title[len(artist_name):].strip()
if raw_song_title.startswith("-"):
raw_song_title = raw_song_title[1:].strip()
possible_new_name = raw_song_title[len(artist_name):].strip()
for char in ("-", "", ":", "|"):
if possible_new_name.startswith(char):
raw_song_title = possible_new_name[1:].strip()
break
return raw_song_title.strip()

View File

@@ -69,7 +69,7 @@ dependencies = [
"toml~=0.10.2",
"typing_extensions~=4.7.1",
"python-sponsorblock~=0.0.0",
"python-sponsorblock~=0.1",
"youtube_dl",
]
dynamic = [