17 Commits

Author SHA1 Message Date
da8887b279 draft: rewriting soure
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-14 15:18:17 +02:00
Hellow
bb32fc7647 draft: rewriting downloading
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-14 00:28:05 +02:00
Hellow
8c369d79e4 draft: rewriting downloading
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-13 21:51:32 +02:00
Hellow
b09d6f2691 draft: rewriting downloading 2024-05-13 21:45:12 +02:00
0e6fe8187a feat: fetch_from_url
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-13 18:09:11 +02:00
0343c11a62 feat: migrated fetch details and from source
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-13 18:03:20 +02:00
9769cf4033 Merge pull request 'fix/caching_signatures' (#32) from fix/caching_signatures into experimental
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
Reviewed-on: #32
2024-05-13 15:15:37 +00:00
55024bd987 fix: key error
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
ci/woodpecker/pr/woodpecker Pipeline was successful
ci/woodpecker/pull_request_closed/woodpecker Pipeline was successful
2024-05-13 17:15:15 +02:00
d85498869d feat: tracksort and albumsort + some other stuff
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-13 14:22:33 +02:00
c3350b016d fix: timeout for yt music stream
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-13 13:39:57 +02:00
788103a68e fix: removed invalid stuff
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-13 13:28:54 +02:00
5179c64161 Merge branch 'experimental' of ssh://gitea.elara.ws:2222/music-kraken/music-kraken-core into experimental
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-10 17:53:39 +02:00
04405f88eb Merge branch 'fix/musify_scrapes_year_as_artist' into experimental 2024-05-10 17:52:11 +02:00
949583225a Merge pull request 'Correct duplicate values' (#22) from issue16 into experimental
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
Reviewed-on: #22
2024-05-08 12:33:34 +00:00
4e0b005170 Merge branch 'experimental' into issue16
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
ci/woodpecker/pr/woodpecker Pipeline was successful
ci/woodpecker/pull_request_closed/woodpecker Pipeline was successful
2024-05-08 12:33:56 +02:00
e3e7aea959 fix for lyrics
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
ci/woodpecker/pr/woodpecker Pipeline was successful
2024-05-08 12:27:56 +02:00
709c5ebaa8 Correct duplicate values
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
ci/woodpecker/pr/woodpecker Pipeline was successful
2024-05-07 12:34:24 +02:00
26 changed files with 444 additions and 386 deletions

View File

@@ -20,12 +20,15 @@
"APIC",
"Bandcamp",
"bitrate",
"DEEZER",
"dotenv",
"encyclopaedia",
"ENDC",
"Gitea",
"isrc",
"levenshtein",
"metallum",
"MUSICBRAINZ",
"musify",
"OKBLUE",
"OKGREEN",

View File

@@ -6,8 +6,8 @@ logging.getLogger().setLevel(logging.DEBUG)
if __name__ == "__main__":
commands = [
"s: #a Psychonaut 4",
"d: 0",
"s: #a Crystal F",
"d: 20",
]

View File

@@ -1,5 +1,5 @@
import mutagen
from mutagen.id3 import ID3, Frame, APIC
from mutagen.id3 import ID3, Frame, APIC, USLT
from pathlib import Path
from typing import List
import logging
@@ -7,6 +7,7 @@ from PIL import Image
from ..utils.config import logging_settings, main_settings
from ..objects import Song, Target, Metadata
from ..objects.metadata import Mapping
from ..connection import Connection
LOGGER = logging_settings["tagging_logger"]
@@ -105,8 +106,11 @@ def write_metadata_to_target(metadata: Metadata, target: Target, song: Song):
data=converted_target.read_bytes(),
)
)
mutagen_file = mutagen.File(target.file_path)
id3_object.frames.delall("USLT")
uslt_val = metadata.get_id3_value(Mapping.UNSYNCED_LYRICS)
id3_object.frames.add(
USLT(encoding=3, lang=u'eng', desc=u'desc', text=uslt_val)
)
id3_object.add_metadata(metadata)
id3_object.save()

View File

@@ -0,0 +1,20 @@
from dataclasses import dataclass, field
from ..utils.config import main_settings
from ..utils.enums.album import AlbumType
@dataclass
class FetchOptions:
download_all: bool = False
album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"]))
@dataclass
class DownloadOptions:
download_all: bool = False
album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"]))
download_again_if_found: bool = False
process_audio_if_found: bool = False
process_metadata_if_found: bool = True

View File

@@ -1,12 +1,29 @@
from typing import Tuple, Type, Dict, Set
from typing import Tuple, Type, Dict, Set, Optional, List
from collections import defaultdict
from pathlib import Path
import re
from . import FetchOptions, DownloadOptions
from .results import SearchResults
from ..objects import DatabaseObject, Source
from ..utils.config import youtube_settings
from ..utils.enums.source import SourcePages
from ..objects import (
DatabaseObject as DataObject,
Collection,
Target,
Source,
Options,
Song,
Album,
Artist,
Label,
)
from ..utils.string_processing import fit_to_file_system
from ..utils.config import youtube_settings, main_settings
from ..utils.path_manager import LOCATIONS
from ..utils.enums import SourceType
from ..utils.support_classes.download_result import DownloadResult
from ..utils.support_classes.query import Query
from ..utils.support_classes.download_result import DownloadResult
from ..utils.exception import MKMissingNameException
from ..utils.exception.download import UrlNotFoundException
from ..utils.shared import DEBUG_PAGES
@@ -34,6 +51,13 @@ SHADY_PAGES: Set[Type[Page]] = {
Musify,
}
fetch_map = {
Song: "fetch_song",
Album: "fetch_album",
Artist: "fetch_artist",
Label: "fetch_label",
}
if DEBUG_PAGES:
DEBUGGING_PAGE = Bandcamp
print(f"Only downloading from page {DEBUGGING_PAGE}.")
@@ -43,10 +67,13 @@ if DEBUG_PAGES:
class Pages:
def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False) -> None:
def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
self.download_options: DownloadOptions = download_options or DownloadOptions()
self.fetch_options: FetchOptions = fetch_options or FetchOptions()
# initialize all page instances
self._page_instances: Dict[Type[Page], Page] = dict()
self._source_to_page: Dict[SourcePages, Type[Page]] = dict()
self._source_to_page: Dict[SourceType, Type[Page]] = dict()
exclude_pages = exclude_pages if exclude_pages is not None else set()
@@ -61,14 +88,19 @@ class Pages:
self._pages_set: Set[Type[Page]] = ALL_PAGES.difference(exclude_pages)
self.pages: Tuple[Type[Page], ...] = _set_to_tuple(self._pages_set)
self._audio_pages_set: Set[Type[Page]] = self._pages_set.intersection(AUDIO_PAGES)
self.audio_pages: Tuple[Type[Page], ...] = _set_to_tuple(self._audio_pages_set)
for page_type in self.pages:
self._page_instances[page_type] = page_type()
self._page_instances[page_type] = page_type(fetch_options=self.fetch_options, download_options=self.download_options)
self._source_to_page[page_type.SOURCE_TYPE] = page_type
def _get_page_from_enum(self, source_page: SourceType) -> Page:
if source_page not in self._source_to_page:
return None
return self._page_instances[self._source_to_page[source_page]]
def search(self, query: Query) -> SearchResults:
result = SearchResults()
@@ -80,22 +112,42 @@ class Pages:
return result
def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1) -> DatabaseObject:
if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
return music_object
def fetch_details(self, data_object: DataObject, stop_at_level: int = 1, **kwargs) -> DataObject:
if not isinstance(data_object, INDEPENDENT_DB_OBJECTS):
return data_object
for source_page in music_object.source_collection.source_pages:
if source_page not in self._source_to_page:
continue
source: Source
for source in data_object.source_collection.get_sources():
new_data_object = self.fetch_from_source(source=source, stop_at_level=stop_at_level)
if new_data_object is not None:
data_object.merge(new_data_object)
page_type = self._source_to_page[source_page]
if page_type in self._pages_set:
music_object.merge(self._page_instances[page_type].fetch_details(music_object=music_object, stop_at_level=stop_at_level))
return data_object
def fetch_from_source(self, source: Source, **kwargs) -> Optional[DataObject]:
page: Page = self._get_page_from_enum(source.source_type)
if page is None:
return None
return music_object
# getting the appropriate function for the page and the object type
source_type = page.get_source_type(source)
if not hasattr(page, fetch_map[source_type]):
return None
func = getattr(page, fetch_map[source_type])(source=source, **kwargs)
# fetching the data object and marking it as fetched
data_object: DataObject = func(source=source)
data_object.mark_as_fetched(source.hash_url)
return data_object
def is_downloadable(self, music_object: DatabaseObject) -> bool:
def fetch_from_url(self, url: str) -> Optional[DataObject]:
source = Source.match_url(url, SourceType.MANUAL)
if source is None:
return None
return self.fetch_from_source(source=source)
def is_downloadable(self, music_object: DataObject) -> bool:
_page_types = set(self._source_to_page)
for src in music_object.source_collection.source_pages:
if src in self._source_to_page:
@@ -104,30 +156,139 @@ class Pages:
audio_pages = self._audio_pages_set.intersection(_page_types)
return len(audio_pages) > 0
def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult:
if not isinstance(music_object, INDEPENDENT_DB_OBJECTS):
return DownloadResult(error_message=f"{type(music_object).__name__} can't be downloaded.")
self.fetch_details(music_object)
_page_types = set(self._source_to_page)
for src in music_object.source_collection.source_pages:
if src in self._source_to_page:
_page_types.add(self._source_to_page[src])
audio_pages = self._audio_pages_set.intersection(_page_types)
def _skip_object(self, data_object: DataObject) -> bool:
if isinstance(data_object, Album):
if not self.download_options.download_all and data_object.album_type in self.download_options.album_type_blacklist:
return True
for download_page in audio_pages:
return self._page_instances[download_page].download(music_object=music_object, genre=genre)
return DownloadResult(error_message=f"No audio source has been found for {music_object}.")
return False
def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DatabaseObject]:
source = Source.match_url(url, SourcePages.MANUAL)
def download(self, data_object: DataObject, genre: str, **kwargs) -> DownloadResult:
# fetch the given object
self.fetch_details(data_object)
# fetching all parent objects (e.g. if you only download a song)
if not kwargs.get("fetched_upwards", False):
to_fetch: List[DataObject] = [data_object]
while len(to_fetch) > 0:
new_to_fetch = []
for d in to_fetch:
if self._skip_object(d):
continue
self.fetch_details(d)
for c in d.get_parent_collections():
new_to_fetch.extend(c)
to_fetch = new_to_fetch
kwargs["fetched_upwards"] = True
# download all children
download_result: DownloadResult = DownloadResult()
for c in data_object.get_children():
for d in c:
if self._skip_object(d):
continue
download_result.merge(self.download(d, genre, **kwargs))
# actually download if the object is a song
if isinstance(data_object, Song):
"""
TODO
add the traced artist and album to the naming.
I am able to do that, because duplicate values are removed later on.
"""
self._download_song(data_object, naming={
"genre": [genre],
"audio_format": main_settings["audio_format"],
})
return download_result
def _extract_fields_from_template(self, path_template: str) -> Set[str]:
return set(re.findall(r"{([^}]+)}", path_template))
def _parse_path_template(self, path_template: str, naming: Dict[str, List[str]]) -> str:
field_names: Set[str] = self._extract_fields_from_template(path_template)
for field in field_names:
if len(naming[field]) == 0:
raise MKMissingNameException(f"Missing field for {field}.")
path_template = path_template.replace(f"{{{field}}}", naming[field][0])
return possible_parts
def _get_pages_with_source(self, data_object: DataObject, sort_by_attribute: str = "DOWNLOAD_PRIORITY") -> List[Page]:
pages = [self._get_page_from_enum(s.source_type) for s in data_object.source_collection.get_sources()]
pages.sort(key=lambda p: getattr(p, sort_by_attribute), reverse=True)
return list(pages)
def _download_song(self, song: Song, naming: dict) -> DownloadOptions:
"""
TODO
Search the song in the file system.
"""
r = DownloadResult(total=1)
# pre process the data recursively
song.compile()
# manage the naming
naming: Dict[str, List[str]] = defaultdict(list, naming)
naming["song"].append(song.title_string)
naming["genre"].append(song.genre)
naming["isrc"].append(song.isrc)
naming["album"].extend(a.title_string for a in song.album_collection)
naming["album_type"].extend(a.album_type.value for a in song.album_collection)
naming["artist"].extend(a.name for a in song.main_artist_collection)
naming["artist"].extend(a.name for a in song.feature_artist_collection)
for a in song.album_collection:
naming["label"].extend([l.title_string for l in a.label_collection])
# removing duplicates from the naming, and process the strings
for key, value in naming.items():
# https://stackoverflow.com/a/17016257
naming[key] = list(dict.fromkeys(items))
# manage the targets
tmp: Target = Target.temp(file_extension=main_settings["audio_format"])
found_on_disc = False
song.target_collection.append(Target(
relative_to_music_dir=True,
file_path=Path(
self._parse_path_template(main_settings["download_path"], naming=naming),
self._parse_path_template(main_settings["download_file"], naming=naming),
)
))
for target in song.target_collection:
if target.exists():
output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY)
found_on_disc = True
r.found_on_disk += 1
target.copy_content(tmp)
else:
target.create_parent_directories()
output(f'- {target.file_path}', color=BColors.GREY)
# actually download
for page in self._get_pages_with_source(song, sort_by_attribute="DOWNLOAD_PRIORITY"):
r = page.download_song_to_target(song, tmp, r)
return r
def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]:
source = Source.match_url(url, SourceType.MANUAL)
if source is None:
raise UrlNotFoundException(url=url)
_actual_page = self._source_to_page[source.page_enum]
_actual_page = self._source_to_page[source.source_type]
return _actual_page, self._page_instances[_actual_page].fetch_object_from_source(source=source, stop_at_level=stop_at_level)

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Type, Dict, List, Generator, Union
from dataclasses import dataclass
from ..objects import DatabaseObject
from ..utils.enums.source import SourcePages
from ..utils.enums.source import SourceType
from ..pages import Page, EncyclopaediaMetallum, Musify

View File

@@ -3,7 +3,7 @@ from .option import Options
from .metadata import Metadata, Mapping as ID3Mapping, ID3Timestamp
from .source import Source, SourcePages, SourceTypes
from .source import Source, SourceType
from .song import (
Song,
@@ -24,4 +24,4 @@ from .parents import OuterProxy
from .artwork import Artwork
DatabaseObject = TypeVar('T', bound=OuterProxy)
DatabaseObject = OuterProxy

View File

@@ -92,7 +92,7 @@ class Mapping(Enum):
key = attribute.value
if key[0] == 'T':
# a text fiel
# a text field
return cls.get_text_instance(key, value)
if key[0] == "W":
# an url field
@@ -355,7 +355,12 @@ class Metadata:
return None
list_data = self.id3_dict[field]
#correct duplications
correct_list_data = list()
for data in list_data:
if data not in correct_list_data:
correct_list_data.append(data)
list_data = correct_list_data
# convert for example the time objects to timestamps
for i, element in enumerate(list_data):
# for performances sake I don't do other checks if it is already the right type
@@ -368,7 +373,7 @@ class Metadata:
if type(element) == ID3Timestamp:
list_data[i] = element.timestamp
continue
"""
Version 2.4 of the specification prescribes that all text fields (the fields that start with a T, except for TXXX) can contain multiple values separated by a null character.
Thus if above conditions are met, I concatenate the list,
@@ -376,7 +381,7 @@ class Metadata:
"""
if field.value[0].upper() == "T" and field.value.upper() != "TXXX":
return self.NULL_BYTE.join(list_data)
return list_data[0]
def get_mutagen_object(self, field):
@@ -395,6 +400,5 @@ class Metadata:
"""
# set the tagging timestamp to the current time
self.__setitem__(Mapping.TAGGING_TIME, [ID3Timestamp.now()])
for field in self.id3_dict:
yield self.get_mutagen_object(field)

View File

@@ -99,7 +99,9 @@ class OuterProxy:
Wraps the inner data, and provides apis, to naturally access those values.
"""
_default_factories: dict = {}
source_collection: SourceCollection
_default_factories: dict = {"source_collection": SourceCollection}
_outer_attribute: Set[str] = {"options", "metadata", "indexing_values", "option_string"}
DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = tuple()
@@ -203,6 +205,7 @@ class OuterProxy:
if __other is None:
return
a_id = self.id
a = self
b = __other
@@ -225,6 +228,8 @@ class OuterProxy:
a._inner.__merge__(old_inner, **kwargs)
del old_inner
self.id = a_id
def __merge__(self, __other: Optional[OuterProxy], **kwargs):
self.merge(__other, **kwargs)
@@ -301,6 +306,33 @@ class OuterProxy:
return r
@property
def root_collections(self) -> List[Collection]:
if len(self.UPWARDS_COLLECTION_STRING_ATTRIBUTES) == 0:
return [self]
r = []
for collection_string_attribute in self.UPWARDS_COLLECTION_STRING_ATTRIBUTES:
r.extend(self.__getattribute__(collection_string_attribute))
return r
def _compile(self, **kwargs):
pass
def compile(self, from_root=False, **kwargs):
# compile from the root
if not from_root:
for c in self.root_collections:
c.compile(from_root=True, **kwargs)
return
self._compile(**kwargs)
for c_attribute in self.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
for c in self.__getattribute__(c_attribute):
c.compile(from_root=True, **kwargs)
TITEL = "id"
@property
def title_string(self) -> str:
@@ -308,3 +340,11 @@ class OuterProxy:
def __repr__(self):
return f"{type(self).__name__}({self.title_string})"
def get_child_collections(self):
for collection_string_attribute in self.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
yield self.__getattribute__(collection_string_attribute)
def get_parent_collections(self):
for collection_string_attribute in self.UPWARDS_COLLECTION_STRING_ATTRIBUTES:
yield self.__getattribute__(collection_string_attribute)

View File

@@ -376,6 +376,25 @@ class Album(Base):
r += f" with {len(self.song_collection)} songs"
return r
def _compile(self):
self.analyze_implied_album_type()
self.update_tracksort()
def analyze_implied_album_type(self):
# if the song collection has only one song, it is reasonable to assume that it is a single
if len(self.song_collection) == 1:
self.album_type = AlbumType.SINGLE
return
# if the album already has an album type, we don't need to do anything
if self.album_type is not AlbumType.OTHER:
return
# for information on EP's I looked at https://www.reddit.com/r/WeAreTheMusicMakers/comments/a354ql/whats_the_cutoff_length_between_ep_and_album/
if len(self.song_collection) < 9:
self.album_type = AlbumType.EP
return
def update_tracksort(self):
"""
This updates the tracksort attributes, of the songs in
@@ -525,6 +544,9 @@ class Artist(Base):
self.label_collection.extend(object_list)
return
def _compile(self):
self.update_albumsort()
def update_albumsort(self):
"""
This updates the albumsort attributes, of the albums in
@@ -535,9 +557,6 @@ class Artist(Base):
:return:
"""
if len(self.main_album_collection) <= 0:
return
type_section: Dict[AlbumType, int] = defaultdict(lambda: 2, {
AlbumType.OTHER: 0, # if I don't know it, I add it to the first section
AlbumType.STUDIO_ALBUM: 0,
@@ -580,7 +599,7 @@ class Artist(Base):
album_list.extend(sections[section_index])
# replace the old collection with the new one
self.main_album_collection: Collection = Collection(data=album_list, element_type=Album)
self.main_album_collection._data = album_list
INDEX_DEPENDS_ON = ("name", "source_collection", "contact_collection")
@property

View File

@@ -8,7 +8,7 @@ from dataclasses import dataclass, field
from functools import cached_property
from ..utils import generate_id
from ..utils.enums.source import SourcePages, SourceTypes
from ..utils.enums import SourceType
from ..utils.config import youtube_settings
from ..utils.string_processing import hash_url, shorten_display_url
@@ -20,22 +20,22 @@ from .collection import Collection
@dataclass
class Source:
page_enum: SourcePages
source_type: SourceType
url: str
referrer_page: SourcePages = None
referrer_page: SourceType = None
audio_url: Optional[str] = None
additional_data: dict = field(default_factory=dict)
def __post_init__(self):
self.referrer_page = self.referrer_page or self.page_enum
self.referrer_page = self.referrer_page or self.source_type
@property
def parsed_url(self) -> ParseResult:
return urlparse(self.url)
@classmethod
def match_url(cls, url: str, referrer_page: SourcePages) -> Optional[Source]:
def match_url(cls, url: str, referrer_page: SourceType) -> Optional[Source]:
"""
this shouldn't be used, unless you are not certain what the source is for
the reason is that it is more inefficient
@@ -44,38 +44,38 @@ class Source:
url = parsed_url.geturl()
if "musify" in parsed_url.netloc:
return cls(SourcePages.MUSIFY, url, referrer_page=referrer_page)
return cls(SourceType.MUSIFY, url, referrer_page=referrer_page)
if parsed_url.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]:
return cls(SourcePages.YOUTUBE, url, referrer_page=referrer_page)
return cls(SourceType.YOUTUBE, url, referrer_page=referrer_page)
if url.startswith("https://www.deezer"):
return cls(SourcePages.DEEZER, url, referrer_page=referrer_page)
return cls(SourceType.DEEZER, url, referrer_page=referrer_page)
if url.startswith("https://open.spotify.com"):
return cls(SourcePages.SPOTIFY, url, referrer_page=referrer_page)
return cls(SourceType.SPOTIFY, url, referrer_page=referrer_page)
if "bandcamp" in url:
return cls(SourcePages.BANDCAMP, url, referrer_page=referrer_page)
return cls(SourceType.BANDCAMP, url, referrer_page=referrer_page)
if "wikipedia" in parsed_url.netloc:
return cls(SourcePages.WIKIPEDIA, url, referrer_page=referrer_page)
return cls(SourceType.WIKIPEDIA, url, referrer_page=referrer_page)
if url.startswith("https://www.metal-archives.com/"):
return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page)
return cls(SourceType.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page)
# the less important once
if url.startswith("https://www.facebook"):
return cls(SourcePages.FACEBOOK, url, referrer_page=referrer_page)
return cls(SourceType.FACEBOOK, url, referrer_page=referrer_page)
if url.startswith("https://www.instagram"):
return cls(SourcePages.INSTAGRAM, url, referrer_page=referrer_page)
return cls(SourceType.INSTAGRAM, url, referrer_page=referrer_page)
if url.startswith("https://twitter"):
return cls(SourcePages.TWITTER, url, referrer_page=referrer_page)
return cls(SourceType.TWITTER, url, referrer_page=referrer_page)
if url.startswith("https://myspace.com"):
return cls(SourcePages.MYSPACE, url, referrer_page=referrer_page)
return cls(SourceType.MYSPACE, url, referrer_page=referrer_page)
@property
def hash_url(self) -> str:
@@ -89,21 +89,21 @@ class Source:
return r
def __repr__(self) -> str:
return f"Src({self.page_enum.value}: {shorten_display_url(self.url)})"
return f"Src({self.source_type.value}: {shorten_display_url(self.url)})"
def __merge__(self, other: Source, **kwargs):
if self.audio_url is None:
self.audio_url = other.audio_url
self.additional_data.update(other.additional_data)
page_str = property(fget=lambda self: self.page_enum.value)
page_str = property(fget=lambda self: self.source_type.value)
class SourceCollection:
__change_version__ = generate_id()
_indexed_sources: Dict[str, Source]
_page_to_source_list: Dict[SourcePages, List[Source]]
_page_to_source_list: Dict[SourceType, List[Source]]
def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs):
self._page_to_source_list = defaultdict(list)
@@ -111,7 +111,7 @@ class SourceCollection:
self.extend(data or [])
def has_source_page(self, *source_pages: SourcePages) -> bool:
def has_source_page(self, *source_pages: SourceType) -> bool:
return any(source_page in self._page_to_source_list for source_page in source_pages)
def get_sources(self, *source_pages: List[Source]) -> Generator[Source]:
@@ -135,7 +135,7 @@ class SourceCollection:
existing_source.__merge__(source)
source = existing_source
else:
self._page_to_source_list[source.page_enum].append(source)
self._page_to_source_list[source.source_type].append(source)
changed = False
for key in source.indexing_values:
@@ -157,7 +157,7 @@ class SourceCollection:
self.extend(other)
@property
def source_pages(self) -> Iterable[SourcePages]:
def source_pages(self) -> Iterable[SourceType]:
return sorted(self._page_to_source_list.keys(), key=lambda page: page.value)
@property

View File

@@ -1,7 +1,7 @@
from __future__ import annotations
from pathlib import Path
from typing import List, Tuple, TextIO, Union
from typing import List, Tuple, TextIO, Union, Optional
import logging
import random
import requests
@@ -31,7 +31,10 @@ class Target(OuterProxy):
}
@classmethod
def temp(cls, name: str = str(random.randint(0, HIGHEST_ID))) -> P:
def temp(cls, name: str = str(random.randint(0, HIGHEST_ID)), file_extension: Optional[str] = None) -> P:
if file_extension is not None:
name = f"{name}.{file_extension}"
return cls(main_settings["temp_directory"] / name)
# This is automatically generated

View File

@@ -22,7 +22,7 @@ from ..objects import (
Collection,
Label,
)
from ..utils.enums.source import SourcePages
from ..utils.enums import SourceType
from ..utils.enums.album import AlbumType
from ..audio import write_metadata_to_target, correct_codec
from ..utils.config import main_settings
@@ -47,72 +47,15 @@ class DownloadOptions:
process_audio_if_found: bool = False
process_metadata_if_found: bool = True
class NamingDict(dict):
CUSTOM_KEYS: Dict[str, str] = {
"label": "label.name",
"artist": "artist.name",
"song": "song.title",
"isrc": "song.isrc",
"album": "album.title",
"album_type": "album.album_type_string"
}
def __init__(self, values: dict, object_mappings: Dict[str, DatabaseObject] = None):
self.object_mappings: Dict[str, DatabaseObject] = object_mappings or dict()
super().__init__(values)
self["audio_format"] = main_settings["audio_format"]
def add_object(self, music_object: DatabaseObject):
self.object_mappings[type(music_object).__name__.lower()] = music_object
def copy(self) -> dict:
return type(self)(super().copy(), self.object_mappings.copy())
def __getitem__(self, key: str) -> str:
return fit_to_file_system(super().__getitem__(key))
def default_value_for_name(self, name: str) -> str:
return f'Various {name.replace("_", " ").title()}'
def __missing__(self, key: str) -> str:
if "." not in key:
if key not in self.CUSTOM_KEYS:
return self.default_value_for_name(key)
key = self.CUSTOM_KEYS[key]
frag_list = key.split(".")
object_name = frag_list[0].strip().lower()
attribute_name = frag_list[-1].strip().lower()
if object_name not in self.object_mappings:
return self.default_value_for_name(attribute_name)
music_object = self.object_mappings[object_name]
try:
value = getattr(music_object, attribute_name)
if value is None:
return self.default_value_for_name(attribute_name)
return str(value)
except AttributeError:
return self.default_value_for_name(attribute_name)
class Page:
"""
This is an abstract class, laying out the
functionality for every other class fetching something
"""
SOURCE_TYPE: SourceType
LOGGER: LOGGER
SOURCE_TYPE: SourcePages
LOGGER = logging.getLogger("this shouldn't be used")
def __new__(cls, *args, **kwargs):
cls.SOURCE_TYPE.register_page(cls)
cls.LOGGER = logging.getLogger(cls.__name__)
# set this to true, if all song details can also be fetched by fetching album details
NO_ADDITIONAL_DATA_FROM_SONG = False
return super().__new__(cls)
def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
self.download_options: DownloadOptions = download_options or DownloadOptions()
@@ -189,103 +132,7 @@ class Page:
def song_search(self, song: Song) -> List[Song]:
return []
def fetch_details(
self,
music_object: DatabaseObject,
stop_at_level: int = 1,
) -> DatabaseObject:
"""
when a music object with lacking data is passed in, it returns
the SAME object **(no copy)** with more detailed data.
If you for example put in, an album, it fetches the tracklist
:param music_object:
:param stop_at_level:
This says the depth of the level the scraper will recurse to.
If this is for example set to 2, then the levels could be:
1. Level: the album
2. Level: every song of the album + every artist of the album
If no additional requests are needed to get the data one level below the supposed stop level
this gets ignored
:return detailed_music_object: IT MODIFIES THE INPUT OBJ
"""
# creating a new object, of the same type
new_music_object: Optional[DatabaseObject] = None
fetched_from_url: List[str] = []
# only certain database objects, have a source list
if isinstance(music_object, INDEPENDENT_DB_OBJECTS):
source: Source
for source in music_object.source_collection.get_sources(self.SOURCE_TYPE):
if music_object.already_fetched_from(source.hash_url):
continue
tmp = self.fetch_object_from_source(
source=source,
enforce_type=type(music_object),
stop_at_level=stop_at_level,
type_string=type(music_object).__name__,
entity_string=music_object.option_string,
)
if new_music_object is None:
new_music_object = tmp
else:
new_music_object.merge(tmp)
fetched_from_url.append(source.hash_url)
if new_music_object is not None:
music_object.merge(new_music_object)
music_object.mark_as_fetched(*fetched_from_url)
return music_object
def fetch_object_from_source(
self,
source: Source,
stop_at_level: int = 2,
enforce_type: Type[DatabaseObject] = None,
type_string: str = "",
entity_string: str = "",
) -> Optional[DatabaseObject]:
obj_type = self.get_source_type(source)
if obj_type is None:
return None
if enforce_type != obj_type and enforce_type is not None:
self.LOGGER.warning(f"Object type isn't type to enforce: {enforce_type}, {obj_type}")
return None
music_object: DatabaseObject = None
fetch_map = {
Song: self.fetch_song,
Album: self.fetch_album,
Artist: self.fetch_artist,
Label: self.fetch_label
}
if obj_type in fetch_map:
music_object = fetch_map[obj_type](source, stop_at_level=stop_at_level)
else:
self.LOGGER.warning(f"Can't fetch details of type: {obj_type}")
return None
if stop_at_level > 0:
trace(f"fetching {type_string} [{entity_string}] [stop_at_level={stop_at_level}]")
collection: Collection
for collection_str in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
collection = music_object.__getattribute__(collection_str)
for sub_element in collection:
sub_element.merge(
self.fetch_details(sub_element, stop_at_level=stop_at_level - 1))
return music_object
# to fetch stuff
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
return Song()
@@ -361,6 +208,7 @@ class Page:
return download_result
def _download_song(self, song: Song, naming_dict: NamingDict):
song.compile()
if "genre" not in naming_dict and song.genre is not None:
naming_dict["genre"] = song.genre
@@ -381,7 +229,7 @@ class Page:
song.target_collection.append(new_target)
r = DownloadResult(1)
temp_target: Target = Target.temp()
temp_target: Target = Target.temp(file_extension=main_settings["audio_format"])
found_on_disc = False
target: Target

View File

@@ -10,7 +10,7 @@ from .abstract import Page
from ..objects import (
Artist,
Source,
SourcePages,
SourceType,
Song,
Album,
Label,
@@ -23,6 +23,7 @@ from ..objects import (
)
from ..connection import Connection
from ..utils import dump_to_file
from ..utils.enums import SourceType, ALL_SOURCE_TYPES
from ..utils.support_classes.download_result import DownloadResult
from ..utils.string_processing import clean_song_title
from ..utils.config import main_settings, logging_settings
@@ -49,8 +50,7 @@ class BandcampTypes(Enum):
class Bandcamp(Page):
# CHANGE
SOURCE_TYPE = SourcePages.BANDCAMP
SOURCE_TYPE = ALL_SOURCE_TYPES.BANDCAMP
LOGGER = logging_settings["bandcamp_logger"]
def __init__(self, *args, **kwargs):

View File

@@ -7,7 +7,7 @@ from urllib.parse import urlparse, urlencode
from ..connection import Connection
from ..utils.config import logging_settings
from .abstract import Page
from ..utils.enums.source import SourcePages
from ..utils.enums.source import SourceType
from ..utils.enums.album import AlbumType
from ..utils.support_classes.query import Query
from ..objects import (
@@ -59,7 +59,7 @@ def _song_from_json(artist_html=None, album_html=None, release_type=None, title=
_album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html)
],
source_list=[
Source(SourcePages.ENCYCLOPAEDIA_METALLUM, song_id)
Source(SourceType.ENCYCLOPAEDIA_METALLUM, song_id)
]
)
@@ -85,7 +85,7 @@ def _artist_from_json(artist_html=None, genre=None, country=None) -> Artist:
return Artist(
name=artist_name,
source_list=[
Source(SourcePages.ENCYCLOPAEDIA_METALLUM, artist_url)
Source(SourceType.ENCYCLOPAEDIA_METALLUM, artist_url)
]
)
@@ -105,7 +105,7 @@ def _album_from_json(album_html=None, release_type=None, artist_html=None) -> Al
title=album_name,
album_type=album_type,
source_list=[
Source(SourcePages.ENCYCLOPAEDIA_METALLUM, album_url)
Source(SourceType.ENCYCLOPAEDIA_METALLUM, album_url)
],
artist_list=[
_artist_from_json(artist_html=artist_html)
@@ -207,7 +207,7 @@ def create_grid(
class EncyclopaediaMetallum(Page):
SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM
SOURCE_TYPE = SourceType.ENCYCLOPAEDIA_METALLUM
LOGGER = logging_settings["metal_archives_logger"]
def __init__(self, **kwargs):
@@ -832,7 +832,7 @@ class EncyclopaediaMetallum(Page):
)
def get_source_type(self, source: Source):
if self.SOURCE_TYPE != source.page_enum:
if self.SOURCE_TYPE != source.source_type:
return None
url = source.url

View File

@@ -9,7 +9,7 @@ from bs4 import BeautifulSoup
from ..connection import Connection
from .abstract import Page
from ..utils.enums.source import SourcePages
from ..utils.enums import SourceType, ALL_SOURCE_TYPES
from ..utils.enums.album import AlbumType, AlbumStatus
from ..objects import (
Artist,
@@ -111,9 +111,7 @@ def parse_url(url: str) -> MusifyUrl:
class Musify(Page):
# CHANGE
SOURCE_TYPE = SourcePages.MUSIFY
LOGGER = logging_settings["musify_logger"]
SOURCE_TYPE = ALL_SOURCE_TYPES.MUSIFY
HOST = "https://musify.club"
@@ -505,7 +503,7 @@ class Musify(Page):
iframe_list: List[BeautifulSoup] = video_container.findAll("iframe")
for iframe in iframe_list:
source_list.append(Source(
SourcePages.YOUTUBE,
SourceType.YOUTUBE,
iframe["src"],
referrer_page=self.SOURCE_TYPE
))

View File

@@ -1,65 +0,0 @@
from typing import List, Optional, Type
from urllib.parse import urlparse
import logging
from ..objects import Source, DatabaseObject
from .abstract import Page
from ..objects import (
Artist,
Source,
SourcePages,
Song,
Album,
Label,
Target
)
from ..connection import Connection
from ..utils.support_classes.query import Query
from ..utils.support_classes.download_result import DownloadResult
class Preset(Page):
# CHANGE
SOURCE_TYPE = SourcePages.PRESET
LOGGER = logging.getLogger("preset")
def __init__(self, *args, **kwargs):
self.connection: Connection = Connection(
host="https://www.preset.cum/",
logger=self.LOGGER
)
super().__init__(*args, **kwargs)
def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
return super().get_source_type(source)
def general_search(self, search_query: str) -> List[DatabaseObject]:
return []
def label_search(self, label: Label) -> List[Label]:
return []
def artist_search(self, artist: Artist) -> List[Artist]:
return []
def album_search(self, album: Album) -> List[Album]:
return []
def song_search(self, song: Song) -> List[Song]:
return []
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
return Song()
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
return Album()
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
return Artist()
def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:
return Label()
def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult:
return DownloadResult()

View File

@@ -9,7 +9,6 @@ from .abstract import Page
from ..objects import (
Artist,
Source,
SourcePages,
Song,
Album,
Label,
@@ -19,6 +18,7 @@ from ..objects import (
)
from ..connection import Connection
from ..utils.string_processing import clean_song_title
from ..utils.enums import SourceType, ALL_SOURCE_TYPES
from ..utils.support_classes.download_result import DownloadResult
from ..utils.config import youtube_settings, main_settings, logging_settings
@@ -39,8 +39,7 @@ def get_piped_url(path: str = "", params: str = "", query: str = "", fragment: s
class YouTube(SuperYouTube):
# CHANGE
SOURCE_TYPE = SourcePages.YOUTUBE
LOGGER = logging_settings["youtube_logger"]
SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE
NO_ADDITIONAL_DATA_FROM_SONG = False

View File

@@ -7,7 +7,6 @@ from ..abstract import Page
from ...objects import (
Artist,
Source,
SourcePages,
Song,
Album,
Label,

View File

@@ -3,12 +3,13 @@ from enum import Enum
from ...utils.config import youtube_settings, logging_settings
from ...utils.string_processing import clean_song_title
from ...utils.enums import SourceType, ALL_SOURCE_TYPES
from ...objects import Source, DatabaseObject
from ..abstract import Page
from ...objects import (
Artist,
Source,
SourcePages,
Song,
Album,
Label,
@@ -18,7 +19,7 @@ from ...objects import (
LOGGER = logging_settings["youtube_music_logger"]
SOURCE_PAGE = SourcePages.YOUTUBE_MUSIC
SOURCE_PAGE = ALL_SOURCE_TYPES.YOUTUBE
class PageType(Enum):

View File

@@ -10,7 +10,6 @@ from ..abstract import Page
from ...objects import (
Artist,
Source,
SourcePages,
Song,
Album,
Label,
@@ -21,6 +20,7 @@ from ...objects import (
from ...connection import Connection
from ...utils.support_classes.download_result import DownloadResult
from ...utils.config import youtube_settings, logging_settings, main_settings
from ...utils.enums import SourceType, ALL_SOURCE_TYPES
def get_invidious_url(path: str = "", params: str = "", query: str = "", fragment: str = "") -> str:
@@ -50,7 +50,7 @@ class YouTubeUrl:
"""
def __init__(self, url: str) -> None:
self.SOURCE_TYPE = SourcePages.YOUTUBE
self.SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE
"""
Raises Index exception for wrong url, and value error for not found enum type
@@ -58,9 +58,6 @@ class YouTubeUrl:
self.id = ""
parsed = urlparse(url=url)
if parsed.netloc == "music.youtube.com":
self.SOURCE_TYPE = SourcePages.YOUTUBE_MUSIC
self.url_type: YouTubeUrlType
type_frag_list = parsed.path.split("/")
@@ -124,8 +121,7 @@ class YouTubeUrl:
class SuperYouTube(Page):
# CHANGE
SOURCE_TYPE = SourcePages.YOUTUBE
LOGGER = logging_settings["youtube_logger"]
SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE
NO_ADDITIONAL_DATA_FROM_SONG = False
@@ -145,6 +141,8 @@ class SuperYouTube(Page):
_sponsorblock_connection: Connection = Connection()
self.sponsorblock = python_sponsorblock.SponsorBlock(silent=True, session=_sponsorblock_connection.session)
super().__init__(*args, **kwargs)
def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
_url_type = {
YouTubeUrlType.CHANNEL: Artist,

View File

@@ -27,7 +27,7 @@ from ..abstract import Page
from ...objects import (
Artist,
Source,
SourcePages,
SourceType,
Song,
Album,
Label,
@@ -176,7 +176,7 @@ ALBUM_TYPE_MAP = {
class YoutubeMusic(SuperYouTube):
# CHANGE
SOURCE_TYPE = SourcePages.YOUTUBE_MUSIC
SOURCE_TYPE = SourceType.YOUTUBE_MUSIC
LOGGER = logging_settings["youtube_music_logger"]
def __init__(self, *args, ydl_opts: dict = None, **kwargs):
@@ -193,8 +193,7 @@ class YoutubeMusic(SuperYouTube):
self.start_millis = get_current_millis()
if self.credentials.api_key == "" or DEBUG_YOUTUBE_INITIALIZING:
self._fetch_from_main_page()
self._fetch_from_main_page()
SuperYouTube.__init__(self, *args, **kwargs)
@@ -215,6 +214,8 @@ class YoutubeMusic(SuperYouTube):
self.download_values_by_url: dict = {}
self.not_download: Dict[str, DownloadError] = {}
super().__init__(*args, **kwargs)
def _fetch_from_main_page(self):
"""
===API=KEY===
@@ -618,7 +619,7 @@ class YoutubeMusic(SuperYouTube):
Artist(
name=name,
source_list=[Source(
SourcePages.YOUTUBE_MUSIC,
SourceType.YOUTUBE_MUSIC,
f"https://music.youtube.com/channel/{ydl_res.get('channel_id', ydl_res.get('uploader_id', ''))}"
)]
) for name in artist_names]
@@ -639,7 +640,7 @@ class YoutubeMusic(SuperYouTube):
artwork=Artwork(*ydl_res.get("thumbnails", [])),
main_artist_list=artist_list,
source_list=[Source(
SourcePages.YOUTUBE_MUSIC,
SourceType.YOUTUBE_MUSIC,
f"https://music.youtube.com/watch?v={ydl_res.get('id')}"
), source],
)
@@ -736,8 +737,9 @@ class YoutubeMusic(SuperYouTube):
raw_headers=True,
disable_cache=True,
headers=media.get("headers", {}),
# chunk_size=media.get("chunk_size", main_settings["chunk_size"]),
chunk_size=main_settings["chunk_size"],
method="GET",
timeout=5,
)
else:
result = DownloadResult(error_message=str(media.get("error") or self.not_download[source.hash_url]))

View File

@@ -1 +1,53 @@
from .source import SourcePages
from __future__ import annotations
from dataclasses import dataclass
from typing import Optional, TYPE_CHECKING, Type
if TYPE_CHECKING:
from ...pages.abstract import Page
@dataclass
class SourceType:
name: str
homepage: Optional[str] = None
download_priority: int = 0
page_type: Type[Page] = None
page: Page = None
def register_page(self, page_type: Type[Page]):
self.page_type = page
self.page = page_type()
@property
def has_page(self) -> bool:
return self.page is not None
# for backwards compatibility
@property
def value(self) -> str:
return self.name
class ALL_SOURCE_TYPES:
YOUTUBE = SourceType(name="youtube", homepage="https://music.youtube.com/")
BANDCAMP = SourceType(name="bandcamp", homepage="https://bandcamp.com/", download_priority=10)
MUSIFY = SourceType(name="musify", homepage="https://musify.club/", download_priority=7)
GENIUS = SourceType(name="genius", homepage="https://genius.com/")
MUSICBRAINZ = SourceType(name="musicbrainz", homepage="https://musicbrainz.org/")
ENCYCLOPAEDIA_METALLUM = SourceType(name="encyclopaedia metallum")
DEEZER = SourceType(name="deezer", homepage="https://www.deezer.com/")
SPOTIFY = SourceType(name="spotify", homepage="https://open.spotify.com/")
# This has nothing to do with audio, but bands can be here
WIKIPEDIA = SourceType(name="wikipedia", homepage="https://en.wikipedia.org/wiki/Main_Page")
INSTAGRAM = SourceType(name="instagram", homepage="https://www.instagram.com/")
FACEBOOK = SourceType(name="facebook", homepage="https://www.facebook.com/")
TWITTER = SourceType(name="twitter", homepage="https://twitter.com/")
# Yes somehow this ancient site is linked EVERYWHERE
MYSPACE = SourceType(name="myspace", homepage="https://myspace.com/")
MANUAL = SourceType(name="manual")
PRESET = SourceType(name="preset")

View File

@@ -1,40 +0,0 @@
from enum import Enum
class SourceTypes(Enum):
SONG = "song"
ALBUM = "album"
ARTIST = "artist"
LYRICS = "lyrics"
class SourcePages(Enum):
YOUTUBE = "youtube", "https://www.youtube.com/"
MUSIFY = "musify", "https://musify.club/"
YOUTUBE_MUSIC = "youtube music", "https://music.youtube.com/"
GENIUS = "genius", "https://genius.com/"
MUSICBRAINZ = "musicbrainz", "https://musicbrainz.org/"
ENCYCLOPAEDIA_METALLUM = "encyclopaedia metallum"
BANDCAMP = "bandcamp", "https://bandcamp.com/"
DEEZER = "deezer", "https://www.deezer.com/"
SPOTIFY = "spotify", "https://open.spotify.com/"
# This has nothing to do with audio, but bands can be here
WIKIPEDIA = "wikipedia", "https://en.wikipedia.org/wiki/Main_Page"
INSTAGRAM = "instagram", "https://www.instagram.com/"
FACEBOOK = "facebook", "https://www.facebook.com/"
TWITTER = "twitter", "https://twitter.com/"
MYSPACE = "myspace", "https://myspace.com/" # Yes somehow this ancient site is linked EVERYWHERE
MANUAL = "manual", ""
PRESET = "preset", ""
def __new__(cls, value, homepage = None):
member = object.__new__(cls)
member._value_ = value
member.homepage = homepage
return member

View File

@@ -4,8 +4,20 @@ class MKBaseException(Exception):
super().__init__(message, **kwargs)
# Downloading
class MKDownloadException(MKBaseException):
pass
class MKMissingNameException(MKDownloadException):
pass
# Frontend
class MKFrontendException(MKBaseException):
pass
class MKInvalidInputException(MKFrontendException):
pass

View File

@@ -69,7 +69,7 @@ dependencies = [
"toml~=0.10.2",
"typing_extensions~=4.7.1",
"python-sponsorblock~=0.0.dev1",
"python-sponsorblock~=0.1",
"youtube_dl",
]
dynamic = [