music-kraken-core/src/music_kraken/pages/abstract.py

494 lines
18 KiB
Python
Raw Normal View History

2023-03-31 07:47:03 +00:00
import random
2023-03-30 12:39:28 +00:00
from typing import Optional, Union, Type, Dict, List
from bs4 import BeautifulSoup
import requests
import logging
2023-04-03 08:38:12 +00:00
from dataclasses import dataclass
from copy import copy
from ..utils import shared
2023-03-10 09:13:35 +00:00
from ..objects import (
2023-01-23 13:53:35 +00:00
Song,
Source,
Album,
Artist,
Lyrics,
Target,
2023-03-24 14:58:21 +00:00
DatabaseObject,
2023-03-20 13:40:32 +00:00
Options,
2023-03-21 11:46:32 +00:00
SourcePages,
2023-03-24 13:28:19 +00:00
Collection,
Label
2023-01-23 13:53:35 +00:00
)
2023-03-30 14:50:27 +00:00
from ..tagging import write_metadata_to_target
2023-04-03 08:38:12 +00:00
from ..utils.shared import DOWNLOAD_PATH, DOWNLOAD_FILE, DEFAULT_VALUES
2023-04-03 09:17:55 +00:00
from ..utils.string_processing import fit_to_file_system
2023-01-23 13:53:35 +00:00
2023-03-24 14:58:21 +00:00
# Module-level fallback logger; it is what Page.LOGGER points to by default.
# NOTE(review): the logger name suggests concrete pages are expected to
# replace it with their own logger -- confirm against the subclasses.
LOGGER = logging.getLogger("this shouldn't be used")
2023-04-03 17:59:31 +00:00
2023-04-03 08:38:12 +00:00
@dataclass
class DefaultTarget:
    """
    Naming components used to build the fallback download target of a song.

    Every field starts out as its entry in ``DEFAULT_VALUES``. A field that
    has an entry in ``DEFAULT_VALUES`` only accepts a new value while it still
    equals that default; the value is passed through ``fit_to_file_system``
    before it is stored. Once a field differs from its default, further
    assignments to it are ignored.
    """
    genre: str = DEFAULT_VALUES["genre"]
    label: str = DEFAULT_VALUES["label"]
    artist: str = DEFAULT_VALUES["artist"]
    album: str = DEFAULT_VALUES["album"]
    song: str = DEFAULT_VALUES["song"]

    def __setattr__(self, __name: str, __value: str) -> None:
        # Attributes without a configured default are stored untouched.
        if __name not in DEFAULT_VALUES:
            super().__setattr__(__name, __value)
            return

        # NOTE(review): the listing this was taken from lost its indentation;
        # the early return is read as belonging to the outer membership check,
        # i.e. a field that already left its default is never overwritten.
        still_default = self.__getattribute__(__name) == DEFAULT_VALUES[__name]
        if still_default:
            super().__setattr__(__name, fit_to_file_system(__value))

    @property
    def target(self) -> Target:
        """
        Build a ``Target`` relative to the music directory by filling the
        ``DOWNLOAD_PATH`` and ``DOWNLOAD_FILE`` templates with the current
        field values.

        :return target: a freshly constructed Target
        """
        placeholders = {
            "genre": self.genre,
            "label": self.label,
            "artist": self.artist,
            "album": self.album,
            "song": self.song,
        }

        return Target(
            relative_to_music_dir=True,
            path=DOWNLOAD_PATH.format(**placeholders),
            file=DOWNLOAD_FILE.format(**placeholders)
        )
2023-04-03 17:59:31 +00:00
2023-01-23 13:53:35 +00:00
class Page:
    """
    Abstract base class that lays out the functionality
    shared by every class fetching something from a page.
    """
    # Identifies the page a concrete subclass talks to; only declared here.
    SOURCE_TYPE: SourcePages

    # Logger used by the request helpers of this class.
    LOGGER = LOGGER

    # Recursion limit of the retry logic in get_request / post_request.
    TRIES = 5
    # Passed as ``timeout`` to the session's GET calls.
    TIMEOUT = 5
    # Passed as ``timeout`` to the session's POST calls.
    POST_TIMEOUT = TIMEOUT

    # One session object stored on the class, configured with the proxies
    # from ``shared``.
    API_SESSION: requests.Session = requests.Session()
    API_SESSION.proxies = shared.proxies
@classmethod
def get_request(cls, url: str, stream: bool = False, accepted_response_codes: set = set((200,)), trie: int = 0) -> \
        Optional[
            requests.Response]:
    """
    Send a GET request through ``cls.API_SESSION`` and retry on failure.

    A timeout, a connection error or a status code outside of
    ``accepted_response_codes`` triggers another attempt by calling this
    method again with ``trie + 1``. Once ``trie`` has reached ``cls.TRIES``
    the method gives up.

    :param url: the url to request
    :param stream: forwarded as ``stream`` to the session
    :param accepted_response_codes: status codes that count as success
    :param trie: number of the current attempt, starting at 0
    :return response: the accepted response, or None after too many tries
    """
    response = None
    try:
        response = cls.API_SESSION.get(url, timeout=cls.TIMEOUT, stream=stream)
    except requests.exceptions.Timeout:
        cls.LOGGER.warning(f"request timed out at \"{url}\": ({trie}-{cls.TRIES})")
    except requests.exceptions.ConnectionError:
        cls.LOGGER.warning(f"couldn't connect to \"{url}\": ({trie}-{cls.TRIES})")

    # ``response`` stays None only if one of the handled exceptions occurred
    if response is not None:
        if response.status_code in accepted_response_codes:
            return response

        cls.LOGGER.warning(f"{cls.__name__} responded wit {response.status_code} at GET:{url}. ({trie}-{cls.TRIES})")
        cls.LOGGER.debug(response.content)

    if trie >= cls.TRIES:
        cls.LOGGER.warning("to many tries. Aborting.")
        return None

    return cls.get_request(url=url, stream=stream, accepted_response_codes=accepted_response_codes, trie=trie + 1)
@classmethod
def post_request(cls, url: str, json: dict, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[
        requests.Response]:
    """
    Send a POST request with a json payload through ``cls.API_SESSION``
    and retry on failure.

    A timeout, a connection error or a status code outside of
    ``accepted_response_codes`` triggers another attempt by calling this
    method again with ``trie + 1``; the payload is logged before each retry.
    Once ``trie`` has reached ``cls.TRIES`` the method gives up.

    :param url: the url to post to
    :param json: the payload, forwarded as ``json`` to the session
    :param accepted_response_codes: status codes that count as success
    :param trie: number of the current attempt, starting at 0
    :return response: the accepted response, or None after too many tries
    """
    response = None
    try:
        response = cls.API_SESSION.post(url, json=json, timeout=cls.POST_TIMEOUT)
    except requests.exceptions.Timeout:
        cls.LOGGER.warning(f"request timed out at \"{url}\": ({trie}-{cls.TRIES})")
    except requests.exceptions.ConnectionError:
        cls.LOGGER.warning(f"couldn't connect to \"{url}\": ({trie}-{cls.TRIES})")

    # ``response`` stays None only if one of the handled exceptions occurred
    if response is not None:
        if response.status_code in accepted_response_codes:
            return response

        cls.LOGGER.warning(f"{cls.__name__} responded wit {response.status_code} at POST:{url}. ({trie}-{cls.TRIES})")
        cls.LOGGER.debug(response.content)

    if trie >= cls.TRIES:
        cls.LOGGER.warning("to many tries. Aborting.")
        return None

    cls.LOGGER.warning(f"payload: {json}")
    return cls.post_request(url=url, json=json, accepted_response_codes=accepted_response_codes, trie=trie + 1)
@classmethod
def get_soup_from_response(cls, r: requests.Response) -> BeautifulSoup:
    """
    Parse the content of a response with BeautifulSoup's "html.parser".

    :param r: the response whose ``content`` gets parsed
    :return soup:
    """
    markup = r.content
    return BeautifulSoup(markup, "html.parser")
2023-01-23 23:16:10 +00:00
class Query:
    """
    Parsed form of a search query.

    A query either is *raw* (it contains no "#") or consists of parameters
    introduced by "#", where the word directly behind the "#" selects the
    type of the parameter:

    - ``#a`` the artist
    - ``#r`` the album
    - ``#t`` the song

    Example: ``"#a Psychonaut 4 #r Tired, Numb and #t Drop by Drop"``
    """

    def __init__(self, query: str):
        """
        :param query: the query string, which gets parsed immediately
        """
        self.query = query
        self.is_raw = False

        self.artist: Optional[str] = None
        self.album: Optional[str] = None
        self.song: Optional[str] = None

        self.parse_query(query=query)

    def __str__(self):
        if self.is_raw:
            return self.query
        return f"{self.artist}; {self.album}; {self.song}"

    def parse_query(self, query: str):
        """
        Fill ``artist``, ``album`` and ``song`` from the query, or mark the
        query as raw if it doesn't contain any "#".

        Parameters with an unknown type are ignored.

        :param query: the query string to parse
        """
        if '#' not in query:
            self.is_raw = True
            return

        for parameter in query.strip().split('#'):
            # Text in front of the first "#" or two adjacent "#" produce empty
            # segments; they are skipped instead of being removed with
            # list.remove(''), which raised a ValueError whenever the query
            # didn't start with "#".
            if not parameter:
                continue

            # everything up to the first space is the type, the rest the value
            type_, _, rest = parameter.partition(" ")
            input_ = rest.strip()

            if type_ == "a":
                self.artist = input_
            elif type_ == "r":
                self.album = input_
            elif type_ == "t":
                self.song = input_

    def get_str(self, string):
        """
        :param string: a string or None
        :return string: the passed string, or "" if it was None
        """
        if string is None:
            return ""
        return string

    # string versions of the parameters, which are never None
    artist_str = property(fget=lambda self: self.get_str(self.artist))
    album_str = property(fget=lambda self: self.get_str(self.album))
    song_str = property(fget=lambda self: self.get_str(self.song))
2023-01-23 13:53:35 +00:00
@classmethod
def search_by_query(cls, query: str) -> Options:
    """
    # The Query

    A new parameter is introduced with "#". The letter behind it defines
    the *type* of the parameter and is followed by a space:
    "#a Psychonaut 4 #r Tired, Numb and #t Drop by Drop"

    A query without any "#" is treated as an "unspecified query".

    # Functionality

    Meant to return the best matches from this page for the passed query.
    This implementation doesn't search anything and returns empty Options.

    :param query:
    :return possible_music_objects:
    """
    no_results = Options()
    return no_results
2023-01-23 13:53:35 +00:00
@classmethod
def fetch_details(cls, music_object: Union[Song, Album, Artist, Label], stop_at_level: int = 1) -> DatabaseObject:
    """
    Takes a music object with lacking data and returns
    the SAME object **(no copy)** enriched with more detailed data.
    If you pass in an album, for example, its tracklist gets fetched.

    Every source of the object that belongs to this page is fetched and
    merged into one new object, which is cleaned and then merged into
    the passed object. Without any matching source the object is only compiled.

    :param music_object:
    :param stop_at_level:
    The depth of levels the scraper will recurse to.
    If this is for example set to 2, then the levels could be:
    1. Level: the album
    2. Level: every song of the album + every artist of the album
    If no additional requests are needed to get the data one level below
    the supposed stop level, this gets ignored.
    :return detailed_music_object: IT MODIFIES THE INPUT OBJ
    """
    obj_type = type(music_object)
    fetched: DatabaseObject = obj_type()

    found_source = False
    page_source: Source
    for page_source in music_object.source_collection.get_sources_from_page(cls.SOURCE_TYPE):
        found_source = True
        fetched.merge(
            cls._fetch_object_from_source(source=page_source, obj_type=obj_type, stop_at_level=stop_at_level))

    if found_source:
        # one collection per object type, shared by the whole cleaning run
        shared_collections = {
            kind: Collection(element_type=kind)
            for kind in (Label, Artist, Album, Song)
        }
        cls._clean_music_object(fetched, shared_collections)
        music_object.merge(fetched)

    music_object.compile(merge_into=True)
    return music_object
2023-04-03 17:59:31 +00:00
2023-03-30 10:00:39 +00:00
@classmethod
def fetch_object_from_source(cls, source: Source, stop_at_level: int = 2):
    """
    Fetch the music object a source points to.

    The type of the object is derived from the url of the source by
    ``_get_type_of_url``. The fetched object is cleaned and compiled
    before it is returned.

    :param source: the source to fetch from
    :param stop_at_level: forwarded to ``_fetch_object_from_source``
    :return music_object: None if no type could be derived from the url
    """
    detected_type = cls._get_type_of_url(source.url)
    if detected_type is None:
        return None

    fetched = cls._fetch_object_from_source(source=source, obj_type=detected_type, stop_at_level=stop_at_level)

    # one collection per object type, shared by the whole cleaning run
    shared_collections = {
        kind: Collection(element_type=kind)
        for kind in (Label, Artist, Album, Song)
    }
    cls._clean_music_object(fetched, shared_collections)

    fetched.compile(merge_into=True)
    return fetched
2023-03-20 13:40:32 +00:00
2023-02-06 14:06:38 +00:00
@classmethod
def _fetch_object_from_source(cls, source: Source,
                              obj_type: Union[Type[Song], Type[Album], Type[Artist], Type[Label]],
                              stop_at_level: int = 1) -> Union[Song, Album, Artist, Label]:
    """
    Dispatch to the ``_fetch_*_from_source`` method matching ``obj_type``.

    :param source: the source to fetch from
    :param obj_type: one of Artist, Song, Album or Label
    :param stop_at_level: forwarded to the matching fetch method
    :return music_object: None if ``obj_type`` is none of the four types
    """
    fetch_by_type = {
        Artist: cls._fetch_artist_from_source,
        Song: cls._fetch_song_from_source,
        Album: cls._fetch_album_from_source,
        Label: cls._fetch_label_from_source,
    }

    fetch = fetch_by_type.get(obj_type)
    if fetch is None:
        return None

    return fetch(source=source, stop_at_level=stop_at_level)
2023-01-23 14:52:50 +00:00
2023-03-24 14:58:21 +00:00
@classmethod
def _clean_music_object(cls, music_object: Union[Label, Album, Artist, Song],
                        collections: Dict[Union[Type[Song], Type[Album], Type[Artist], Type[Label]], Collection]):
    """
    Dispatch to the ``_clean_*`` method matching the exact type of
    ``music_object``. Objects of any other type are left alone.

    :param music_object: the object to clean
    :param collections: one Collection per object type
    :return: whatever the matching ``_clean_*`` method returns
    """
    # exact type -> (keyword the cleaner expects, cleaner)
    cleaner_by_type = {
        Label: ("label", cls._clean_label),
        Artist: ("artist", cls._clean_artist),
        Album: ("album", cls._clean_album),
        Song: ("song", cls._clean_song),
    }

    match = cleaner_by_type.get(type(music_object))
    if match is None:
        return None

    keyword, cleaner = match
    return cleaner(collections=collections, **{keyword: music_object})
@classmethod
def _clean_collection(cls, collection: Collection,
                      collection_dict: Dict[Union[Type[Song], Type[Album], Type[Artist], Type[Label]], Collection]):
    """
    Route every element of ``collection`` through the collection of the
    same element type in ``collection_dict``.

    Each element is appended there with ``merge_into_existing=True`` and
    replaced in ``collection`` by the ``current_element`` of the append
    result. Unless the result reports ``was_the_same``, that element is
    cleaned recursively.
    NOTE(review): presumably ``was_the_same`` marks elements that were
    already known, which would end the recursion -- confirm in Collection.

    :param collection: the collection to clean, modified in place
    :param collection_dict: one Collection per object type
    """
    element_type = collection.element_type
    if element_type not in collection_dict:
        return

    for index, element in enumerate(collection):
        result = collection_dict[element_type].append(element, merge_into_existing=True)
        collection[index] = result.current_element

        if result.was_the_same:
            continue
        cls._clean_music_object(result.current_element, collection_dict)
2023-03-24 14:58:21 +00:00
@classmethod
def _clean_label(cls, label: Label,
                 collections: Dict[Union[Type[Song], Type[Album], Type[Artist], Type[Label]], Collection]):
    """
    Clean the current artist collection and the album collection of a label.

    :param label: the label to clean
    :param collections: one Collection per object type
    """
    # attributes are looked up one at a time, right before they get cleaned
    for attribute in ("current_artist_collection", "album_collection"):
        cls._clean_collection(getattr(label, attribute), collections)
@classmethod
def _clean_artist(cls, artist: Artist,
                  collections: Dict[Union[Type[Song], Type[Album], Type[Artist], Type[Label]], Collection]):
    """
    Clean the main album, feature song and label collection of an artist.

    :param artist: the artist to clean
    :param collections: one Collection per object type
    """
    # attributes are looked up one at a time, right before they get cleaned
    for attribute in ("main_album_collection", "feature_song_collection", "label_collection"):
        cls._clean_collection(getattr(artist, attribute), collections)
@classmethod
def _clean_album(cls, album: Album,
                 collections: Dict[Union[Type[Song], Type[Album], Type[Artist], Type[Label]], Collection]):
    """
    Clean the label, song and artist collection of an album.

    :param album: the album to clean
    :param collections: one Collection per object type
    """
    # attributes are looked up one at a time, right before they get cleaned
    for attribute in ("label_collection", "song_collection", "artist_collection"):
        cls._clean_collection(getattr(album, attribute), collections)
@classmethod
def _clean_song(cls, song: Song,
                collections: Dict[Union[Type[Song], Type[Album], Type[Artist], Type[Label]], Collection]):
    """
    Clean the album, feature artist and main artist collection of a song.

    :param song: the song to clean
    :param collections: one Collection per object type
    """
    # attributes are looked up one at a time, right before they get cleaned
    for attribute in ("album_collection", "feature_artist_collection", "main_artist_collection"):
        cls._clean_collection(getattr(song, attribute), collections)
2023-03-30 12:52:50 +00:00
@classmethod
def download(
        cls,
        music_object: Union[Song, Album, Artist, Label],
        download_features: bool = True,
        default_target: DefaultTarget = None
) -> bool:
    """
    Download a music object by dispatching to the download method
    matching its exact type.

    :param music_object: the Song, Album, Artist or Label to download
    :param download_features: forwarded to ``download_artist`` and ``download_label``
    :param default_target: naming defaults for the targets; a fresh
    DefaultTarget is created if None is passed
    :return: the result of the matching download method,
    or False if the object is of none of the four types
    """
    if default_target is None:
        default_target = DefaultTarget()

    # ``default_target`` has to be passed by keyword: the second positional
    # parameter of the download methods is ``override_existing`` (song, album)
    # or ``download_features`` (artist), so a positional DefaultTarget would
    # end up in the wrong parameter.
    if type(music_object) is Song:
        return cls.download_song(music_object, default_target=default_target)
    if type(music_object) is Album:
        return cls.download_album(music_object, default_target=default_target)
    if type(music_object) is Artist:
        return cls.download_artist(music_object, download_features=download_features,
                                   default_target=default_target)
    if type(music_object) is Label:
        return cls.download_label(music_object, download_features=download_features,
                                  default_target=default_target)

    return False
2023-04-03 17:59:31 +00:00
2023-03-30 12:52:50 +00:00
@classmethod
def download_label(cls, label: Label, download_features: bool = True, override_existing: bool = False,
                   default_target: DefaultTarget = None):
    """
    Download every current artist and every album of a label.

    The details of the label are fetched first. The artists and albums are
    downloaded with a copy of ``default_target`` on which the name of the
    label has been set.

    :param label: the label to download
    :param download_features: forwarded to ``download_artist``
    :param override_existing: forwarded to the artist and album downloads
    :param default_target: naming defaults; never modified, only copied
    """
    # work on a private instance, so the caller's defaults stay untouched
    default_target = DefaultTarget() if default_target is None else copy(default_target)
    default_target.label = label.name

    cls.fetch_details(label)

    for label_artist in label.current_artist_collection:
        cls.download_artist(label_artist, download_features=download_features,
                            override_existing=override_existing, default_target=default_target)

    for label_album in label.album_collection:
        cls.download_album(label_album, override_existing=override_existing, default_target=default_target)
2023-03-30 12:52:50 +00:00
@classmethod
def download_artist(cls, artist: Artist, download_features: bool = True, override_existing: bool = False,
                    default_target: DefaultTarget = None):
    """
    Download every main album of an artist and, if requested, the songs
    of the artist's feature album.

    The details of the artist are fetched first. The downloads use a copy of
    ``default_target`` on which the name of the artist and of its first label
    (if there is one) have been set.

    :param artist: the artist to download
    :param download_features: whether the songs of ``artist.feature_album`` are downloaded too
    :param override_existing: forwarded to the album and song downloads
    :param default_target: naming defaults; never modified, only copied
    """
    # work on a private instance, so the caller's defaults stay untouched
    default_target = DefaultTarget() if default_target is None else copy(default_target)

    default_target.artist = artist.name
    if not artist.label_collection.empty:
        default_target.label = artist.label_collection[0].name

    cls.fetch_details(artist)

    for main_album in artist.main_album_collection:
        cls.download_album(main_album, override_existing=override_existing, default_target=default_target)

    if not download_features:
        return

    for feature_song in artist.feature_album.song_collection:
        cls.download_song(feature_song, override_existing=override_existing, default_target=default_target)
2023-03-30 12:52:50 +00:00
@classmethod
def download_album(cls, album: Album, override_existing: bool = False, default_target: DefaultTarget = None):
    """
    Download every song of an album.

    The details of the album are fetched and its tracksort is updated first.
    The songs are downloaded with a copy of ``default_target`` on which the
    title of the album and the names of its first artist and first label
    (if there are any) have been set.

    :param album: the album to download
    :param override_existing: forwarded to ``download_song``
    :param default_target: naming defaults; never modified, only copied
    """
    # work on a private instance, so the caller's defaults stay untouched
    default_target = DefaultTarget() if default_target is None else copy(default_target)

    default_target.album = album.title
    if not album.artist_collection.empty:
        default_target.artist = album.artist_collection[0].name
    if not album.label_collection.empty:
        default_target.label = album.label_collection[0].name

    cls.fetch_details(album)
    album.update_tracksort()

    for track in album.song_collection:
        cls.download_song(track, override_existing=override_existing, default_target=default_target)
2023-03-30 12:52:50 +00:00
2023-03-30 12:39:28 +00:00
@classmethod
def download_song(cls, song: Song, override_existing: bool = False, create_target_on_demand: bool = True,
                  default_target: DefaultTarget = None):
    """
    Download a single song to all of its targets.

    1. The naming defaults are filled from the song, its first album, its
       first main artist and the first label of that artist.
    2. The details of the song are fetched.
    3. If the song has no target, one is created from the naming defaults,
       provided that ``create_target_on_demand`` is set and the song has a
       main artist and an album. Otherwise nothing is downloaded.
    4. If a target already exists and must not be overridden, the first
       existing target is copied to all other targets.
    5. Otherwise the first source of this page is downloaded to a temporary
       target, which is then post processed.

    :param song: the song to download
    :param override_existing: download again even if a target already exists
    :param create_target_on_demand: allow creating a target from the naming defaults
    :param default_target: naming defaults; never modified, only copied
    :return: None if the song has no target and none could be created,
    True if an existing target got reused, False if the song has no source on
    this page; else whether both the download and the post processing succeeded
    """
    # work on a private instance, so the caller's defaults stay untouched
    default_target = DefaultTarget() if default_target is None else copy(default_target)

    default_target.song = song.title
    if not song.album_collection.empty:
        default_target.album = song.album_collection[0].title
    if not song.main_artist_collection.empty:
        main_artist: Artist = song.main_artist_collection[0]
        default_target.artist = main_artist.name

        if not main_artist.label_collection.empty:
            default_target.label = main_artist.label_collection[0].name

    cls.fetch_details(song)

    if song.target_collection.empty:
        may_create_target = (
            create_target_on_demand
            and not song.main_artist_collection.empty
            and not song.album_collection.empty
        )
        if not may_create_target:
            return
        song.target_collection.append(default_target.target)

    already_downloaded = any(candidate.exists for candidate in song.target_collection)
    if already_downloaded and not override_existing:
        # pick the first target that exists ...
        existing_target: Target
        for existing_target in song.target_collection:
            if existing_target.exists:
                break

        # ... and copy it to every other target
        other_target: Target
        for other_target in song.target_collection:
            if other_target is not existing_target:
                existing_target.copy_content(other_target)
        return True

    page_sources = song.source_collection.get_sources_from_page(cls.SOURCE_TYPE)
    if len(page_sources) == 0:
        return False

    # the song is downloaded to a randomly named file in the temp directory first
    temp_target: Target = Target(
        path=shared.TEMP_DIR,
        file=str(random.randint(0, 999999))
    )

    # the post processing runs even if the download reported a failure
    download_succeeded = bool(cls._download_song_to_targets(source=page_sources[0], target=temp_target))
    post_processing_succeeded = bool(cls._post_process_targets(song, temp_target))

    return download_succeeded and post_processing_succeeded
2023-04-03 17:59:31 +00:00
@classmethod
def _post_process_targets(cls, song: Song, temp_target: Target) -> bool:
    """
    Write the metadata of the song to the temporary target and copy
    the temporary target to every target of the song.

    :param song: the song whose metadata and targets are used
    :param temp_target: the target the song has been downloaded to
    :return success: True once every step ran through. ``download_song``
    treats a falsy result as a failure, so the implicit None that used to be
    returned here made every download look unsuccessful.
    """
    write_metadata_to_target(song.metadata, temp_target)

    target: Target
    for target in song.target_collection:
        temp_target.copy_content(target)

    return True
2023-03-30 12:39:28 +00:00
2023-02-06 14:06:38 +00:00
@classmethod
def _fetch_song_from_source(cls, source: Source, stop_at_level: int = 1) -> Song:
    """
    Base implementation, which ignores its arguments and returns an empty Song.
    NOTE(review): presumably meant to be overridden by the concrete pages.

    :param source:
    :param stop_at_level:
    :return song:
    """
    empty_song = Song()
    return empty_song
2023-04-03 17:59:31 +00:00
@classmethod
def _fetch_album_from_source(cls, source: Source, stop_at_level: int = 1) -> Album:
    """
    Base implementation, which ignores its arguments and returns an empty Album.
    NOTE(review): presumably meant to be overridden by the concrete pages.

    :param source:
    :param stop_at_level:
    :return album:
    """
    empty_album = Album()
    return empty_album
2023-03-20 13:40:32 +00:00
@classmethod
def _fetch_artist_from_source(cls, source: Source, stop_at_level: int = 1) -> Artist:
    """
    Base implementation, which ignores its arguments and returns an empty Artist.
    NOTE(review): presumably meant to be overridden by the concrete pages.

    :param source:
    :param stop_at_level:
    :return artist:
    """
    empty_artist = Artist()
    return empty_artist
2023-03-24 14:58:21 +00:00
@classmethod
def _fetch_label_from_source(cls, source: Source, stop_at_level: int = 1) -> Label:
    """
    Base implementation, which ignores its arguments and returns an empty Label.
    NOTE(review): presumably meant to be overridden by the concrete pages.

    :param source:
    :param stop_at_level:
    :return label:
    """
    empty_label = Label()
    return empty_label
2023-03-30 10:00:39 +00:00
@classmethod
def _get_type_of_url(cls, url: str) -> Optional[Union[Type[Song], Type[Album], Type[Artist], Type[Label]]]:
    """
    Base implementation, which doesn't recognize any url and returns None.
    ``fetch_object_from_source`` uses the result to decide which kind of
    object a source points to.
    NOTE(review): presumably meant to be overridden by the concrete pages.

    :param url:
    :return obj_type: always None here
    """
    unknown_type = None
    return unknown_type
2023-04-03 17:59:31 +00:00
2023-03-30 12:39:28 +00:00
@classmethod
def _download_song_to_targets(cls, source: Source, target: Target) -> Target:
    """
    Base implementation, which downloads nothing and returns a new, empty Target.
    ``download_song`` only checks the result for truthiness.
    NOTE(review): presumably meant to be overridden by the concrete pages.

    :param source: the source the song should be downloaded from
    :param target: the target the song should be downloaded to
    :return target:
    """
    placeholder = Target()
    return placeholder