music-kraken-core/src/music_kraken/pages/abstract.py

372 lines
13 KiB
Python
Raw Normal View History

import logging
import random
2023-04-03 08:38:12 +00:00
from copy import copy
2023-05-03 12:43:08 +00:00
from typing import Optional, Union, Type, Dict, Set, List
2023-05-23 14:21:12 +00:00
import threading
2023-05-25 11:46:47 +00:00
from queue import Queue
import requests
from bs4 import BeautifulSoup
2023-04-20 17:45:29 +00:00
from ..connection import Connection
2023-03-10 09:13:35 +00:00
from ..objects import (
2023-01-23 13:53:35 +00:00
Song,
Source,
Album,
Artist,
Target,
2023-03-24 14:58:21 +00:00
DatabaseObject,
2023-03-20 13:40:32 +00:00
Options,
2023-03-24 13:28:19 +00:00
Collection,
2023-04-04 08:20:54 +00:00
Label,
2023-01-23 13:53:35 +00:00
)
2023-04-18 10:00:25 +00:00
from ..utils.enums.source import SourcePages
2023-04-18 10:14:34 +00:00
from ..utils.enums.album import AlbumType
from ..audio import write_metadata_to_target, correct_codec
from ..utils import shared
from ..utils.shared import DEFAULT_VALUES, DOWNLOAD_PATH, DOWNLOAD_FILE, THREADED
from ..utils.support_classes import Query, DownloadResult, DefaultTarget, EndThread, FinishedSearch
2023-04-04 17:17:58 +00:00
2023-05-23 16:09:53 +00:00
# Type alias covering instances of the four object classes that the cleaning
# helpers and ``Page.fetch_details`` in this module treat as independent.
INDEPENDENT_DB_OBJECTS = Union[Label, Album, Artist, Song]
# Type alias covering the same four classes as class objects; it annotates the
# keys of the ``collections`` dictionaries passed between the ``_clean_*`` helpers.
INDEPENDENT_DB_TYPES = Union[Type[Song], Type[Album], Type[Artist], Type[Label]]
2023-05-24 06:50:56 +00:00
2023-05-23 16:09:53 +00:00
def _clean_music_object(music_object: INDEPENDENT_DB_OBJECTS, collections: Dict[INDEPENDENT_DB_TYPES, Collection]):
    """
    Route ``music_object`` to the cleaning helper that belongs to its exact class.

    The lookup is done on the exact type, so instances of subclasses
    (and every other kind of object) are not handled.

    :param music_object: the label, artist, album or song that should be cleaned
    :param collections: one shared collection per independent type
    :return: the result of the selected helper, ``None`` if no helper matches
    """
    cleaners = {
        Label: _clean_label,
        Artist: _clean_artist,
        Album: _clean_album,
        Song: _clean_song,
    }

    cleaner = cleaners.get(type(music_object))
    if cleaner is None:
        return None

    return cleaner(music_object, collections)
def _clean_collection(collection: Collection, collection_dict: Dict[INDEPENDENT_DB_TYPES, Collection]):
    """
    Run every element of ``collection`` through the shared collection of its element type.

    Each element is appended to the shared collection with
    ``merge_into_existing=True`` and the slot in ``collection`` is replaced by
    the ``current_element`` of the append result. If the result is not flagged
    as ``was_the_same``, that element is cleaned recursively.

    Collections whose element type has no entry in ``collection_dict`` are left untouched.

    :param collection: the collection that is modified in place
    :param collection_dict: one shared collection per independent type
    """
    element_type = collection.element_type
    if element_type not in collection_dict:
        return

    shared_collection = collection_dict[element_type]

    for index, element in enumerate(collection):
        append_result = shared_collection.append(element, merge_into_existing=True)
        collection[index] = append_result.current_element

        if append_result.was_the_same:
            continue

        _clean_music_object(append_result.current_element, collection_dict)
def _clean_label(label: Label, collections: Dict[INDEPENDENT_DB_TYPES, Collection]):
    """
    Clean the collections attached to ``label``: first its current artists, then its albums.

    :param label: the label whose collections are cleaned in place
    :param collections: one shared collection per independent type
    """
    for attribute_name in ("current_artist_collection", "album_collection"):
        _clean_collection(getattr(label, attribute_name), collections)
def _clean_artist(artist: Artist, collections: Dict[INDEPENDENT_DB_TYPES, Collection]):
    """
    Clean the collections attached to ``artist``: main albums, feature songs, labels (in that order).

    :param artist: the artist whose collections are cleaned in place
    :param collections: one shared collection per independent type
    """
    for attribute_name in ("main_album_collection", "feature_song_collection", "label_collection"):
        _clean_collection(getattr(artist, attribute_name), collections)
def _clean_album(album: Album, collections: Dict[INDEPENDENT_DB_TYPES, Collection]):
    """
    Clean the collections attached to ``album``: labels, songs, artists (in that order).

    :param album: the album whose collections are cleaned in place
    :param collections: one shared collection per independent type
    """
    for attribute_name in ("label_collection", "song_collection", "artist_collection"):
        _clean_collection(getattr(album, attribute_name), collections)
def _clean_song(song: Song, collections: Dict[INDEPENDENT_DB_TYPES, Collection]):
    """
    Clean the collections attached to ``song``: albums, feature artists, main artists (in that order).

    :param song: the song whose collections are cleaned in place
    :param collections: one shared collection per independent type
    """
    for attribute_name in ("album_collection", "feature_artist_collection", "main_artist_collection"):
        _clean_collection(getattr(song, attribute_name), collections)
2023-05-24 07:16:29 +00:00
def clean_object(dirty_object: DatabaseObject) -> DatabaseObject:
    """
    Clean the collections reachable from ``dirty_object`` in place.

    Only labels, artists, albums and songs are processed. For those, one fresh
    shared collection per type is created and handed to the recursive cleaning
    helpers. Every other object is returned untouched.

    :param dirty_object: the object to clean
    :return: the SAME object that was passed in
    """
    # A plain tuple of classes is used for the instance check, because
    # isinstance() raises a TypeError for typing.Union aliases before Python 3.10.
    independent_types = (Label, Artist, Album, Song)

    if not isinstance(dirty_object, independent_types):
        return dirty_object

    collections = {
        db_type: Collection(element_type=db_type)
        for db_type in independent_types
    }
    _clean_music_object(dirty_object, collections)

    return dirty_object
2023-05-24 07:16:29 +00:00
def build_new_object(new_object: DatabaseObject) -> DatabaseObject:
    """
    Clean a freshly fetched object and compile it.

    :param new_object: the object to post process
    :return: the object returned by ``clean_object``, after ``compile(merge_into=False)`` was called on it
    """
    cleaned_object = clean_object(new_object)
    cleaned_object.compile(merge_into=False)

    return cleaned_object
2023-05-23 19:06:01 +00:00
2023-05-24 07:16:29 +00:00
def merge_together(old_object: DatabaseObject, new_object: DatabaseObject) -> DatabaseObject:
    """
    Merge a newly fetched object into an already existing one.

    ``new_object`` is cleaned first, then merged into ``old_object``,
    which is compiled afterwards.

    :param old_object: the object that receives the data (modified in place)
    :param new_object: the object that provides the data
    :return: ``old_object``
    """
    old_object.merge(clean_object(new_object))
    old_object.compile(merge_into=False)

    return old_object
2023-05-23 19:06:01 +00:00
class LoreIpsum:
    """Empty placeholder class, used as the base of ``Page`` when ``THREADED`` is falsy."""


# The base class of ``Page`` depends on the ``THREADED`` setting.
if THREADED:
    Parent = threading.Thread
else:
    Parent = LoreIpsum
class Page(Parent):
    """
    This is an abstract class, laying out the
    functionality for every other class fetching something.

    The ``*_search``, ``fetch_*``, ``get_source_type`` and
    ``download_song_to_target`` methods only return empty defaults here;
    they are the hooks the rest of this class calls.
    """

    SOURCE_TYPE: SourcePages
    LOGGER = logging.getLogger("this shouldn't be used")

    def __init__(self, end_event: EndThread, search_queue: Queue, search_result_queue: Queue):
        """
        :param end_event: object whose truth value is evaluated by ``run``
        :param search_queue: queue the queries are taken from
        :param search_result_queue: queue the search results are put into
        """
        self.end_event = end_event

        self.search_queue = search_queue
        self.search_result_queue = search_result_queue

        Parent.__init__(self)

    @property
    def _empty_working_queues(self):
        """``True`` if there is currently no query waiting in the search queue."""
        return self.search_queue.empty()

    def run(self) -> None:
        """
        Take queries from the search queue, run them and put a
        ``FinishedSearch`` marker into the result queue after each of them.
        """
        # NOTE(review): the loop only keeps running while the search queue is
        # empty, but the body only does work if the queue is NOT empty. This
        # looks unintended; the logic is kept unchanged because the meaning of
        # the truth value of ``EndThread`` is not visible here. Confirm the
        # intended stop condition.
        while bool(self.end_event) and self._empty_working_queues:
            if not self.search_queue.empty():
                self.search(self.search_queue.get())
                self.search_result_queue.put(FinishedSearch())
                continue

    def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
        """
        Hook: return the class of the object ``source`` points to.

        :return: ``None`` in this base implementation
        """
        return None

    def get_soup_from_response(self, r: requests.Response) -> BeautifulSoup:
        """Parse the content of the response with the ``html.parser`` backend."""
        return BeautifulSoup(r.content, "html.parser")

    # to search stuff
    def search(self, query: Query) -> List[DatabaseObject]:
        """
        Search for the given query.

        If the query carries a song, album, artist or label, the matching
        ``*_search`` hook is tried first and its results are returned if there
        are any. Otherwise, every string in ``query.default_search`` is passed
        to ``general_search``.

        NOTE(review): only the results of ``general_search`` are put into the
        result queue; results of the type specific search are just returned.
        Confirm that this is intended.

        :param query: the query to search for
        :return: the list of found objects
        """
        music_object = query.music_object

        search_functions = {
            Song: self.song_search,
            Album: self.album_search,
            Artist: self.artist_search,
            Label: self.label_search
        }

        if type(music_object) in search_functions:
            r = search_functions[type(music_object)](music_object)
            if len(r) > 0:
                return r

        r = []
        for default_query in query.default_search:
            for single_option in self.general_search(default_query):
                r.append(single_option)
                self.search_result_queue.put(single_option)

        return r

    def general_search(self, search_query: str) -> List[DatabaseObject]:
        """Hook: search with a plain string. Returns an empty list here."""
        return []

    def label_search(self, label: Label) -> List[Label]:
        """Hook: search for a label. Returns an empty list here."""
        return []

    def artist_search(self, artist: Artist) -> List[Artist]:
        """Hook: search for an artist. Returns an empty list here."""
        return []

    def album_search(self, album: Album) -> List[Album]:
        """Hook: search for an album. Returns an empty list here."""
        return []

    def song_search(self, song: Song) -> List[Song]:
        """Hook: search for a song. Returns an empty list here."""
        return []

    def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1) -> DatabaseObject:
        """
        When a music object with lacking data is passed in, it returns
        the SAME object **(no copy)** with more detailed data.
        If you for example put in an album, it fetches the tracklist.

        :param music_object:
        :param stop_at_level:
        This says the depth of the level the scraper will recurse to.
        If this is for example set to 2, then the levels could be:
        1. Level: the album
        2. Level: every song of the album + every artist of the album
        If no additional requests are needed to get the data one level below the supposed stop level
        this gets ignored
        :return detailed_music_object: IT MODIFIES THE INPUT OBJ
        """

        # creating a new object, of the same type
        new_music_object: DatabaseObject = type(music_object)()

        # Only certain database objects have a source list. A tuple of classes
        # is used, because isinstance() raises a TypeError for typing.Union
        # aliases before Python 3.10.
        if isinstance(music_object, (Label, Album, Artist, Song)):
            source: Source
            for source in music_object.source_collection.get_sources_from_page(self.SOURCE_TYPE):
                fetched_object = self.fetch_object_from_source(
                    source=source,
                    enforce_type=type(music_object),
                    stop_at_level=stop_at_level,
                    post_process=False
                )

                # fetch_object_from_source returns None if the source can't be
                # resolved to the enforced type, so there is nothing to merge
                if fetched_object is None:
                    continue

                new_music_object.merge(fetched_object)

        return merge_together(music_object, new_music_object)

    def fetch_object_from_source(
            self,
            source: Source,
            stop_at_level: int = 2,
            enforce_type: Type[DatabaseObject] = None,
            post_process: bool = True
    ) -> Optional[DatabaseObject]:
        """
        Fetch the object the given source points to.

        :param source: the source to fetch from
        :param stop_at_level: passed on to the ``fetch_*`` hook
        :param enforce_type: if given, ``None`` is returned unless the source is of exactly this type
        :param post_process: if ``True``, a truthy result is passed through ``build_new_object``
        :return: the fetched object, or ``None`` if the type of the source
            is unknown, unsupported or doesn't match ``enforce_type``
        """
        obj_type = self.get_source_type(source)

        if obj_type is None:
            return None

        if enforce_type != obj_type and enforce_type is not None:
            self.LOGGER.warning(f"Object type isn't type to enforce: {enforce_type}, {obj_type}")
            return None

        fetch_map = {
            Song: self.fetch_song,
            Album: self.fetch_album,
            Artist: self.fetch_artist,
            Label: self.fetch_label
        }

        if obj_type not in fetch_map:
            self.LOGGER.warning(f"Can't fetch details of type: {obj_type}")
            return None

        music_object: DatabaseObject = fetch_map[obj_type](source, stop_at_level)

        if post_process and music_object:
            return build_new_object(music_object)

        return music_object

    def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
        """Hook: fetch a song from its source. Returns an empty ``Song`` here."""
        return Song()

    def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
        """Hook: fetch an album from its source. Returns an empty ``Album`` here."""
        return Album()

    def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
        """Hook: fetch an artist from its source. Returns an empty ``Artist`` here."""
        return Artist()

    def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:
        """Hook: fetch a label from its source. Returns an empty ``Label`` here."""
        return Label()

    def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False) -> DownloadResult:
        """
        Download the given music object and everything below it.

        Before the download starts, the objects above ``music_object`` are
        collected by following its upward collections, so they are available
        when the target path of a song is built.

        :param music_object: the object to download
        :param genre: stored under the key ``"genre"`` of the naming objects
        :param download_all: if ``True``, albums with a blacklisted album type are downloaded as well
        :return: the merged result of all downloads
        """
        naming_objects = {"genre": genre}

        def fill_naming_objects(naming_music_object: DatabaseObject):
            # follows the first not empty upwards collection, whose element
            # type isn't registered yet, and registers its first element
            for collection_name in naming_music_object.UPWARDS_COLLECTION_ATTRIBUTES:
                # the collections belong to the music object, not to the page
                collection: Collection = getattr(naming_music_object, collection_name)

                # NOTE(review): ``empty`` is read as an attribute, like in
                # ``_download_song`` - confirm it is a property of Collection
                if collection.empty:
                    continue
                if collection.element_type in naming_objects:
                    continue

                dom_ordered_music_object: DatabaseObject = collection[0]
                naming_objects[collection.element_type] = dom_ordered_music_object
                return fill_naming_objects(dom_ordered_music_object)

        fill_naming_objects(music_object)

        # ``_download`` takes exactly (music_object, naming_objects, download_all);
        # the genre travels inside of naming_objects
        return self._download(music_object, naming_objects, download_all)

    def _download(self, music_object: DatabaseObject, naming_objects: Dict[Type[DatabaseObject], DatabaseObject], download_all: bool = False) -> DownloadResult:
        """
        Recursively download ``music_object`` and the objects in its downward collections.

        :param music_object: the object to download
        :param naming_objects: the objects above the current one, keyed by their
            class (plus the ``"genre"`` entry); the current object is added to it
        :param download_all: if ``True``, albums with a blacklisted album type are downloaded as well
        :return: the merged result of all downloads below ``music_object``
        """
        # Skips all releases, that are defined in shared.ALBUM_TYPE_BLACKLIST, if download_all is False
        if isinstance(music_object, Album):
            if not download_all and music_object.album_type in shared.ALBUM_TYPE_BLACKLIST:
                return DownloadResult()

        self.fetch_details(music_object=music_object, stop_at_level=2)
        naming_objects[type(music_object)] = music_object

        if isinstance(music_object, Song):
            return self._download_song(music_object, naming_objects)

        download_result: DownloadResult = DownloadResult()

        for collection_name in music_object.DOWNWARDS_COLLECTION_ATTRIBUTES:
            # the collections belong to the music object, not to the page
            collection: Collection = getattr(music_object, collection_name)

            sub_ordered_music_object: DatabaseObject
            for sub_ordered_music_object in collection:
                # every branch gets its own copy, so siblings don't overwrite each other
                download_result.merge(self._download(sub_ordered_music_object, naming_objects.copy(), download_all))

        return download_result

    def _download_song(self, song: Song, naming_objects: Dict[Type[DatabaseObject], DatabaseObject]) -> DownloadResult:
        """
        Download a single song into a temporary target and distribute it to the targets of the song.

        :param song: the song to download
        :param naming_objects: has to contain the key ``"genre"``; the entries for
            ``Album``, ``Artist`` and ``Label`` are used for the path if present
        :return: the result of the download, including the post processing
        """
        # the values that are filled into the path and file templates
        name_attribute = DEFAULT_VALUES.copy()

        # song
        name_attribute["genre"] = naming_objects["genre"]
        name_attribute["song"] = song.title

        if Album in naming_objects:
            album: Album = naming_objects[Album]
            name_attribute["album"] = album.title
            name_attribute["album_type"] = album.album_type.value

        if Artist in naming_objects:
            artist: Artist = naming_objects[Artist]
            name_attribute["artist"] = artist.name

        if Label in naming_objects:
            label: Label = naming_objects[Label]
            name_attribute["label"] = label.name

        new_target = Target(
            relative_to_music_dir=True,
            path=DOWNLOAD_PATH.format(**name_attribute),
            file=DOWNLOAD_FILE.format(**name_attribute)
        )

        if song.target_collection.empty:
            song.target_collection.append(new_target)

        sources = song.source_collection.get_sources_from_page(self.SOURCE_TYPE)
        if len(sources) == 0:
            return DownloadResult(error_message=f"No source found for {song.title} as {self.__class__.__name__}.")

        temp_target: Target = Target(
            path=shared.TEMP_DIR,
            file=str(random.randint(0, 999999))
        )

        # ``download_song_to_target`` is the download hook this class defines
        r = self.download_song_to_target(source=sources[0], target=temp_target, desc=song.title)

        if not r.is_fatal_error:
            r.merge(self._post_process_targets(song, temp_target))

        return r

    def _post_process_targets(self, song: Song, temp_target: Target) -> DownloadResult:
        """
        Fix the codec and write the metadata of the temporary target, then
        copy its content to every target of the song and delete it.

        :param song: the song, which provides the metadata and the targets
        :param temp_target: the target the song was downloaded to
        :return: a result containing every target of the song
        """
        correct_codec(temp_target)
        write_metadata_to_target(song.metadata, temp_target)

        r = DownloadResult()

        target: Target
        for target in song.target_collection:
            if temp_target is not target:
                temp_target.copy_content(target)
            r.add_target(target)

        temp_target.delete()

        return r

    def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult:
        """
        Hook: download the song behind ``source`` into ``target``.

        :return: an empty ``DownloadResult`` in this base implementation
        """
        return DownloadResult()