Merge branch 'HeIIow2:master' into master
This commit is contained in:
@@ -4,7 +4,9 @@ from . import (
|
||||
source,
|
||||
parents,
|
||||
formatted_text,
|
||||
album
|
||||
album,
|
||||
option,
|
||||
collection
|
||||
)
|
||||
|
||||
MusicObject = parents.DatabaseObject
|
||||
@@ -28,3 +30,6 @@ AlbumStatus = album.AlbumStatus
|
||||
Album = song.Album
|
||||
|
||||
FormattedText = formatted_text.FormattedText
|
||||
|
||||
Options = option.Options
|
||||
Collection = collection.Collection
|
||||
|
@@ -22,4 +22,5 @@ class AlbumType(Enum):
|
||||
LIVE_ALBUM = "Live Album"
|
||||
COMPILATION_ALBUM = "Compilation Album"
|
||||
MIXTAPE = "Mixtape"
|
||||
DEMO = "Demo"
|
||||
OTHER = "Other"
|
||||
|
110
src/music_kraken/objects/cache.py
Normal file
110
src/music_kraken/objects/cache.py
Normal file
@@ -0,0 +1,110 @@
|
||||
from collections import defaultdict
|
||||
from typing import Dict, List, Optional
|
||||
import weakref
|
||||
|
||||
from .parents import DatabaseObject
|
||||
|
||||
"""
|
||||
This is a cache for the objects that get pulled out of the database.
|
||||
This is necessary, to not have duplicate objects with the same id.
|
||||
|
||||
Using a cache that maps the objects to their id has multiple benefits:
|
||||
- if you modify the object at any point, all objects with the same id get modified *(copy by reference)*
|
||||
- less ram usage
|
||||
- to further decrease ram usage I only store weak refs and not a strong reference, for the gc to still work
|
||||
"""
|
||||
|
||||
|
||||
class ObjectCache:
    """
    ObjectCache is a cache for the objects retrieved from a database.

    It maps the id of each object to the object itself and only holds weak
    references, so an entry disappears as soon as nothing outside the cache
    references the object anymore.
    Using a cache for these objects provides several benefits:

    - Modifying an object updates all users of the same id (they share one instance)
    - Reduced memory usage

    :attr object_to_id: Maps an id to its DatabaseObject. The values are held weakly.
    :attr weakref_map: Maps the weak reference of each cached DatabaseObject to its id.

    :method exists: Check if a DatabaseObject already exists in the cache.
    :method append: Add a DatabaseObject to the cache if it does not already exist.
    :method extent: Add a list of DatabaseObjects to the cache (also available as ``extend``).
    :method remove: Remove a DatabaseObject from the cache by its id.
    :method get: Retrieve a DatabaseObject from the cache by its id.
    """
    object_to_id: "weakref.WeakValueDictionary[str, DatabaseObject]"
    weakref_map: Dict[weakref.ref, str]

    def __init__(self) -> None:
        # The values have to be weak: a plain dict would keep a strong reference
        # to every cached object, so nothing could ever be garbage collected and
        # on_death would never be triggered.
        self.object_to_id = weakref.WeakValueDictionary()
        self.weakref_map = dict()

    def exists(self, database_object: "DatabaseObject") -> bool:
        """
        Check if a DatabaseObject with the same id already exists in the cache.

        Dynamic objects are always reported as existing, which keeps ``append``
        from storing them.

        :param database_object: The DatabaseObject to check for.
        :return: True if the DatabaseObject exists (or is dynamic), False otherwise.
        """
        if database_object.dynamic:
            return True
        return database_object.id in self.object_to_id

    def on_death(self, weakref_: weakref.ref) -> None:
        """
        Callback that gets triggered when a cached DatabaseObject is finalized.
        It removes the bookkeeping entries of that object from the cache.

        :param weakref_: The weak reference of the DatabaseObject that has been garbage collected.
        """
        # Both pops tolerate a missing key: the entry may already have been
        # dropped by remove() or by the WeakValueDictionary itself.
        data_id = self.weakref_map.pop(weakref_, None)
        if data_id is not None:
            self.object_to_id.pop(data_id, None)

    def get_weakref(self, database_object: "DatabaseObject") -> weakref.ref:
        """
        Create a weak reference to the object that notifies the cache on death.

        :param database_object: The DatabaseObject to reference.
        :return: A weak reference with ``on_death`` registered as callback.
        """
        return weakref.ref(database_object, self.on_death)

    def append(self, database_object: "DatabaseObject") -> bool:
        """
        Add a DatabaseObject to the cache.

        :param database_object: The DatabaseObject to add to the cache.
        :return: True if the DatabaseObject already exists in the cache, False otherwise.
        """
        if self.exists(database_object):
            return True

        self.weakref_map[self.get_weakref(database_object)] = database_object.id
        self.object_to_id[database_object.id] = database_object

        return False

    def extent(self, database_object_list: "List[DatabaseObject]") -> None:
        """
        Analogous to the extend method of list: appends every object of the given list.

        :param database_object_list: The DatabaseObjects to add to the cache.
        """
        for database_object in database_object_list:
            self.append(database_object)

    # correctly spelled alias; "extent" is kept for existing callers
    extend = extent

    def remove(self, _id: str) -> None:
        """
        Remove a DatabaseObject from the cache. Unknown ids are ignored.

        :param _id: The id of the DatabaseObject to remove from the cache.
        """
        data = self.object_to_id.pop(_id, None)
        if data is None:
            return
        # While the referent is alive, weak references compare and hash like
        # the referent, so a fresh reference finds the stored key.
        self.weakref_map.pop(weakref.ref(data), None)

    def __getitem__(self, item) -> "Optional[DatabaseObject]":
        """
        Return the cached object for an id.

        :param item: the id of the music object
        :return: the cached DatabaseObject, or None if the id is unknown
        """
        return self.object_to_id.get(item)

    def get(self, _id: str) -> "Optional[DatabaseObject]":
        """
        Retrieve a DatabaseObject from the cache by its id.

        :param _id: the id of the music object
        :return: the cached DatabaseObject, or None if the id is unknown
        """
        return self.__getitem__(_id)
|
@@ -33,6 +33,7 @@ class Collection:
|
||||
```
|
||||
"""
|
||||
self._attribute_to_object_map: Dict[str, Dict[object, DatabaseObject]] = defaultdict(dict)
|
||||
self._used_ids: set = set()
|
||||
|
||||
if data is not None:
|
||||
self.extend(data, merge_on_conflict=True)
|
||||
@@ -46,12 +47,27 @@ class Collection:
|
||||
continue
|
||||
|
||||
self._attribute_to_object_map[name][value] = element
|
||||
|
||||
self._used_ids.add(element.id)
|
||||
|
||||
def unmap_element(self, element: "DatabaseObject"):
    """
    Remove the index entries that point at the given element.

    For every (attribute name, value) pair of ``element.indexing_values`` the
    entry is only removed if it currently maps to this very element; entries
    that belong to another object are left alone. Pairs whose value is None
    are skipped.

    :param element: the object whose index entries should be dropped
    """
    # NOTE(review): self._used_ids (filled where elements get mapped) is not
    # touched here - confirm whether the id should be discarded as well.
    for name, value in element.indexing_values:
        if value is None:
            continue

        # .get() instead of indexing: the map is a defaultdict(dict), indexing
        # would create an empty bucket for every attribute name looked at.
        value_to_object = self._attribute_to_object_map.get(name)
        if value_to_object is None:
            continue

        if value_to_object.get(value) is element:
            del value_to_object[value]
|
||||
|
||||
def append(self, element: DatabaseObject, merge_on_conflict: bool = True):
|
||||
def append(self, element: DatabaseObject, merge_on_conflict: bool = True, merge_into_existing: bool = True) -> bool:
|
||||
"""
|
||||
:param element:
|
||||
:param merge_on_conflict:
|
||||
:return:
|
||||
:param merge_into_existing:
|
||||
:return did_not_exist:
|
||||
"""
|
||||
|
||||
# if the element type has been defined in the initializer it checks if the type matches
|
||||
@@ -60,17 +76,30 @@ class Collection:
|
||||
|
||||
for name, value in element.indexing_values:
|
||||
if value in self._attribute_to_object_map[name]:
|
||||
existing_object = self._attribute_to_object_map[name][value]
|
||||
|
||||
if merge_on_conflict:
|
||||
# if the object does already exist
|
||||
# thus merging and don't add it afterwards
|
||||
existing_object = self._attribute_to_object_map[name][value]
|
||||
existing_object.merge(element)
|
||||
# in case any relevant data has been added (e.g. it remaps the old object)
|
||||
self.map_element(existing_object)
|
||||
return
|
||||
if merge_into_existing:
|
||||
existing_object.merge(element)
|
||||
# in case any relevant data has been added (e.g. it remaps the old object)
|
||||
self.map_element(existing_object)
|
||||
else:
|
||||
element.merge(existing_object)
|
||||
|
||||
exists_at = self._data.index(existing_object)
|
||||
self._data[exists_at] = element
|
||||
|
||||
self.unmap_element(existing_object)
|
||||
self.map_element(element)
|
||||
|
||||
return False
|
||||
|
||||
self._data.append(element)
|
||||
self.map_element(element)
|
||||
|
||||
return True
|
||||
|
||||
def extend(self, element_list: Iterable[DatabaseObject], merge_on_conflict: bool = True):
|
||||
for element in element_list:
|
||||
|
@@ -10,6 +10,10 @@ https://pandoc.org/installing.html
|
||||
|
||||
|
||||
class FormattedText:
|
||||
"""
|
||||
the self.html value should be saved to the database
|
||||
"""
|
||||
|
||||
doc = None
|
||||
|
||||
def __init__(
|
||||
@@ -39,35 +43,38 @@ class FormattedText:
|
||||
|
||||
def get_markdown(self) -> str:
|
||||
if self.doc is None:
|
||||
return None
|
||||
return ""
|
||||
return pandoc.write(self.doc, format="markdown").strip()
|
||||
|
||||
def get_html(self) -> str:
|
||||
if self.doc is None:
|
||||
return None
|
||||
return ""
|
||||
return pandoc.write(self.doc, format="html").strip()
|
||||
|
||||
def get_plaintext(self) -> str:
|
||||
if self.doc is None:
|
||||
return None
|
||||
return ""
|
||||
return pandoc.write(self.doc, format="plain").strip()
|
||||
|
||||
|
||||
@property
|
||||
def json(self) -> str:
|
||||
if self.doc is None:
|
||||
return None
|
||||
return pandoc.write(self.doc, format="json")
|
||||
def is_empty(self) -> bool:
|
||||
return self.doc is None
|
||||
|
||||
def __eq__(self, other) -> False:
|
||||
if type(other) != type(self):
|
||||
return False
|
||||
if self.is_empty and other.is_empty:
|
||||
return True
|
||||
|
||||
return self.doc == other.doc
|
||||
|
||||
|
||||
|
||||
plaintext = property(fget=get_plaintext, fset=set_plaintext)
|
||||
markdown = property(fget=get_markdown, fset=set_markdown)
|
||||
html = property(fget=get_html, fset=set_html)
|
||||
|
||||
|
||||
class NotesAttributes:
|
||||
def __init__(self) -> None:
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
_plaintext = """
|
||||
World of Work
|
||||
|
@@ -1,14 +1,19 @@
|
||||
from typing import List
|
||||
|
||||
from collections import defaultdict
|
||||
import pycountry
|
||||
|
||||
from .parents import DatabaseObject
|
||||
from .source import Source, SourceCollection
|
||||
from .metadata import Metadata
|
||||
from .formatted_text import FormattedText
|
||||
|
||||
|
||||
class Lyrics(DatabaseObject):
|
||||
COLLECTION_ATTRIBUTES = ("source_collection",)
|
||||
SIMPLE_ATTRIBUTES = {
|
||||
"text": FormattedText(),
|
||||
"language": None
|
||||
}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
text: FormattedText,
|
||||
@@ -18,9 +23,9 @@ class Lyrics(DatabaseObject):
|
||||
source_list: List[Source] = None,
|
||||
**kwargs
|
||||
) -> None:
|
||||
DatabaseObject.__init__(self, _id=_id, dynamic=dynamic)
|
||||
DatabaseObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs)
|
||||
|
||||
self.text: FormattedText = text
|
||||
self.text: FormattedText = text or FormattedText()
|
||||
self.language: pycountry.Languages = language
|
||||
|
||||
self.source_collection: SourceCollection = SourceCollection(source_list)
|
||||
|
@@ -154,6 +154,8 @@ class ID3Timestamp:
|
||||
return self.date_obj >= other.date_obj
|
||||
|
||||
def __eq__(self, other):
|
||||
if type(other) != type(self):
|
||||
return False
|
||||
return self.date_obj == other.date_obj
|
||||
|
||||
def get_time_format(self) -> str:
|
||||
|
@@ -21,8 +21,10 @@ class Options:
|
||||
|
||||
return self._data[index].options
|
||||
|
||||
def __getitem__(self, item: int) -> 'Options':
|
||||
def __getitem__(self, item: int) -> 'DatabaseObject':
|
||||
if type(item) != int:
|
||||
raise TypeError("Key needs to be an Integer")
|
||||
if item >= len(self._data):
|
||||
raise ValueError("Index out of bounds")
|
||||
|
||||
return self.get_next_options(item)
|
||||
return self._data[item]
|
||||
|
@@ -11,15 +11,18 @@ from .option import Options
|
||||
|
||||
class DatabaseObject:
|
||||
COLLECTION_ATTRIBUTES: tuple = tuple()
|
||||
SIMPLE_ATTRIBUTES: tuple = tuple()
|
||||
|
||||
SIMPLE_ATTRIBUTES: dict = dict()
|
||||
|
||||
def __init__(self, _id: str = None, dynamic: bool = False, **kwargs) -> None:
|
||||
self.automatic_id: bool = False
|
||||
|
||||
if _id is None and not dynamic:
|
||||
"""
|
||||
generates a random UUID
|
||||
https://docs.python.org/3/library/uuid.html
|
||||
"""
|
||||
_id = str(uuid.uuid4())
|
||||
self.automatic_id = True
|
||||
LOGGER.debug(f"id for {type(self).__name__} isn't set. Setting to {_id}")
|
||||
|
||||
# The id can only be None, if the object is dynamic (self.dynamic = True)
|
||||
@@ -43,7 +46,7 @@ class DatabaseObject:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
@property
|
||||
def indexing_values(self) -> List[Tuple[str, object]]:
|
||||
"""
|
||||
@@ -53,9 +56,9 @@ class DatabaseObject:
|
||||
Returns:
|
||||
List[Tuple[str, object]]: the first element in the tuple is the name of the attribute, the second the value.
|
||||
"""
|
||||
|
||||
|
||||
return list()
|
||||
|
||||
|
||||
def merge(self, other, override: bool = False):
|
||||
if not isinstance(other, type(self)):
|
||||
LOGGER.warning(f"can't merge \"{type(other)}\" into \"{type(self)}\"")
|
||||
@@ -64,11 +67,11 @@ class DatabaseObject:
|
||||
for collection in type(self).COLLECTION_ATTRIBUTES:
|
||||
getattr(self, collection).extend(getattr(other, collection))
|
||||
|
||||
for simple_attribute in type(self).SIMPLE_ATTRIBUTES:
|
||||
if getattr(other, simple_attribute) is None:
|
||||
for simple_attribute, default_value in type(self).SIMPLE_ATTRIBUTES.items():
|
||||
if getattr(other, simple_attribute) == default_value:
|
||||
continue
|
||||
|
||||
if override or getattr(self, simple_attribute) is None:
|
||||
if override or getattr(self, simple_attribute) == default_value:
|
||||
setattr(self, simple_attribute, getattr(other, simple_attribute))
|
||||
|
||||
@property
|
||||
@@ -83,6 +86,18 @@ class DatabaseObject:
|
||||
def option_string(self) -> str:
|
||||
return self.__repr__()
|
||||
|
||||
def compile(self) -> bool:
|
||||
"""
|
||||
compiles the recursive structures,
|
||||
|
||||
Args:
|
||||
traceback (set, optional): Defaults to an empty set.
|
||||
|
||||
Returns:
|
||||
bool: returns true if id has been found in set
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class MainObject(DatabaseObject):
|
||||
"""
|
||||
@@ -95,7 +110,7 @@ class MainObject(DatabaseObject):
|
||||
It has all the functionality of the "DatabaseObject" (it inherits from said class)
|
||||
but also some added functions as well.
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self, _id: str = None, dynamic: bool = False, **kwargs):
|
||||
DatabaseObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs)
|
||||
|
||||
|
@@ -1,6 +1,7 @@
|
||||
import os
|
||||
from typing import List, Optional, Dict, Tuple
|
||||
import pycountry
|
||||
from collections import defaultdict
|
||||
|
||||
from .metadata import (
|
||||
Mapping as id3Mapping,
|
||||
@@ -46,7 +47,15 @@ class Song(MainObject):
|
||||
COLLECTION_ATTRIBUTES = (
|
||||
"lyrics_collection", "album_collection", "main_artist_collection", "feature_artist_collection",
|
||||
"source_collection")
|
||||
SIMPLE_ATTRIBUTES = ("title", "unified_title", "isrc", "length", "tracksort", "genre")
|
||||
SIMPLE_ATTRIBUTES = {
|
||||
"title": None,
|
||||
"unified_title": None,
|
||||
"isrc": None,
|
||||
"length": None,
|
||||
"tracksort": 0,
|
||||
"genre": None,
|
||||
"notes": FormattedText()
|
||||
}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -64,17 +73,21 @@ class Song(MainObject):
|
||||
album_list: List['Album'] = None,
|
||||
main_artist_list: List['Artist'] = None,
|
||||
feature_artist_list: List['Artist'] = None,
|
||||
notes: FormattedText = None,
|
||||
**kwargs
|
||||
) -> None:
|
||||
MainObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs)
|
||||
# attributes
|
||||
self.title: str = title
|
||||
self.unified_title: str = unified_title or unify(title)
|
||||
self.unified_title: str = unified_title
|
||||
if unified_title is None and title is not None:
|
||||
self.unified_title = unify(title)
|
||||
|
||||
self.isrc: str = isrc
|
||||
self.length: int = length
|
||||
self.tracksort: int = tracksort or 0
|
||||
self.genre: str = genre
|
||||
self.notes: FormattedText = notes or FormattedText()
|
||||
|
||||
self.source_collection: SourceCollection = SourceCollection(source_list)
|
||||
self.target_collection: Collection = Collection(data=target_list, element_type=Target)
|
||||
@@ -83,6 +96,22 @@ class Song(MainObject):
|
||||
self.main_artist_collection = Collection(data=main_artist_list, element_type=Artist)
|
||||
self.feature_artist_collection = Collection(data=feature_artist_list, element_type=Artist)
|
||||
|
||||
def compile(self):
    """
    Propagate this song into the collections of the objects it references.

    The song is appended (with ``merge_into_existing=False``) to the
    ``song_collection`` of each of its albums and to the
    ``feature_song_collection`` of each of its feature artists. Afterwards
    every album of the song is appended to the ``main_album_collection`` of
    every main artist. Whenever such an ``append`` returns a truthy value,
    ``compile`` is called on the object that received the new entry.
    """
    # (own collection, collection on the related object that should contain self)
    back_references = (
        ("album_collection", "song_collection"),
        ("feature_artist_collection", "feature_song_collection"),
    )
    for own_attribute, remote_attribute in back_references:
        for related in getattr(self, own_attribute):
            if getattr(related, remote_attribute).append(self, merge_into_existing=False):
                related.compile()

    for main_artist in self.main_artist_collection:
        for linked_album in self.album_collection:
            newly_linked = main_artist.main_album_collection.append(linked_album, merge_into_existing=False)
            if newly_linked:
                main_artist.compile()
|
||||
|
||||
@property
|
||||
def indexing_values(self) -> List[Tuple[str, object]]:
|
||||
return [
|
||||
@@ -166,7 +195,17 @@ All objects dependent on Album
|
||||
|
||||
class Album(MainObject):
|
||||
COLLECTION_ATTRIBUTES = ("label_collection", "artist_collection", "song_collection")
|
||||
SIMPLE_ATTRIBUTES = ("title", "album_status", "album_type", "language", "date", "barcode", "albumsort")
|
||||
SIMPLE_ATTRIBUTES = {
|
||||
"title": None,
|
||||
"unified_title": None,
|
||||
"album_status": None,
|
||||
"album_type": AlbumType.OTHER,
|
||||
"language": None,
|
||||
"date": ID3Timestamp(),
|
||||
"barcode": None,
|
||||
"albumsort": None,
|
||||
"notes": FormattedText()
|
||||
}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -184,15 +223,18 @@ class Album(MainObject):
|
||||
album_status: AlbumStatus = None,
|
||||
album_type: AlbumType = None,
|
||||
label_list: List['Label'] = None,
|
||||
notes: FormattedText = None,
|
||||
**kwargs
|
||||
) -> None:
|
||||
MainObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs)
|
||||
|
||||
self.title: str = title
|
||||
self.unified_title: str = unified_title or unify(self.title)
|
||||
self.unified_title: str = unified_title
|
||||
if unified_title is None and title is not None:
|
||||
self.unified_title = unify(title)
|
||||
|
||||
self.album_status: AlbumStatus = album_status
|
||||
self.album_type: AlbumType = album_type
|
||||
self.album_type: AlbumType = album_type or AlbumType.OTHER
|
||||
self.language: pycountry.Languages = language
|
||||
self.date: ID3Timestamp = date or ID3Timestamp()
|
||||
|
||||
@@ -208,12 +250,29 @@ class Album(MainObject):
|
||||
to set albumsort with help of the release year
|
||||
"""
|
||||
self.albumsort: Optional[int] = albumsort
|
||||
self.notes = notes or FormattedText()
|
||||
|
||||
self.source_collection: SourceCollection = SourceCollection(source_list)
|
||||
self.song_collection: Collection = Collection(data=song_list, element_type=Song)
|
||||
self.artist_collection: Collection = Collection(data=artist_list, element_type=Artist)
|
||||
self.label_collection: Collection = Collection(data=label_list, element_type=Label)
|
||||
|
||||
def compile(self):
    """
    Propagate this album into the collections of the objects it references.

    The album is appended (with ``merge_into_existing=False``) to the
    ``album_collection`` of its songs, the ``main_album_collection`` of its
    artists and the ``album_collection`` of its labels, in that order.
    Whenever such an ``append`` returns a truthy value, ``compile`` is called
    on the object that received the album.
    """
    # (own collection, collection on the related object that should contain self)
    back_references = (
        ("song_collection", "album_collection"),
        ("artist_collection", "main_album_collection"),
        ("label_collection", "album_collection"),
    )
    for own_attribute, remote_attribute in back_references:
        for related in getattr(self, own_attribute):
            if getattr(related, remote_attribute).append(self, merge_into_existing=False):
                related.compile()
|
||||
|
||||
@property
|
||||
def indexing_values(self) -> List[Tuple[str, object]]:
|
||||
return [
|
||||
@@ -309,16 +368,23 @@ class Album(MainObject):
|
||||
return len(self.artist_collection) > 1
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
All objects dependent on Artist
|
||||
"""
|
||||
|
||||
|
||||
class Artist(MainObject):
|
||||
COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection", "label_collection")
|
||||
SIMPLE_ATTRIBUTES = ("name", "name", "country", "formed_in", "notes", "lyrical_themes", "general_genre")
|
||||
COLLECTION_ATTRIBUTES = (
|
||||
"feature_song_collection", "main_album_collection", "label_collection", "source_collection")
|
||||
SIMPLE_ATTRIBUTES = {
|
||||
"name": None,
|
||||
"unified_name": None,
|
||||
"country": None,
|
||||
"formed_in": ID3Timestamp(),
|
||||
"notes": FormattedText(),
|
||||
"lyrical_themes": [],
|
||||
"general_genre": ""
|
||||
}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -340,7 +406,9 @@ class Artist(MainObject):
|
||||
MainObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs)
|
||||
|
||||
self.name: str = name
|
||||
self.unified_name: str = unified_name or unify(self.name)
|
||||
self.unified_name: str = unified_name
|
||||
if unified_name is None and name is not None:
|
||||
self.unified_name = unify(name)
|
||||
|
||||
"""
|
||||
TODO implement album type and notes
|
||||
@@ -365,6 +433,22 @@ class Artist(MainObject):
|
||||
self.main_album_collection: Collection = Collection(data=main_album_list, element_type=Album)
|
||||
self.label_collection: Collection = Collection(data=label_list, element_type=Label)
|
||||
|
||||
def compile(self):
    """
    Propagate this artist into the collections of the objects it references.

    The artist is appended (with ``merge_into_existing=False``) to the
    ``feature_artist_collection`` of its feature songs, the
    ``artist_collection`` of its main albums and the
    ``current_artist_collection`` of its labels, in that order. Whenever such
    an ``append`` returns a truthy value, ``compile`` is called on the object
    that received the artist.
    """
    # (own collection, collection on the related object that should contain self)
    back_references = (
        ("feature_song_collection", "feature_artist_collection"),
        ("main_album_collection", "artist_collection"),
        ("label_collection", "current_artist_collection"),
    )
    for own_attribute, remote_attribute in back_references:
        for related in getattr(self, own_attribute):
            if getattr(related, remote_attribute).append(self, merge_into_existing=False):
                related.compile()
|
||||
|
||||
@property
|
||||
def indexing_values(self) -> List[Tuple[str, object]]:
|
||||
return [
|
||||
@@ -463,7 +547,11 @@ Label
|
||||
|
||||
class Label(MainObject):
|
||||
COLLECTION_ATTRIBUTES = ("album_collection", "current_artist_collection")
|
||||
SIMPLE_ATTRIBUTES = ("name",)
|
||||
SIMPLE_ATTRIBUTES = {
|
||||
"name": None,
|
||||
"unified_name": None,
|
||||
"notes": FormattedText()
|
||||
}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -471,6 +559,7 @@ class Label(MainObject):
|
||||
dynamic: bool = False,
|
||||
name: str = None,
|
||||
unified_name: str = None,
|
||||
notes: FormattedText = None,
|
||||
album_list: List[Album] = None,
|
||||
current_artist_list: List[Artist] = None,
|
||||
source_list: List[Source] = None,
|
||||
@@ -479,12 +568,26 @@ class Label(MainObject):
|
||||
MainObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs)
|
||||
|
||||
self.name: str = name
|
||||
self.unified_name: str = unified_name or unify(self.name)
|
||||
self.unified_name: str = unified_name
|
||||
if unified_name is None and name is not None:
|
||||
self.unified_name = unify(name)
|
||||
self.notes = notes or FormattedText()
|
||||
|
||||
self.source_collection: SourceCollection = SourceCollection(source_list)
|
||||
self.album_collection: Collection = Collection(data=album_list, element_type=Album)
|
||||
self.current_artist_collection: Collection = Collection(data=current_artist_list, element_type=Artist)
|
||||
|
||||
def compile(self) -> bool:
    """
    Propagate this label into the collections of the objects it references.

    The label is appended (with ``merge_into_existing=False``) to the
    ``label_collection`` of its albums and then of its current artists.
    Whenever such an ``append`` returns a truthy value, ``compile`` is called
    on the object that received the label.

    NOTE(review): despite the annotation, no value is returned explicitly.
    """
    # both related kinds keep their labels in an attribute of the same name
    for own_attribute in ("album_collection", "current_artist_collection"):
        for related in getattr(self, own_attribute):
            if related.label_collection.append(self, merge_into_existing=False):
                related.compile()
|
||||
|
||||
@property
|
||||
def indexing_values(self) -> List[Tuple[str, object]]:
|
||||
return [
|
||||
@@ -497,4 +600,4 @@ class Label(MainObject):
|
||||
def options(self) -> Options:
|
||||
options = [self]
|
||||
options.extend(self.current_artist_collection.shallow_list)
|
||||
options.extend(self.album_collection.shallow_list)
|
||||
options.extend(self.album_collection.shallow_list)
|
||||
|
@@ -1,6 +1,7 @@
|
||||
from collections import defaultdict
|
||||
from enum import Enum
|
||||
from typing import List, Dict, Tuple
|
||||
from typing import List, Dict, Tuple, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from .metadata import Mapping, Metadata
|
||||
from .parents import DatabaseObject
|
||||
@@ -25,9 +26,11 @@ class SourcePages(Enum):
|
||||
SPOTIFY = "spotify"
|
||||
|
||||
# This has nothing to do with audio, but bands can be here
|
||||
WIKIPEDIA = "wikipedia"
|
||||
INSTAGRAM = "instagram"
|
||||
FACEBOOK = "facebook"
|
||||
TWITTER = "twitter" # I will use nitter though lol
|
||||
TWITTER = "twitter" # I will use nitter though lol
|
||||
MYSPACE = "myspace" # Yes somehow this ancient site is linked EVERYWHERE
|
||||
|
||||
@classmethod
|
||||
def get_homepage(cls, attribute) -> str:
|
||||
@@ -42,7 +45,9 @@ class SourcePages(Enum):
|
||||
cls.INSTAGRAM: "https://www.instagram.com/",
|
||||
cls.FACEBOOK: "https://www.facebook.com/",
|
||||
cls.SPOTIFY: "https://open.spotify.com/",
|
||||
cls.TWITTER: "https://twitter.com/"
|
||||
cls.TWITTER: "https://twitter.com/",
|
||||
cls.MYSPACE: "https://myspace.com/",
|
||||
cls.WIKIPEDIA: "https://en.wikipedia.org/wiki/Main_Page"
|
||||
}
|
||||
return homepage_map[attribute]
|
||||
|
||||
@@ -55,6 +60,12 @@ class Source(DatabaseObject):
|
||||
Source(src="youtube", url="https://youtu.be/dfnsdajlhkjhsd")
|
||||
```
|
||||
"""
|
||||
COLLECTION_ATTRIBUTES = tuple()
|
||||
SIMPLE_ATTRIBUTES = {
|
||||
"type_enum": None,
|
||||
"page_enum": None,
|
||||
"url": None
|
||||
}
|
||||
|
||||
def __init__(self, page_enum: SourcePages, url: str, id_: str = None, type_enum=None) -> None:
|
||||
DatabaseObject.__init__(self, id_=id_)
|
||||
@@ -65,11 +76,14 @@ class Source(DatabaseObject):
|
||||
self.url = url
|
||||
|
||||
@classmethod
|
||||
def match_url(cls, url: str):
|
||||
def match_url(cls, url: str) -> Optional["Source"]:
|
||||
"""
|
||||
this shouldn't be used, unless you are not certain what the source is for
|
||||
the reason is that it is more inefficient
|
||||
"""
|
||||
parsed = urlparse(url)
|
||||
url = parsed.geturl()
|
||||
|
||||
if url.startswith("https://www.youtube"):
|
||||
return cls(SourcePages.YOUTUBE, url)
|
||||
|
||||
@@ -82,6 +96,9 @@ class Source(DatabaseObject):
|
||||
if "bandcamp" in url:
|
||||
return cls(SourcePages.BANDCAMP, url)
|
||||
|
||||
if "wikipedia" in parsed.netloc:
|
||||
return cls(SourcePages.WIKIPEDIA, url)
|
||||
|
||||
if url.startswith("https://www.metal-archives.com/"):
|
||||
return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url)
|
||||
|
||||
@@ -95,6 +112,9 @@ class Source(DatabaseObject):
|
||||
if url.startswith("https://twitter"):
|
||||
return cls(SourcePages.TWITTER, url)
|
||||
|
||||
if url.startswith("https://myspace.com"):
|
||||
return cls(SourcePages.MYSPACE, url)
|
||||
|
||||
def get_song_metadata(self) -> Metadata:
|
||||
return Metadata({
|
||||
Mapping.FILE_WEBPAGE_URL: [self.url],
|
||||
@@ -151,4 +171,4 @@ class SourceCollection(Collection):
|
||||
getting the sources for a specific page like
|
||||
YouTube or musify
|
||||
"""
|
||||
return self._page_to_source_list[source_page]
|
||||
return self._page_to_source_list[source_page].copy()
|
||||
|
@@ -1,5 +1,6 @@
|
||||
from typing import Optional, List, Tuple
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
||||
from ..utils import shared
|
||||
from .parents import DatabaseObject
|
||||
@@ -14,7 +15,11 @@ class Target(DatabaseObject):
|
||||
```
|
||||
"""
|
||||
|
||||
SIMPLE_ATTRIBUTES = ("_file", "_path")
|
||||
SIMPLE_ATTRIBUTES = {
|
||||
"_file": None,
|
||||
"_path": None
|
||||
}
|
||||
COLLECTION_ATTRIBUTES = tuple()
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
@@ -1,11 +1,14 @@
|
||||
from .encyclopaedia_metallum import EncyclopaediaMetallum
|
||||
from .musify import Musify
|
||||
|
||||
EncyclopaediaMetallum = EncyclopaediaMetallum
|
||||
Musify = Musify
|
||||
|
||||
MetadataPages = {
|
||||
EncyclopaediaMetallum
|
||||
EncyclopaediaMetallum,
|
||||
Musify
|
||||
}
|
||||
|
||||
AudioPages = {
|
||||
|
||||
Musify
|
||||
}
|
||||
|
@@ -1,6 +1,10 @@
|
||||
from typing import (
|
||||
List
|
||||
)
|
||||
from typing import Optional
|
||||
import requests
|
||||
import logging
|
||||
|
||||
LOGGER = logging.getLogger("this shouldn't be used")
|
||||
|
||||
from ..utils import shared
|
||||
|
||||
from ..objects import (
|
||||
Song,
|
||||
@@ -9,7 +13,10 @@ from ..objects import (
|
||||
Artist,
|
||||
Lyrics,
|
||||
Target,
|
||||
MusicObject
|
||||
MusicObject,
|
||||
Options,
|
||||
SourcePages,
|
||||
Collection
|
||||
)
|
||||
|
||||
|
||||
@@ -18,6 +25,50 @@ class Page:
|
||||
This is an abstract class, laying out the
|
||||
functionality for every other class fetching something
|
||||
"""
|
||||
API_SESSION: requests.Session = requests.Session()
|
||||
API_SESSION.proxies = shared.proxies
|
||||
TIMEOUT = 5
|
||||
TRIES = 5
|
||||
|
||||
SOURCE_TYPE: SourcePages
|
||||
|
||||
@classmethod
def get_request(cls, url: str, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[
        "requests.Response"]:
    """
    Send a GET request through the shared session and retry on unexpected status codes.

    :param url: the url to request
    :param accepted_response_codes: status codes that count as success (only read, never modified)
    :param trie: number of attempts already made; callers normally leave this at 0
    :return: the response if an accepted status code was received,
        None if the request timed out or cls.TRIES retries were used up
    """
    try:
        r = cls.API_SESSION.get(url, timeout=cls.TIMEOUT)
    except requests.exceptions.Timeout:
        return None

    if r.status_code in accepted_response_codes:
        return r

    LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at {url}. ({trie}-{cls.TRIES})")
    LOGGER.debug(r.content)

    # Abort once the retry budget is used up. Without returning here the
    # method would keep calling itself for as long as the status stays bad.
    if trie >= cls.TRIES:
        LOGGER.warning("to many tries. Aborting.")
        return None

    return cls.get_request(url, accepted_response_codes, trie + 1)
|
||||
|
||||
@classmethod
def post_request(cls, url: str, json: dict, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[
        "requests.Response"]:
    """
    Send a POST request with a json body through the shared session and retry on unexpected status codes.

    :param url: the url to request
    :param json: the payload, sent as json body
    :param accepted_response_codes: status codes that count as success (only read, never modified)
    :param trie: number of attempts already made; callers normally leave this at 0
    :return: the response if an accepted status code was received,
        None if the request timed out or cls.TRIES retries were used up
    """
    try:
        r = cls.API_SESSION.post(url, json=json, timeout=cls.TIMEOUT)
    except requests.exceptions.Timeout:
        return None

    if r.status_code in accepted_response_codes:
        return r

    LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at {url}. ({trie}-{cls.TRIES})")
    LOGGER.debug(r.content)

    # Abort once the retry budget is used up. Without returning here the
    # method would keep calling itself for as long as the status stays bad.
    if trie >= cls.TRIES:
        LOGGER.warning("to many tries. Aborting.")
        return None

    # The payload has to be passed on again; leaving it out shifts the
    # accepted codes into "json" and the counter into "accepted_response_codes".
    return cls.post_request(url, json, accepted_response_codes, trie + 1)
|
||||
|
||||
class Query:
|
||||
def __init__(self, query: str):
|
||||
@@ -69,7 +120,7 @@ class Page:
|
||||
song_str = property(fget=lambda self: self.get_str(self.song))
|
||||
|
||||
@classmethod
|
||||
def search_by_query(cls, query: str) -> List[MusicObject]:
|
||||
def search_by_query(cls, query: str) -> Options:
|
||||
"""
|
||||
# The Query
|
||||
You can define a new parameter with "#",
|
||||
@@ -84,7 +135,7 @@ class Page:
|
||||
:return possible_music_objects:
|
||||
"""
|
||||
|
||||
return []
|
||||
return Options()
|
||||
|
||||
@classmethod
|
||||
def fetch_details(cls, music_object: MusicObject, flat: bool = False) -> MusicObject:
|
||||
@@ -102,16 +153,26 @@ class Page:
|
||||
"""
|
||||
|
||||
if type(music_object) == Song:
|
||||
return cls.fetch_song_details(music_object, flat=flat)
|
||||
|
||||
song = cls.fetch_song_details(music_object, flat=flat)
|
||||
song.compile()
|
||||
return song
|
||||
|
||||
if type(music_object) == Album:
|
||||
return cls.fetch_album_details(music_object, flat=flat)
|
||||
album = cls.fetch_album_details(music_object, flat=flat)
|
||||
album.compile()
|
||||
return album
|
||||
|
||||
if type(music_object) == Artist:
|
||||
return cls.fetch_artist_details(music_object, flat=flat)
|
||||
artist = cls.fetch_artist_details(music_object, flat=flat)
|
||||
artist.compile()
|
||||
return artist
|
||||
|
||||
raise NotImplementedError(f"MusicObject {type(music_object)} has not been implemented yet")
|
||||
|
||||
@classmethod
|
||||
def fetch_song_from_source(cls, source: Source, flat: bool = False) -> Song:
|
||||
return Song()
|
||||
|
||||
@classmethod
|
||||
def fetch_song_details(cls, song: Song, flat: bool = False) -> Song:
|
||||
"""
|
||||
@@ -127,9 +188,18 @@ class Page:
|
||||
|
||||
:return detailed_song: it modifies the input song
|
||||
"""
|
||||
|
||||
source: Source
|
||||
for source in song.source_collection.get_sources_from_page(cls.SOURCE_TYPE):
|
||||
new_song = cls.fetch_song_from_source(source, flat)
|
||||
song.merge(new_song)
|
||||
|
||||
return song
|
||||
|
||||
# Base implementation: ignores both arguments and returns an empty Album.
# fetch_album_details calls this once per source of the page's SOURCE_TYPE
# and merges the returned object into the album being detailed.
@classmethod
|
||||
def fetch_album_from_source(cls, source: Source, flat: bool = False) -> Album:
|
||||
return Album()
|
||||
|
||||
@classmethod
|
||||
def fetch_album_details(cls, album: Album, flat: bool = False) -> Album:
|
||||
"""
|
||||
@@ -147,8 +217,17 @@ class Page:
|
||||
:return detailed_artist: it modifies the input artist
|
||||
"""
|
||||
|
||||
source: Source
|
||||
for source in album.source_collection.get_sources_from_page(cls.SOURCE_TYPE):
|
||||
new_album: Album = cls.fetch_album_from_source(source, flat)
|
||||
album.merge(new_album)
|
||||
|
||||
return album
|
||||
|
||||
# Base implementation: ignores both arguments and returns an empty Artist.
# fetch_artist_details calls this once per source of the page's SOURCE_TYPE
# and merges the returned object into the artist being detailed.
@classmethod
|
||||
def fetch_artist_from_source(cls, source: Source, flat: bool = False) -> Artist:
|
||||
return Artist()
|
||||
|
||||
@classmethod
|
||||
def fetch_artist_details(cls, artist: Artist, flat: bool = False) -> Artist:
|
||||
"""
|
||||
@@ -163,5 +242,10 @@ class Page:
|
||||
|
||||
:return detailed_artist: it modifies the input artist
|
||||
"""
|
||||
|
||||
source: Source
|
||||
for source in artist.source_collection.get_sources_from_page(cls.SOURCE_TYPE):
|
||||
new_artist: Artist = cls.fetch_artist_from_source(source, flat)
|
||||
artist.merge(new_artist)
|
||||
|
||||
return artist
|
||||
|
@@ -17,7 +17,8 @@ from ..objects import (
|
||||
Album,
|
||||
ID3Timestamp,
|
||||
FormattedText,
|
||||
Label
|
||||
Label,
|
||||
Options
|
||||
)
|
||||
from ..utils import (
|
||||
string_processing
|
||||
@@ -34,7 +35,7 @@ class EncyclopaediaMetallum(Page):
|
||||
SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM
|
||||
|
||||
@classmethod
|
||||
def search_by_query(cls, query: str) -> List[MusicObject]:
|
||||
def search_by_query(cls, query: str) -> Options:
|
||||
query_obj = cls.Query(query)
|
||||
|
||||
if query_obj.is_raw:
|
||||
@@ -42,14 +43,14 @@ class EncyclopaediaMetallum(Page):
|
||||
return cls.advanced_search(query_obj)
|
||||
|
||||
@classmethod
def advanced_search(cls, query: Page.Query) -> Options:
    """
    Dispatch a structured query to the most specific search available.

    The first field of the query that is set decides which search runs,
    checked in the order song, album, artist.

    :param query: the parsed query
    :return: the matches wrapped in Options; an empty Options if neither
        song, album nor artist is set
    """
    if query.song is not None:
        return Options(cls.search_for_song(query=query))
    if query.album is not None:
        return Options(cls.search_for_album(query=query))
    if query.artist is not None:
        return Options(cls.search_for_artist(query=query))

    # return an *instance* (not the Options class itself), so that callers
    # always get the annotated type
    return Options()
|
||||
|
||||
@classmethod
|
||||
def search_for_song(cls, query: Page.Query) -> List[Song]:
|
||||
|
813
src/music_kraken/pages/html/musify/album_overview.html
Normal file
813
src/music_kraken/pages/html/musify/album_overview.html
Normal file
File diff suppressed because one or more lines are too long
887
src/music_kraken/pages/musify.py
Normal file
887
src/music_kraken/pages/musify.py
Normal file
@@ -0,0 +1,887 @@
|
||||
from collections import defaultdict
|
||||
from typing import List, Optional, Union
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import pycountry
|
||||
import time
|
||||
from urllib.parse import urlparse
|
||||
from enum import Enum
|
||||
from dataclasses import dataclass
|
||||
|
||||
from ..utils.shared import (
|
||||
ENCYCLOPAEDIA_METALLUM_LOGGER as LOGGER
|
||||
)
|
||||
|
||||
from .abstract import Page
|
||||
from ..objects import (
|
||||
MusicObject,
|
||||
Artist,
|
||||
Source,
|
||||
SourcePages,
|
||||
Song,
|
||||
Album,
|
||||
ID3Timestamp,
|
||||
FormattedText,
|
||||
Label,
|
||||
Options,
|
||||
AlbumType,
|
||||
AlbumStatus
|
||||
)
|
||||
from ..utils import (
|
||||
string_processing,
|
||||
shared
|
||||
)
|
||||
from ..utils.shared import (
|
||||
MUSIFY_LOGGER as LOGGER
|
||||
)
|
||||
|
||||
"""
|
||||
https://musify.club/artist/ghost-bath-280348?_pjax=#bodyContent
|
||||
https://musify.club/artist/ghost-bath-280348/releases?_pjax=#bodyContent
|
||||
https://musify.club/artist/ghost-bath-280348/clips?_pjax=#bodyContent
|
||||
https://musify.club/artist/ghost-bath-280348/photos?_pjax=#bodyContent
|
||||
|
||||
POST https://musify.club/artist/filtersongs
|
||||
ID: 280348
|
||||
NameForUrl: ghost-bath
|
||||
Page: 1
|
||||
IsAllowed: True
|
||||
SortOrder.Property: dateCreated
|
||||
SortOrder.IsAscending: false
|
||||
X-Requested-With: XMLHttpRequest
|
||||
|
||||
POST https://musify.club/artist/filteralbums
|
||||
ArtistID: 280348
|
||||
SortOrder.Property: dateCreated
|
||||
SortOrder.IsAscending: false
|
||||
X-Requested-With: XMLHttpRequest
|
||||
"""
|
||||
|
||||
|
||||
class MusifyTypes(Enum):
    """
    Kinds of musify.club pages.

    The values are the first path segment of the corresponding URLs, which
    is what Musify.parse_url uses to look the member up.
    """

    # e.g. /artist/ghost-bath-280348
    ARTIST = "artist"
    # e.g. /release/ghost-bath-self-loather-2021-1554266
    RELEASE = "release"
    # e.g. /track/linkin-park-papercut-3051
    SONG = "track"
|
||||
|
||||
|
||||
@dataclass
class MusifyUrl:
    """
    The components of a musify.club URL, as produced by Musify.parse_url.
    """

    # page kind, looked up from the first path segment
    source_type: MusifyTypes
    # slug without its trailing "-<id>" token, e.g. "ghost-bath"
    name_without_id: str
    # the complete slug (second path segment), e.g. "ghost-bath-280348"
    name_with_id: str
    # last "-"-separated token of the slug, e.g. "280348"
    musify_id: str
    # the url exactly as it was handed to parse_url
    url: str
|
||||
|
||||
|
||||
class Musify(Page):
|
||||
API_SESSION: requests.Session = requests.Session()
|
||||
API_SESSION.headers = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:106.0) Gecko/20100101 Firefox/106.0",
|
||||
"Connection": "keep-alive",
|
||||
"Referer": "https://musify.club/"
|
||||
}
|
||||
API_SESSION.proxies = shared.proxies
|
||||
TIMEOUT = 5
|
||||
TRIES = 5
|
||||
HOST = "https://musify.club"
|
||||
|
||||
SOURCE_TYPE = SourcePages.MUSIFY
|
||||
|
||||
@classmethod
def parse_url(cls, url: str) -> MusifyUrl:
    """
    Split a musify.club url into its components.

    For "https://musify.club/artist/ghost-bath-280348" the first path
    segment ("artist") is the page type and the second
    ("ghost-bath-280348") is the slug; the token behind the last "-" of
    the slug is taken as the id.

    :param url: a musify url with at least two path segments
    :return: the parsed components
    :raises IndexError: if the path has fewer than two segments
    :raises ValueError: if the first path segment is no MusifyTypes value
    """
    path = urlparse(url).path.split("/")

    # path[0] is the empty string in front of the leading "/"
    page_type, slug = path[1], path[2]
    slug_tokens = slug.split("-")

    try:
        type_enum = MusifyTypes(page_type)
    except ValueError:
        # report through the module logger instead of printing to stdout,
        # then let the original exception propagate unchanged
        LOGGER.warning(f"{page_type} is not yet implemented, add it to MusifyTypes")
        raise

    return MusifyUrl(
        source_type=type_enum,
        name_without_id="-".join(slug_tokens[:-1]),
        name_with_id=slug,
        musify_id=slug_tokens[-1],
        url=url
    )
|
||||
|
||||
@classmethod
def search_by_query(cls, query: str) -> Options:
    """
    Search musify for the given query string.

    A raw query is sent as it is; a structured one is first flattened
    into plain text by get_plaintext_query.

    :param query: the query as entered by the user
    :return: the search results
    """
    parsed_query = cls.Query(query)

    if parsed_query.is_raw:
        search_text = parsed_query.query
    else:
        search_text = cls.get_plaintext_query(parsed_query)

    return cls.plaintext_search(search_text)
|
||||
|
||||
@classmethod
def get_plaintext_query(cls, query: Page.Query) -> str:
    """
    Flatten a structured query into "artist - [album -] song" search text.

    Fields that are unset or empty are replaced by "*". The album part is
    left out completely when query.album is None.

    :param query: the parsed query
    :return: the plain text to search for
    """
    album = query.album
    artist = query.artist or '*'
    song = query.song or '*'

    if album is not None:
        return f"{artist} - {album or '*'} - {song}"
    return f"{artist} - {song}"
|
||||
|
||||
@classmethod
def parse_artist_contact(cls, contact: BeautifulSoup) -> Artist:
    """
    Build an Artist from one "contacts__item" element of the search page.

    The source and the id come from the href of the first anchor, the
    name from its title attribute. If the item contains an image with an
    alt text, that text takes precedence as the name.

    :param contact: the soup of the contact item
    :return: the artist; everything that could not be found stays None / empty
    """
    source_list: List[Source] = []
    name = None
    _id = None

    # source
    anchor = contact.find("a")
    if anchor is not None:
        name = anchor.get("title")

        href = anchor.get("href")
        # an anchor without href yields neither an id nor a source
        if href is not None:
            if "-" in href:
                _id = href.split("-")[-1]

            source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))

    # artist image: only its alt text is used
    image_soup = contact.find("img")
    if image_soup is not None:
        alt = image_soup.get("alt")
        if alt is not None:
            name = alt

    return Artist(
        _id=_id,
        name=name,
        source_list=source_list
    )
|
||||
|
||||
@classmethod
def parse_album_contact(cls, contact: BeautifulSoup) -> Album:
    """
    Build an Album from one "contacts__item" element of the search page.

    <div class="contacts__item">
        <a href="/release/ghost-bath-ghost-bath-2013-602489" title="Ghost Bath - 2013">

            <div class="contacts__img release">
                <img alt="Ghost Bath" class="lozad" data-src="https://37s.musify.club/img/69/9060265/24178833.jpg"/>
                <noscript><img alt="Ghost Bath" src="https://37s.musify.club/img/69/9060265/24178833.jpg"/></noscript>
            </div>

            <div class="contacts__info">
                <strong>Ghost Bath - 2013</strong>
                <small>Ghost Bath</small>
                <small>Треков: 4</small> <!--tracks-->
                <small><i class="zmdi zmdi-star zmdi-hc-fw"></i> 9,04</small>
            </div>
        </a>
    </div>

    :param contact: the soup of the contact item
    :return: the result of cls.ALBUM_CACHE.append(...) for the parsed album
    """

    source_list: List[Source] = []
    title = None
    _id = None
    year = None
    artist_list: List[Artist] = []

    def parse_title_date(title_date: Optional[str], delimiter: str = " - "):
        """Split "<title> - <year>" into title and year; anything else is ignored."""
        nonlocal year
        nonlocal title

        if title_date is None:
            return

        split_attr = title_date.strip().split(delimiter)

        if len(split_attr) < 2:
            return
        if not split_attr[-1].isdigit():
            return

        year = int(split_attr[-1])
        title = delimiter.join(split_attr[:-1])

    # source
    anchor = contact.find("a")
    if anchor is not None:
        # get the title and year
        parse_title_date(anchor.get("title"))

        href = anchor.get("href")
        # an anchor without href yields neither an id nor a source
        if href is not None:
            if "-" in href:
                _id = href.split("-")[-1]

            source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))

    # cover art: only the alt text is used, as the title
    image_soup = contact.find("img")
    if image_soup is not None:
        alt = image_soup.get("alt")
        if alt is not None:
            title = alt

    contact_info_soup = contact.find("div", {"class": "contacts__info"})
    if contact_info_soup is not None:
        # <strong>Ghost Bath - 2013</strong>
        # <small>Ghost Bath</small>
        # <small>Треков: 4</small> <!--tracks-->
        # <small><i class="zmdi zmdi-star zmdi-hc-fw"></i> 9,04</small>

        # the heading is only usable if it exists, and it is its *text*
        # that has to be parsed, not the tag
        title_soup = contact_info_soup.find("strong")
        if title_soup is not None:
            parse_title_date(title_soup.text)

        small_list = contact_info_soup.find_all("small")
        if len(small_list) == 3:
            # the first <small> holds the artists; the other two (track
            # count and rating) are not evaluated
            for artist_str in small_list[0].text.split("&\r\n"):
                artist_str = artist_str.rstrip("& ...\r\n")
                artist_str = artist_str.strip()

                # cut a trailing "[...]" annotation off the artist name
                if artist_str.endswith("]") and "[" in artist_str:
                    artist_str = artist_str.rsplit("[", maxsplit=1)[0]

                artist_list.append(Artist(name=artist_str))
        else:
            LOGGER.warning("got an unequal ammount than 3 small elements")

    return cls.ALBUM_CACHE.append(Album(
        _id=_id,
        title=title,
        source_list=source_list,
        date=ID3Timestamp(year=year),
        artist_list=artist_list
    ))
|
||||
|
||||
@classmethod
def parse_contact_container(cls, contact_container_soup: BeautifulSoup) -> List[Union[Artist, Album]]:
    """
    Parse every "contacts__item" inside a contacts container.

    The href of an item's first anchor decides how it is parsed: items
    whose href contains "artist" become artists, otherwise items whose
    href contains "release" become albums. Everything else is skipped.

    :param contact_container_soup: the soup of the container
    :return: the parsed artists and albums, in document order
    """
    parsed = []

    item: BeautifulSoup
    for item in contact_container_soup.find_all("div", {"class": "contacts__item"}):
        link = item.find("a")
        if link is None:
            continue

        target = link.get("href")
        if target is None:
            continue

        if "artist" in target:
            parsed.append(cls.parse_artist_contact(item))
        elif "release" in target:
            parsed.append(cls.parse_album_contact(item))

    return parsed
|
||||
|
||||
@classmethod
def parse_playlist_item(cls, playlist_item_soup: BeautifulSoup) -> Song:
    """
    Build a Song from one "playlist__item" element of the search page.

    The title is read from the data-name attribute. Of the anchors inside
    the "playlist__heading" div, all but the last are artists and the last
    one is the track. The song id is the trailing number of the track href
    and is overridden by the item's own id attribute ("playerDiv<id>") if
    that yields a number.

    :param playlist_item_soup: the soup of the playlist item
    :return: the parsed song
    """
    song_id = None
    song_title = playlist_item_soup.get("data-name")
    artist_list: List[Artist] = []
    source_list: List[Source] = []

    # details
    playlist_details: BeautifulSoup = playlist_item_soup.find("div", {"class": "playlist__heading"})
    if playlist_details is not None:
        anchor_list = playlist_details.find_all("a")

        if len(anchor_list) >= 2:
            # artists
            artist_anchor: BeautifulSoup
            for artist_anchor in anchor_list[:-1]:
                artist_name = artist_anchor.get_text(strip=True)

                href = artist_anchor.get("href")
                if href is None:
                    # without a target there is neither an id nor a source
                    artist_list.append(Artist(name=artist_name))
                    continue

                # kept separate from song_id, so that an artist id can
                # never end up as the id of the song
                artist_id = href.split("-")[-1] if "-" in href else None

                artist_list.append(Artist(
                    _id=artist_id,
                    name=artist_name,
                    source_list=[Source(cls.SOURCE_TYPE, cls.HOST + href)]
                ))

            # track
            track_soup: BeautifulSoup = anchor_list[-1]
            # TODO
            # this anchor text may have something like (feat. some artist)
            # which is not acceptable
            href = track_soup.get("href")
            if href is not None:
                if "-" in href:
                    raw_id: str = href.split("-")[-1]
                    if raw_id.isdigit():
                        song_id = raw_id
                source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))

        else:
            LOGGER.warning("there are not enough anchors (2) for artist and track")
            LOGGER.warning(str(artist_list))

    # NOTE: the "data-artist" attribute of the item is not read here;
    # artists only come from the heading anchors above.

    id_attribute = playlist_item_soup.get("id")
    if id_attribute is not None:
        raw_id = id_attribute.replace("playerDiv", "")
        if raw_id.isdigit():
            song_id = raw_id

    return Song(
        _id=song_id,
        title=song_title,
        main_artist_list=artist_list,
        source_list=source_list
    )
|
||||
|
||||
@classmethod
def parse_playlist_soup(cls, playlist_soup: BeautifulSoup) -> List[Song]:
    """
    Parse every "playlist__item" div inside a playlist element.

    :param playlist_soup: the soup of the playlist
    :return: one Song per playlist item, in document order
    """
    return [
        cls.parse_playlist_item(item_soup)
        for item_soup in playlist_soup.find_all("div", {"class": "playlist__item"})
    ]
|
||||
|
||||
@classmethod
def plaintext_search(cls, query: str) -> Options:
    """
    Run a plain text search on musify and parse the result page.

    :param query: the text to search for; it is percent-encoded before it
        is put into the url
    :return: the artists, albums and songs found; empty Options if the
        request did not yield a response
    """
    # imported locally, because the module only imports urlparse
    from urllib.parse import quote

    search_results = []

    # The query is user input: characters like "#" or "&" would otherwise
    # cut the search text short or be read as a further parameter.
    r = cls.get_request(f"{cls.HOST}/search?searchText={quote(query, safe='')}")
    if r is None:
        return Options()
    search_soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")

    # artists and albums
    # child of div class: contacts row
    for contact_container_soup in search_soup.find_all("div", {"class": "contacts"}):
        search_results.extend(cls.parse_contact_container(contact_container_soup))

    # song
    # div class: playlist__item
    for playlist_soup in search_soup.find_all("div", {"class": "playlist"}):
        search_results.extend(cls.parse_playlist_soup(playlist_soup))

    return Options(search_results)
|
||||
|
||||
@classmethod
def parse_album_card(cls, album_card: BeautifulSoup, artist_name: str = None) -> Album:
    """
    Build an Album from one release card of a discography.

    <div class="card release-thumbnail" data-type="2">
        <a href="/release/ghost-bath-self-loather-2021-1554266">
            <img alt="Self Loather" class="card-img-top lozad" data-src="https://40s-a.musify.club/img/70/24826582/62624396.jpg"/>
            <noscript><img alt="Self Loather" src="https://40s-a.musify.club/img/70/24826582/62624396.jpg"/></noscript>
        </a>
        <div class="card-body">
            <h4 class="card-subtitle">
                <a href="/release/ghost-bath-self-loather-2021-1554266">Self Loather</a>
            </h4>
        </div>
        <div class="card-footer"><p class="card-text"><a href="/albums/2021">2021</a></p></div>
        <div class="card-footer">
            <p class="card-text genre__labels">
                <a href="/genre/depressive-black-132">Depressive Black</a><a href="/genre/post-black-metal-295">Post-Black Metal</a> </p>
        </div>
        <div class="card-footer">
            <small><i class="zmdi zmdi-calendar" title="Добавлено"></i> 13.11.2021</small>
            <small><i class="zmdi zmdi-star zmdi-hc-fw" title="Рейтинг"></i> 5,88</small>
        </div>
    </div>

    :param album_card: the soup of the card
    :param artist_name: if given, a leading "<artist> - " is stripped from
        the album name when the artist part contains this name
    :return: the result of cls.ALBUM_CACHE.append(...) for the parsed album
    """

    # data-type of the card -> AlbumType; unknown ids fall back to OTHER
    album_type_map = defaultdict(lambda: AlbumType.OTHER, {
        1: AlbumType.OTHER,  # literally other xD
        2: AlbumType.STUDIO_ALBUM,
        3: AlbumType.EP,
        4: AlbumType.SINGLE,
        5: AlbumType.OTHER,  # BOOTLEG
        6: AlbumType.LIVE_ALBUM,
        7: AlbumType.COMPILATION_ALBUM,  # compilation of different artists
        8: AlbumType.MIXTAPE,
        9: AlbumType.DEMO,
        10: AlbumType.MIXTAPE,  # DJ Mixes
        11: AlbumType.COMPILATION_ALBUM,  # compilation of only this artist
        13: AlbumType.COMPILATION_ALBUM,  # unofficial
        14: AlbumType.MIXTAPE  # "Soundtracks"
    })

    _id: Optional[str] = None
    name: str = None
    source_list: List[Source] = []
    timestamp: Optional[ID3Timestamp] = None
    album_status = None
    # same fallback as the map uses, so that the type is always defined,
    # even if the card has no numeric data-type
    album_type = AlbumType.OTHER

    def set_name(new_name: str):
        """Set the album name, dropping a leading "<artist> - " if it matches artist_name."""
        nonlocal name

        # example of just setting not working: https://musify.club/release/unjoy-eurythmie-psychonaut-4-tired-numb-still-alive-2012-324067
        if artist_name is None or new_name.count(" - ") != 1:
            name = new_name
            return

        potential_artist_list, potential_name = new_name.split(" - ")
        unified_artist_list = string_processing.unify(potential_artist_list)
        if string_processing.unify(artist_name) in unified_artist_list:
            name = potential_name
        else:
            name = new_name

    raw_type_id = album_card.get("data-type")
    if raw_type_id is not None and raw_type_id.isdigit():
        type_id = int(raw_type_id)
        album_type = album_type_map[type_id]

        if type_id == 5:
            album_status = AlbumStatus.BOOTLEG

    def parse_release_anchor(_anchor: BeautifulSoup, text_is_name=False):
        """Take source and id from the anchor's href and, if requested, the name from its text."""
        nonlocal _id

        if _anchor is None:
            return

        href = _anchor.get("href")
        if href is not None:
            # add url to sources
            source_list.append(Source(
                cls.SOURCE_TYPE,
                cls.HOST + href
            ))

            # split id from url
            split_href = href.split("-")
            if len(split_href) > 1:
                _id = split_href[-1]

        if text_is_name:
            set_name(_anchor.text)

    # the anchor directly below the card wraps the thumbnail
    anchor_list = album_card.find_all("a", recursive=False)
    if len(anchor_list) > 0:
        anchor = anchor_list[0]
        parse_release_anchor(anchor)

        # only the alt text of the thumbnail is used, as the name
        thumbnail: BeautifulSoup = anchor.find("img")
        if thumbnail is not None:
            alt = thumbnail.get("alt")
            if alt is not None:
                set_name(alt)
    else:
        LOGGER.debug("the card has no thumbnail or url")

    card_body = album_card.find("div", {"class": "card-body"})
    if card_body is not None:
        parse_release_anchor(card_body.find("a"), text_is_name=True)

    def parse_small_date(small_soup: BeautifulSoup):
        """
        <small>
            <i class="zmdi zmdi-calendar" title="Добавлено"></i>
            13.11.2021
        </small>
        """
        nonlocal timestamp

        italic_tagging_soup: BeautifulSoup = small_soup.find("i")
        if italic_tagging_soup is None:
            return
        if italic_tagging_soup.get("title") != "Добавлено":
            # "Добавлено" can be translated to "Added (at)"
            return

        raw_time = small_soup.text.strip()
        timestamp = ID3Timestamp.strptime(raw_time, "%d.%m.%Y")

    # parse small date
    card_footer_list = album_card.find_all("div", {"class": "card-footer"})
    if len(card_footer_list) != 3:
        LOGGER.debug("there are not exactly 3 card footers in a card")

    if len(card_footer_list) > 0:
        # the date sits in the last footer
        for any_small_soup in card_footer_list[-1].find_all("small"):
            parse_small_date(any_small_soup)
    else:
        LOGGER.debug("there is not even 1 footer in the album card")

    return cls.ALBUM_CACHE.append(Album(
        _id=_id,
        title=name,
        source_list=source_list,
        date=timestamp,
        album_type=album_type,
        album_status=album_status
    ))
|
||||
|
||||
@classmethod
def get_discography(cls, url: MusifyUrl, artist_name: str = None, flat=False) -> List[Album]:
    """
    Fetch the release cards of an artist and parse them into albums.

    POST https://musify.club/artist/filteralbums
    ArtistID: 280348
    SortOrder.Property: dateCreated
    SortOrder.IsAscending: false
    X-Requested-With: XMLHttpRequest

    :param url: the parsed url of the artist
    :param artist_name: handed on to parse_album_card
    :param flat: if it is false, every album is additionally fetched from
        each of its musify sources and merged with the result
    :return: the albums; an empty list if the request yielded no response
    """
    endpoint = f"{cls.HOST}/{url.source_type.value}/filteralbums"
    payload = {
        "ArtistID": str(url.musify_id),
        "SortOrder.Property": "dateCreated",
        "SortOrder.IsAscending": False,
        "X-Requested-With": "XMLHttpRequest"
    }

    response = cls.post_request(url=endpoint, json=payload)
    if response is None:
        return []

    card_page: BeautifulSoup = BeautifulSoup(response.content, features="html.parser")

    albums: List[Album] = []
    for card in card_page.find_all("div", {"class": "card"}):
        parsed_album: Album = cls.parse_album_card(card, artist_name)

        if not flat:
            musify_sources = parsed_album.source_collection.get_sources_from_page(cls.SOURCE_TYPE)
            for musify_source in musify_sources:
                parsed_album.merge(cls.fetch_album_from_source(musify_source))

        albums.append(parsed_album)

    return albums
|
||||
|
||||
@classmethod
def get_artist_attributes(cls, url: MusifyUrl) -> Artist:
    """
    fetches the main Artist attributes from this endpoint
    https://musify.club/artist/ghost-bath-280348?_pjax=#bodyContent
    it needs to parse html

    The parts of the page that are evaluated:

    <ol class="breadcrumb" itemscope="" itemtype="http://schema.org/BreadcrumbList">
        <li class="breadcrumb-item" ...><a href="/" itemprop="item"><span itemprop="name">Главная</span>...</a></li>
        <li class="breadcrumb-item" ...><a href="/artist" itemprop="item"><span itemprop="name">Исполнители</span>...</a></li>
        <li class="breadcrumb-item active">Ghost Bath</li>
    </ol>

    <ul class="nav nav-tabs nav-fill">
        <li class="nav-item"><a class="active nav-link" href="/artist/ghost-bath-280348">песни (41)</a></li>
        <li class="nav-item"><a class="nav-link" href="/artist/ghost-bath-280348/releases">альбомы (12)</a></li>
        <li class="nav-item"><a class="nav-link" href="/artist/ghost-bath-280348/clips">видеоклипы (23)</a></li>
        <li class="nav-item"><a class="nav-link" href="/artist/ghost-bath-280348/photos">фото (38)</a></li>
    </ul>

    <header class="content__title">
        <h1>Ghost Bath</h1>
        <div class="actions">
            ...
        </div>
    </header>

    <ul class="icon-list">
        <li>
            <i class="zmdi zmdi-globe zmdi-hc-fw" title="Страна"></i>
            <i class="flag-icon US shadow"></i>
            Соединенные Штаты
        </li>
    </ul>

    :param url: the parsed url of the artist
    :return: the artist; if the request yields no response, an artist
        that only has the id set
    """

    r = cls.get_request(f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent")
    if r is None:
        return Artist(_id=url.musify_id)

    soup = BeautifulSoup(r.content, "html.parser")

    name = None
    source_list: List[Source] = []
    country = None
    notes: FormattedText = FormattedText()

    # name from the last breadcrumb
    breadcrumbs: BeautifulSoup = soup.find("ol", {"class": "breadcrumb"})
    if breadcrumbs is not None:
        breadcrumb_list: List[BeautifulSoup] = breadcrumbs.find_all("li", {"class": "breadcrumb-item"}, recursive=False)
        if len(breadcrumb_list) == 3:
            name = breadcrumb_list[-1].get_text(strip=True)
        else:
            LOGGER.debug("breadcrumb layout on artist page changed")

    # the link of the songs tab is used as source of the artist
    nav_tabs: BeautifulSoup = soup.find("ul", {"class": "nav-tabs"})
    if nav_tabs is not None:
        list_item: BeautifulSoup
        for list_item in nav_tabs.find_all("li", {"class": "nav-item"}, recursive=False):
            if not list_item.get_text(strip=True).startswith("песни"):
                # "песни" translates to "songs"
                continue

            anchor: BeautifulSoup = list_item.find("a")
            if anchor is None:
                continue
            href = anchor.get("href")
            if href is None:
                continue

            source_list.append(Source(
                cls.SOURCE_TYPE,
                cls.HOST + href
            ))

    # the page heading, if present, overrides the breadcrumb name
    content_title: BeautifulSoup = soup.find("header", {"class": "content__title"})
    if content_title is not None:
        h1_name: BeautifulSoup = content_title.find("h1", recursive=False)
        if h1_name is not None:
            name = h1_name.get_text(strip=True)

    # country and sources
    icon_list: BeautifulSoup = soup.find("ul", {"class": "icon-list"})
    if icon_list is not None:
        # attrs has to be a dict; a set would be taken as a collection of
        # class names to match
        country_italic: BeautifulSoup = icon_list.find("i", {"class": "flag-icon"})
        if country_italic is not None:
            style_classes: set = {'flag-icon', 'shadow'}
            classes: set = set(country_italic.get("class"))

            # what remains after removing the styling classes is the country code
            country_set: set = classes.difference(style_classes)
            if len(country_set) != 1:
                LOGGER.debug("the country set contains multiple values")
            if len(country_set) != 0:
                # This is the css file, where all flags that can be used on musify
                # are laid out and styled.
                # Every flag has two upper case letters, thus I assume they follow the alpha_2
                # standard, though I haven't checked.
                # https://musify.club/content/flags.min.css

                country = pycountry.countries.get(alpha_2=list(country_set)[0])

        # get all additional sources
        additional_source: BeautifulSoup
        for additional_source in icon_list.find_all("a", {"class": "link"}):
            href = additional_source.get("href")
            if href is None:
                continue
            new_src = Source.match_url(href)
            if new_src is None:
                continue
            source_list.append(new_src)

    note_soup: BeautifulSoup = soup.find(id="text-main")
    if note_soup is not None:
        notes.html = note_soup.decode_contents()

    return Artist(
        _id=url.musify_id,
        name=name,
        country=country,
        source_list=source_list,
        notes=notes
    )
|
||||
|
||||
@classmethod
def fetch_artist_from_source(cls, source: Source, flat: bool = False) -> Artist:
    """
    fetches artist from source

    [x] discography
    [x] attributes
    [] picture gallery

    Args:
        source (Source): the source to fetch
        flat (bool, optional): if it is false, every album from discography will be fetched. Defaults to False.

    Returns:
        Artist: the artist fetched
    """

    url = cls.parse_url(source.url)

    artist = cls.get_artist_attributes(url)

    # flat has to be handed on; otherwise every album is fetched in full
    # even if the caller asked for a flat fetch
    discography: List[Album] = cls.get_discography(url, artist.name, flat=flat)
    artist.main_album_collection.extend(discography)

    return artist
|
||||
|
||||
@classmethod
def parse_song_card(cls, song_card: BeautifulSoup) -> Song:
    """
    Build a Song from one "playlist__item" element of an album page.

    <div id="playerDiv3051" class="playlist__item" itemprop="track" itemscope="itemscope" itemtype="http://schema.org/MusicRecording" data-artist="Linkin Park" data-name="Papercut">
        <div id="play_3051" class="playlist__control play" data-url="/track/play/3051/linkin-park-papercut.mp3" data-position="1" data-title="Linkin Park - Papercut" title="Слушать Linkin Park - Papercut">
            ...
        </div>
        <div class="playlist__position">
            1
        </div>
        <div class="playlist__details">
            <div class="playlist__heading">
                <a href="/artist/linkin-park-5" rel="nofollow">Linkin Park</a> - <a class="strong" href="/track/linkin-park-papercut-3051">Papercut</a>
                <span itemprop="byArtist" itemscope="itemscope" itemtype="http://schema.org/MusicGroup">
                    <meta content="/artist/linkin-park-5" itemprop="url" />
                    <meta content="Linkin Park" itemprop="name" />
                </span>
            </div>
        </div>
        <div>
            <div class="track__details track__rating hidden-xs-down">
                ...
            </div>
        </div>
        <div class="track__details hidden-xs-down">
            <span class="text-muted">03:05</span>
            <span class="text-muted">320 Кб/с</span>
        </div>
        <div class="track__details hidden-xs-down">
            ...
        </div>
        <div class="playlist__actions">
            ...
        </div>
    </div>

    With several artists the heading holds one anchor and one "byArtist"
    span per artist:

    <div class="playlist__heading">
        <a href="/artist/tamas-141317" rel="nofollow">Tamas</a> ft.<a href="/artist/zombiez-630767" rel="nofollow">Zombiez</a> - <a class="strong" href="/track/tamas-zombiez-voodoo-feat-zombiez-16185276">Voodoo (Feat. Zombiez)</a>
        <span itemprop="byArtist" ...>
            <meta content="/artist/tamas-141317" itemprop="url" />
            <meta content="Tamas" itemprop="name" />
        </span>
        <span itemprop="byArtist" ...>
            <meta content="/artist/zombiez-630767" itemprop="url" />
            <meta content="Zombiez" itemprop="name" />
        </span>
    </div>

    :param song_card: the soup of the playlist item
    :return: the parsed song
    """
    song_name = song_card.get("data-name")
    artist_list: List[Artist] = []
    source_list: List[Source] = []
    tracksort = None

    def parse_title(_title: str) -> str:
        # currently hands the title back unchanged
        return _title

    # NOTE: the "data-artist" attribute of the card is not read here;
    # artists only come from the "byArtist" spans below.

    # get tracksort
    tracksort_soup: BeautifulSoup = song_card.find("div", {"class": "playlist__position"})
    if tracksort_soup is not None:
        raw_tracksort: str = tracksort_soup.get_text(strip=True)
        if raw_tracksort.isdigit():
            tracksort = int(raw_tracksort)

    # playlist details
    playlist_details: BeautifulSoup = song_card.find("div", {"class": "playlist__details"})
    if playlist_details is not None:
        # track: the last anchor of the heading
        anchor_list: List[BeautifulSoup] = playlist_details.find_all("a")
        if len(anchor_list) > 1:
            track_anchor: BeautifulSoup = anchor_list[-1]
            href: str = track_anchor.get("href")
            if href is not None:
                source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))
            song_name = parse_title(track_anchor.get_text(strip=True))

        # artist
        artist_span: BeautifulSoup
        for artist_span in playlist_details.find_all("span", {"itemprop": "byArtist"}):
            _artist_src = None
            _artist_name = None

            meta_artist_src = artist_span.find("meta", {"itemprop": "url"})
            if meta_artist_src is not None:
                meta_artist_url = meta_artist_src.get("content")
                if meta_artist_url is not None:
                    _artist_src = [Source(cls.SOURCE_TYPE, cls.HOST + meta_artist_url)]

            meta_artist_name = artist_span.find("meta", {"itemprop": "name"})
            if meta_artist_name is not None:
                _artist_name = meta_artist_name.get("content")

            if _artist_name is not None or _artist_src is not None:
                artist_list.append(Artist(name=_artist_name, source_list=_artist_src))

    return Song(
        title=song_name,
        tracksort=tracksort,
        main_artist_list=artist_list,
        # the track url collected above has to reach the song, otherwise
        # it was parsed for nothing
        source_list=source_list
    )
|
||||
|
||||
@classmethod
def fetch_album_from_source(cls, source: Source, flat: bool = False) -> Album:
    """
    fetches album from source:
    eg. 'https://musify.club/release/linkin-park-hybrid-theory-2000-188'

    /html/musify/album_overview.html
    [x] tracklist
    [] attributes
    [] ratings

    :param source: the musify source of the album
    :param flat: currently not evaluated
    :return: an album that holds the songs of the tracklist; an empty
        album if the request yielded no response
    """
    # No placeholder title: callers merge the returned album into their
    # own, so everything set here has to be real data.
    album = Album()

    url = cls.parse_url(source.url)

    endpoint = cls.HOST + "/release/" + url.name_with_id
    r = cls.get_request(endpoint)
    if r is None:
        return album

    soup = BeautifulSoup(r.content, "html.parser")

    # <div class="card"><div class="card-body">...</div></div>
    cards_soup: BeautifulSoup = soup.find("div", {"class": "card-body"})
    if cards_soup is not None:
        card_soup: BeautifulSoup
        for card_soup in cards_soup.find_all("div", {"class": "playlist__item"}):
            album.song_collection.append(cls.parse_song_card(card_soup))
    album.update_tracksort()

    return album
|
Reference in New Issue
Block a user