Merge branch 'HeIIow2:master' into master

This commit is contained in:
James Clarke
2023-03-23 17:27:44 +00:00
committed by GitHub
24 changed files with 2598 additions and 421 deletions

View File

@@ -4,7 +4,9 @@ from . import (
source,
parents,
formatted_text,
album
album,
option,
collection
)
MusicObject = parents.DatabaseObject
@@ -28,3 +30,6 @@ AlbumStatus = album.AlbumStatus
Album = song.Album
FormattedText = formatted_text.FormattedText
Options = option.Options
Collection = collection.Collection

View File

@@ -22,4 +22,5 @@ class AlbumType(Enum):
LIVE_ALBUM = "Live Album"
COMPILATION_ALBUM = "Compilation Album"
MIXTAPE = "Mixtape"
DEMO = "Demo"
OTHER = "Other"

View File

@@ -0,0 +1,110 @@
from collections import defaultdict
from typing import Dict, List, Optional
import weakref
from .parents import DatabaseObject
"""
This is a cache for the objects, that et pulled out of the database.
This is necessary, to not have duplicate objects with the same id.
Using a cache that maps the ojects to their id has multiple benefits:
- if you modify the object at any point, all objects with the same id get modified *(copy by reference)*
- less ram usage
- to further decrease ram usage I only store weak refs and not a strong reference, for the gc to still work
"""
class ObjectCache:
"""
ObjectCache is a cache for the objects retrieved from a database.
It maps each object to its id and uses weak references to manage its memory usage.
Using a cache for these objects provides several benefits:
- Modifying an object updates all objects with the same id (due to copy by reference)
- Reduced memory usage
:attr object_to_id: Dictionary that maps DatabaseObjects to their id.
:attr weakref_map: Dictionary that uses weak references to DatabaseObjects as keys and their id as values.
:method exists: Check if a DatabaseObject already exists in the cache.
:method append: Add a DatabaseObject to the cache if it does not already exist.
:method extent: Add a list of DatabaseObjects to the cache.
:method remove: Remove a DatabaseObject from the cache by its id.
:method get: Retrieve a DatabaseObject from the cache by its id. """
object_to_id: Dict[str, DatabaseObject]
weakref_map: Dict[weakref.ref, str]
def __init__(self) -> None:
self.object_to_id = dict()
self.weakref_map = defaultdict()
def exists(self, database_object: DatabaseObject) -> bool:
"""
Check if a DatabaseObject with the same id already exists in the cache.
:param database_object: The DatabaseObject to check for.
:return: True if the DatabaseObject exists, False otherwise.
"""
if database_object.dynamic:
return True
return database_object.id in self.object_to_id
def on_death(self, weakref_: weakref.ref) -> None:
"""
Callback function that gets triggered when the reference count of a DatabaseObject drops to 0.
This function removes the DatabaseObject from the cache.
:param weakref_: The weak reference of the DatabaseObject that has been garbage collected.
"""
data_id = self.weakref_map.pop(weakref_)
self.object_to_id.pop(data_id)
def get_weakref(self, database_object: DatabaseObject) -> weakref.ref:
return weakref.ref(database_object, self.on_death)
def append(self, database_object: DatabaseObject) -> bool:
"""
Add a DatabaseObject to the cache.
:param database_object: The DatabaseObject to add to the cache.
:return: True if the DatabaseObject already exists in the cache, False otherwise.
"""
if self.exists(database_object):
return True
self.weakref_map[weakref.ref(database_object, self.on_death)] = database_object.id
self.object_to_id[database_object.id] = database_object
return False
def extent(self, database_object_list: List[DatabaseObject]):
"""
adjacent to the extent method of list, this appends n Object
"""
for database_object in database_object_list:
self.append(database_object)
def remove(self, _id: str):
"""
Remove a DatabaseObject from the cache.
:param _id: The id of the DatabaseObject to remove from the cache.
"""
data = self.object_to_id.get(_id)
if data:
self.weakref_map.pop(weakref.ref(data))
self.object_to_id.pop(_id)
def __getitem__(self, item) -> Optional[DatabaseObject]:
"""
this returns the data obj
:param item: the id of the music object
:return:
"""
return self.object_to_id.get(item)
def get(self, _id: str) -> Optional[DatabaseObject]:
return self.__getitem__(_id)

View File

@@ -33,6 +33,7 @@ class Collection:
```
"""
self._attribute_to_object_map: Dict[str, Dict[object, DatabaseObject]] = defaultdict(dict)
self._used_ids: set = set()
if data is not None:
self.extend(data, merge_on_conflict=True)
@@ -46,12 +47,27 @@ class Collection:
continue
self._attribute_to_object_map[name][value] = element
self._used_ids.add(element.id)
def unmap_element(self, element: DatabaseObject):
for name, value in element.indexing_values:
if value is None:
continue
if value in self._attribute_to_object_map[name]:
if element is self._attribute_to_object_map[name][value]:
try:
self._attribute_to_object_map[name].pop(value)
except KeyError:
pass
def append(self, element: DatabaseObject, merge_on_conflict: bool = True):
def append(self, element: DatabaseObject, merge_on_conflict: bool = True, merge_into_existing: bool = True) -> bool:
"""
:param element:
:param merge_on_conflict:
:return:
:param merge_into_existing:
:return did_not_exist:
"""
# if the element type has been defined in the initializer it checks if the type matches
@@ -60,17 +76,30 @@ class Collection:
for name, value in element.indexing_values:
if value in self._attribute_to_object_map[name]:
existing_object = self._attribute_to_object_map[name][value]
if merge_on_conflict:
# if the object does already exist
# thus merging and don't add it afterwards
existing_object = self._attribute_to_object_map[name][value]
existing_object.merge(element)
# in case any relevant data has been added (e.g. it remaps the old object)
self.map_element(existing_object)
return
if merge_into_existing:
existing_object.merge(element)
# in case any relevant data has been added (e.g. it remaps the old object)
self.map_element(existing_object)
else:
element.merge(existing_object)
exists_at = self._data.index(existing_object)
self._data[exists_at] = element
self.unmap_element(existing_object)
self.map_element(element)
return False
self._data.append(element)
self.map_element(element)
return True
def extend(self, element_list: Iterable[DatabaseObject], merge_on_conflict: bool = True):
for element in element_list:

View File

@@ -10,6 +10,10 @@ https://pandoc.org/installing.html
class FormattedText:
"""
the self.html value should be saved to the database
"""
doc = None
def __init__(
@@ -39,35 +43,38 @@ class FormattedText:
def get_markdown(self) -> str:
if self.doc is None:
return None
return ""
return pandoc.write(self.doc, format="markdown").strip()
def get_html(self) -> str:
if self.doc is None:
return None
return ""
return pandoc.write(self.doc, format="html").strip()
def get_plaintext(self) -> str:
if self.doc is None:
return None
return ""
return pandoc.write(self.doc, format="plain").strip()
@property
def json(self) -> str:
if self.doc is None:
return None
return pandoc.write(self.doc, format="json")
def is_empty(self) -> bool:
return self.doc is None
def __eq__(self, other) -> False:
if type(other) != type(self):
return False
if self.is_empty and other.is_empty:
return True
return self.doc == other.doc
plaintext = property(fget=get_plaintext, fset=set_plaintext)
markdown = property(fget=get_markdown, fset=set_markdown)
html = property(fget=get_html, fset=set_html)
class NotesAttributes:
def __init__(self) -> None:
pass
if __name__ == "__main__":
_plaintext = """
World of Work

View File

@@ -1,14 +1,19 @@
from typing import List
from collections import defaultdict
import pycountry
from .parents import DatabaseObject
from .source import Source, SourceCollection
from .metadata import Metadata
from .formatted_text import FormattedText
class Lyrics(DatabaseObject):
COLLECTION_ATTRIBUTES = ("source_collection",)
SIMPLE_ATTRIBUTES = {
"text": FormattedText(),
"language": None
}
def __init__(
self,
text: FormattedText,
@@ -18,9 +23,9 @@ class Lyrics(DatabaseObject):
source_list: List[Source] = None,
**kwargs
) -> None:
DatabaseObject.__init__(self, _id=_id, dynamic=dynamic)
DatabaseObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs)
self.text: FormattedText = text
self.text: FormattedText = text or FormattedText()
self.language: pycountry.Languages = language
self.source_collection: SourceCollection = SourceCollection(source_list)

View File

@@ -154,6 +154,8 @@ class ID3Timestamp:
return self.date_obj >= other.date_obj
def __eq__(self, other):
if type(other) != type(self):
return False
return self.date_obj == other.date_obj
def get_time_format(self) -> str:

View File

@@ -21,8 +21,10 @@ class Options:
return self._data[index].options
def __getitem__(self, item: int) -> 'Options':
def __getitem__(self, item: int) -> 'DatabaseObject':
if type(item) != int:
raise TypeError("Key needs to be an Integer")
if item >= len(self._data):
raise ValueError("Index out of bounds")
return self.get_next_options(item)
return self._data[item]

View File

@@ -11,15 +11,18 @@ from .option import Options
class DatabaseObject:
COLLECTION_ATTRIBUTES: tuple = tuple()
SIMPLE_ATTRIBUTES: tuple = tuple()
SIMPLE_ATTRIBUTES: dict = dict()
def __init__(self, _id: str = None, dynamic: bool = False, **kwargs) -> None:
self.automatic_id: bool = False
if _id is None and not dynamic:
"""
generates a random UUID
https://docs.python.org/3/library/uuid.html
"""
_id = str(uuid.uuid4())
self.automatic_id = True
LOGGER.debug(f"id for {type(self).__name__} isn't set. Setting to {_id}")
# The id can only be None, if the object is dynamic (self.dynamic = True)
@@ -43,7 +46,7 @@ class DatabaseObject:
return True
return False
@property
def indexing_values(self) -> List[Tuple[str, object]]:
"""
@@ -53,9 +56,9 @@ class DatabaseObject:
Returns:
List[Tuple[str, object]]: the first element in the tuple is the name of the attribute, the second the value.
"""
return list()
def merge(self, other, override: bool = False):
if not isinstance(other, type(self)):
LOGGER.warning(f"can't merge \"{type(other)}\" into \"{type(self)}\"")
@@ -64,11 +67,11 @@ class DatabaseObject:
for collection in type(self).COLLECTION_ATTRIBUTES:
getattr(self, collection).extend(getattr(other, collection))
for simple_attribute in type(self).SIMPLE_ATTRIBUTES:
if getattr(other, simple_attribute) is None:
for simple_attribute, default_value in type(self).SIMPLE_ATTRIBUTES.items():
if getattr(other, simple_attribute) == default_value:
continue
if override or getattr(self, simple_attribute) is None:
if override or getattr(self, simple_attribute) == default_value:
setattr(self, simple_attribute, getattr(other, simple_attribute))
@property
@@ -83,6 +86,18 @@ class DatabaseObject:
def option_string(self) -> str:
return self.__repr__()
def compile(self) -> bool:
"""
compiles the recursive structures,
Args:
traceback (set, optional): Defaults to an empty set.
Returns:
bool: returns true if id has been found in set
"""
pass
class MainObject(DatabaseObject):
"""
@@ -95,7 +110,7 @@ class MainObject(DatabaseObject):
It has all the functionality of the "DatabaseObject" (it inherits from said class)
but also some added functions as well.
"""
def __init__(self, _id: str = None, dynamic: bool = False, **kwargs):
DatabaseObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs)

View File

@@ -1,6 +1,7 @@
import os
from typing import List, Optional, Dict, Tuple
import pycountry
from collections import defaultdict
from .metadata import (
Mapping as id3Mapping,
@@ -46,7 +47,15 @@ class Song(MainObject):
COLLECTION_ATTRIBUTES = (
"lyrics_collection", "album_collection", "main_artist_collection", "feature_artist_collection",
"source_collection")
SIMPLE_ATTRIBUTES = ("title", "unified_title", "isrc", "length", "tracksort", "genre")
SIMPLE_ATTRIBUTES = {
"title": None,
"unified_title": None,
"isrc": None,
"length": None,
"tracksort": 0,
"genre": None,
"notes": FormattedText()
}
def __init__(
self,
@@ -64,17 +73,21 @@ class Song(MainObject):
album_list: List['Album'] = None,
main_artist_list: List['Artist'] = None,
feature_artist_list: List['Artist'] = None,
notes: FormattedText = None,
**kwargs
) -> None:
MainObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs)
# attributes
self.title: str = title
self.unified_title: str = unified_title or unify(title)
self.unified_title: str = unified_title
if unified_title is None and title is not None:
self.unified_title = unify(title)
self.isrc: str = isrc
self.length: int = length
self.tracksort: int = tracksort or 0
self.genre: str = genre
self.notes: FormattedText = notes or FormattedText()
self.source_collection: SourceCollection = SourceCollection(source_list)
self.target_collection: Collection = Collection(data=target_list, element_type=Target)
@@ -83,6 +96,22 @@ class Song(MainObject):
self.main_artist_collection = Collection(data=main_artist_list, element_type=Artist)
self.feature_artist_collection = Collection(data=feature_artist_list, element_type=Artist)
def compile(self):
album: Album
for album in self.album_collection:
if album.song_collection.append(self, merge_into_existing=False):
album.compile()
artist: Artist
for artist in self.feature_artist_collection:
if artist.feature_song_collection.append(self, merge_into_existing=False):
artist.compile()
for artist in self.main_artist_collection:
for album in self.album_collection:
if artist.main_album_collection.append(album, merge_into_existing=False):
artist.compile()
@property
def indexing_values(self) -> List[Tuple[str, object]]:
return [
@@ -166,7 +195,17 @@ All objects dependent on Album
class Album(MainObject):
COLLECTION_ATTRIBUTES = ("label_collection", "artist_collection", "song_collection")
SIMPLE_ATTRIBUTES = ("title", "album_status", "album_type", "language", "date", "barcode", "albumsort")
SIMPLE_ATTRIBUTES = {
"title": None,
"unified_title": None,
"album_status": None,
"album_type": AlbumType.OTHER,
"language": None,
"date": ID3Timestamp(),
"barcode": None,
"albumsort": None,
"notes": FormattedText()
}
def __init__(
self,
@@ -184,15 +223,18 @@ class Album(MainObject):
album_status: AlbumStatus = None,
album_type: AlbumType = None,
label_list: List['Label'] = None,
notes: FormattedText = None,
**kwargs
) -> None:
MainObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs)
self.title: str = title
self.unified_title: str = unified_title or unify(self.title)
self.unified_title: str = unified_title
if unified_title is None and title is not None:
self.unified_title = unify(title)
self.album_status: AlbumStatus = album_status
self.album_type: AlbumType = album_type
self.album_type: AlbumType = album_type or AlbumType.OTHER
self.language: pycountry.Languages = language
self.date: ID3Timestamp = date or ID3Timestamp()
@@ -208,12 +250,29 @@ class Album(MainObject):
to set albumsort with help of the release year
"""
self.albumsort: Optional[int] = albumsort
self.notes = notes or FormattedText()
self.source_collection: SourceCollection = SourceCollection(source_list)
self.song_collection: Collection = Collection(data=song_list, element_type=Song)
self.artist_collection: Collection = Collection(data=artist_list, element_type=Artist)
self.label_collection: Collection = Collection(data=label_list, element_type=Label)
def compile(self):
song: Song
for song in self.song_collection:
if song.album_collection.append(self, merge_into_existing=False):
song.compile()
artist: Artist
for artist in self.artist_collection:
if artist.main_album_collection.append(self, merge_into_existing=False):
artist.compile()
label: Label
for label in self.label_collection:
if label.album_collection.append(self, merge_into_existing=False):
label.compile()
@property
def indexing_values(self) -> List[Tuple[str, object]]:
return [
@@ -309,16 +368,23 @@ class Album(MainObject):
return len(self.artist_collection) > 1
"""
All objects dependent on Artist
"""
class Artist(MainObject):
COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection", "label_collection")
SIMPLE_ATTRIBUTES = ("name", "name", "country", "formed_in", "notes", "lyrical_themes", "general_genre")
COLLECTION_ATTRIBUTES = (
"feature_song_collection", "main_album_collection", "label_collection", "source_collection")
SIMPLE_ATTRIBUTES = {
"name": None,
"unified_name": None,
"country": None,
"formed_in": ID3Timestamp(),
"notes": FormattedText(),
"lyrical_themes": [],
"general_genre": ""
}
def __init__(
self,
@@ -340,7 +406,9 @@ class Artist(MainObject):
MainObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs)
self.name: str = name
self.unified_name: str = unified_name or unify(self.name)
self.unified_name: str = unified_name
if unified_name is None and name is not None:
self.unified_name = unify(name)
"""
TODO implement album type and notes
@@ -365,6 +433,22 @@ class Artist(MainObject):
self.main_album_collection: Collection = Collection(data=main_album_list, element_type=Album)
self.label_collection: Collection = Collection(data=label_list, element_type=Label)
def compile(self):
song: "Song"
for song in self.feature_song_collection:
if song.feature_artist_collection.append(self, merge_into_existing=False):
song.compile()
album: "Album"
for album in self.main_album_collection:
if album.artist_collection.append(self, merge_into_existing=False):
album.compile()
label: Label
for label in self.label_collection:
if label.current_artist_collection.append(self, merge_into_existing=False):
label.compile()
@property
def indexing_values(self) -> List[Tuple[str, object]]:
return [
@@ -463,7 +547,11 @@ Label
class Label(MainObject):
COLLECTION_ATTRIBUTES = ("album_collection", "current_artist_collection")
SIMPLE_ATTRIBUTES = ("name",)
SIMPLE_ATTRIBUTES = {
"name": None,
"unified_name": None,
"notes": FormattedText()
}
def __init__(
self,
@@ -471,6 +559,7 @@ class Label(MainObject):
dynamic: bool = False,
name: str = None,
unified_name: str = None,
notes: FormattedText = None,
album_list: List[Album] = None,
current_artist_list: List[Artist] = None,
source_list: List[Source] = None,
@@ -479,12 +568,26 @@ class Label(MainObject):
MainObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs)
self.name: str = name
self.unified_name: str = unified_name or unify(self.name)
self.unified_name: str = unified_name
if unified_name is None and name is not None:
self.unified_name = unify(name)
self.notes = notes or FormattedText()
self.source_collection: SourceCollection = SourceCollection(source_list)
self.album_collection: Collection = Collection(data=album_list, element_type=Album)
self.current_artist_collection: Collection = Collection(data=current_artist_list, element_type=Artist)
def compile(self) -> bool:
album: Album
for album in self.album_collection:
if album.label_collection.append(self, merge_into_existing=False):
album.compile()
artist: Artist
for artist in self.current_artist_collection:
if artist.label_collection.append(self, merge_into_existing=False):
artist.compile()
@property
def indexing_values(self) -> List[Tuple[str, object]]:
return [
@@ -497,4 +600,4 @@ class Label(MainObject):
def options(self) -> Options:
options = [self]
options.extend(self.current_artist_collection.shallow_list)
options.extend(self.album_collection.shallow_list)
options.extend(self.album_collection.shallow_list)

View File

@@ -1,6 +1,7 @@
from collections import defaultdict
from enum import Enum
from typing import List, Dict, Tuple
from typing import List, Dict, Tuple, Optional
from urllib.parse import urlparse
from .metadata import Mapping, Metadata
from .parents import DatabaseObject
@@ -25,9 +26,11 @@ class SourcePages(Enum):
SPOTIFY = "spotify"
# This has nothing to do with audio, but bands can be here
WIKIPEDIA = "wikipedia"
INSTAGRAM = "instagram"
FACEBOOK = "facebook"
TWITTER = "twitter" # I will use nitter though lol
TWITTER = "twitter" # I will use nitter though lol
MYSPACE = "myspace" # Yes somehow this ancient site is linked EVERYWHERE
@classmethod
def get_homepage(cls, attribute) -> str:
@@ -42,7 +45,9 @@ class SourcePages(Enum):
cls.INSTAGRAM: "https://www.instagram.com/",
cls.FACEBOOK: "https://www.facebook.com/",
cls.SPOTIFY: "https://open.spotify.com/",
cls.TWITTER: "https://twitter.com/"
cls.TWITTER: "https://twitter.com/",
cls.MYSPACE: "https://myspace.com/",
cls.WIKIPEDIA: "https://en.wikipedia.org/wiki/Main_Page"
}
return homepage_map[attribute]
@@ -55,6 +60,12 @@ class Source(DatabaseObject):
Source(src="youtube", url="https://youtu.be/dfnsdajlhkjhsd")
```
"""
COLLECTION_ATTRIBUTES = tuple()
SIMPLE_ATTRIBUTES = {
"type_enum": None,
"page_enum": None,
"url": None
}
def __init__(self, page_enum: SourcePages, url: str, id_: str = None, type_enum=None) -> None:
DatabaseObject.__init__(self, id_=id_)
@@ -65,11 +76,14 @@ class Source(DatabaseObject):
self.url = url
@classmethod
def match_url(cls, url: str):
def match_url(cls, url: str) -> Optional["Source"]:
"""
this shouldn't be used, unlesse you are not certain what the source is for
the reason is that it is more inefficient
"""
parsed = urlparse(url)
url = parsed.geturl()
if url.startswith("https://www.youtube"):
return cls(SourcePages.YOUTUBE, url)
@@ -82,6 +96,9 @@ class Source(DatabaseObject):
if "bandcamp" in url:
return cls(SourcePages.BANDCAMP, url)
if "wikipedia" in parsed.netloc:
return cls(SourcePages.WIKIPEDIA, url)
if url.startswith("https://www.metal-archives.com/"):
return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url)
@@ -95,6 +112,9 @@ class Source(DatabaseObject):
if url.startswith("https://twitter"):
return cls(SourcePages.TWITTER, url)
if url.startswith("https://myspace.com"):
return cls(SourcePages.MYSPACE, url)
def get_song_metadata(self) -> Metadata:
return Metadata({
Mapping.FILE_WEBPAGE_URL: [self.url],
@@ -151,4 +171,4 @@ class SourceCollection(Collection):
getting the sources for a specific page like
YouTube or musify
"""
return self._page_to_source_list[source_page]
return self._page_to_source_list[source_page].copy()

View File

@@ -1,5 +1,6 @@
from typing import Optional, List, Tuple
from pathlib import Path
from collections import defaultdict
from ..utils import shared
from .parents import DatabaseObject
@@ -14,7 +15,11 @@ class Target(DatabaseObject):
```
"""
SIMPLE_ATTRIBUTES = ("_file", "_path")
SIMPLE_ATTRIBUTES = {
"_file": None,
"_path": None
}
COLLECTION_ATTRIBUTES = tuple()
def __init__(
self,

View File

@@ -1,11 +1,14 @@
from .encyclopaedia_metallum import EncyclopaediaMetallum
from .musify import Musify
EncyclopaediaMetallum = EncyclopaediaMetallum
Musify = Musify
MetadataPages = {
EncyclopaediaMetallum
EncyclopaediaMetallum,
Musify
}
AudioPages = {
Musify
}

View File

@@ -1,6 +1,10 @@
from typing import (
List
)
from typing import Optional
import requests
import logging
LOGGER = logging.getLogger("this shouldn't be used")
from ..utils import shared
from ..objects import (
Song,
@@ -9,7 +13,10 @@ from ..objects import (
Artist,
Lyrics,
Target,
MusicObject
MusicObject,
Options,
SourcePages,
Collection
)
@@ -18,6 +25,50 @@ class Page:
This is an abstract class, laying out the
functionality for every other class fetching something
"""
API_SESSION: requests.Session = requests.Session()
API_SESSION.proxies = shared.proxies
TIMEOUT = 5
TRIES = 5
SOURCE_TYPE: SourcePages
@classmethod
def get_request(cls, url: str, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[
requests.Request]:
try:
r = cls.API_SESSION.get(url, timeout=cls.TIMEOUT)
except requests.exceptions.Timeout:
return None
if r.status_code in accepted_response_codes:
return r
LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at {url}. ({trie}-{cls.TRIES})")
LOGGER.debug(r.content)
if trie <= cls.TRIES:
LOGGER.warning("to many tries. Aborting.")
return cls.get_request(url, accepted_response_codes, trie + 1)
@classmethod
def post_request(cls, url: str, json: dict, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[
requests.Request]:
try:
r = cls.API_SESSION.post(url, json=json, timeout=cls.TIMEOUT)
except requests.exceptions.Timeout:
return None
if r.status_code in accepted_response_codes:
return r
LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at {url}. ({trie}-{cls.TRIES})")
LOGGER.debug(r.content)
if trie <= cls.TRIES:
LOGGER.warning("to many tries. Aborting.")
return cls.post_request(url, accepted_response_codes, trie + 1)
class Query:
def __init__(self, query: str):
@@ -69,7 +120,7 @@ class Page:
song_str = property(fget=lambda self: self.get_str(self.song))
@classmethod
def search_by_query(cls, query: str) -> List[MusicObject]:
def search_by_query(cls, query: str) -> Options:
"""
# The Query
You can define a new parameter with "#",
@@ -84,7 +135,7 @@ class Page:
:return possible_music_objects:
"""
return []
return Options()
@classmethod
def fetch_details(cls, music_object: MusicObject, flat: bool = False) -> MusicObject:
@@ -102,16 +153,26 @@ class Page:
"""
if type(music_object) == Song:
return cls.fetch_song_details(music_object, flat=flat)
song = cls.fetch_song_details(music_object, flat=flat)
song.compile()
return song
if type(music_object) == Album:
return cls.fetch_album_details(music_object, flat=flat)
album = cls.fetch_album_details(music_object, flat=flat)
album.compile()
return album
if type(music_object) == Artist:
return cls.fetch_artist_details(music_object, flat=flat)
artist = cls.fetch_artist_details(music_object, flat=flat)
artist.compile()
return artist
raise NotImplementedError(f"MusicObject {type(music_object)} has not been implemented yet")
@classmethod
def fetch_song_from_source(cls, source: Source, flat: bool = False) -> Song:
return Song()
@classmethod
def fetch_song_details(cls, song: Song, flat: bool = False) -> Song:
"""
@@ -127,9 +188,18 @@ class Page:
:return detailed_song: it modifies the input song
"""
source: Source
for source in song.source_collection.get_sources_from_page(cls.SOURCE_TYPE):
new_song = cls.fetch_song_from_source(source, flat)
song.merge(new_song)
return song
@classmethod
def fetch_album_from_source(cls, source: Source, flat: bool = False) -> Album:
return Album()
@classmethod
def fetch_album_details(cls, album: Album, flat: bool = False) -> Album:
"""
@@ -147,8 +217,17 @@ class Page:
:return detailed_artist: it modifies the input artist
"""
source: Source
for source in album.source_collection.get_sources_from_page(cls.SOURCE_TYPE):
new_album: Album = cls.fetch_album_from_source(source, flat)
album.merge(new_album)
return album
@classmethod
def fetch_artist_from_source(cls, source: Source, flat: bool = False) -> Artist:
return Artist()
@classmethod
def fetch_artist_details(cls, artist: Artist, flat: bool = False) -> Artist:
"""
@@ -163,5 +242,10 @@ class Page:
:return detailed_artist: it modifies the input artist
"""
source: Source
for source in artist.source_collection.get_sources_from_page(cls.SOURCE_TYPE):
new_artist: Artist = cls.fetch_artist_from_source(source, flat)
artist.merge(new_artist)
return artist

View File

@@ -17,7 +17,8 @@ from ..objects import (
Album,
ID3Timestamp,
FormattedText,
Label
Label,
Options
)
from ..utils import (
string_processing
@@ -34,7 +35,7 @@ class EncyclopaediaMetallum(Page):
SOURCE_TYPE = SourcePages.ENCYCLOPAEDIA_METALLUM
@classmethod
def search_by_query(cls, query: str) -> List[MusicObject]:
def search_by_query(cls, query: str) -> Options:
query_obj = cls.Query(query)
if query_obj.is_raw:
@@ -42,14 +43,14 @@ class EncyclopaediaMetallum(Page):
return cls.advanced_search(query_obj)
@classmethod
def advanced_search(cls, query: Page.Query) -> List[MusicObject]:
def advanced_search(cls, query: Page.Query) -> Options:
if query.song is not None:
return cls.search_for_song(query=query)
return Options(cls.search_for_song(query=query))
if query.album is not None:
return cls.search_for_album(query=query)
return Options(cls.search_for_album(query=query))
if query.artist is not None:
return cls.search_for_artist(query=query)
return []
return Options(cls.search_for_artist(query=query))
return Options
@classmethod
def search_for_song(cls, query: Page.Query) -> List[Song]:

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,887 @@
from collections import defaultdict
from typing import List, Optional, Union
import requests
from bs4 import BeautifulSoup
import pycountry
import time
from urllib.parse import urlparse
from enum import Enum
from dataclasses import dataclass
from ..utils.shared import (
ENCYCLOPAEDIA_METALLUM_LOGGER as LOGGER
)
from .abstract import Page
from ..objects import (
MusicObject,
Artist,
Source,
SourcePages,
Song,
Album,
ID3Timestamp,
FormattedText,
Label,
Options,
AlbumType,
AlbumStatus
)
from ..utils import (
string_processing,
shared
)
from ..utils.shared import (
MUSIFY_LOGGER as LOGGER
)
"""
https://musify.club/artist/ghost-bath-280348?_pjax=#bodyContent
https://musify.club/artist/ghost-bath-280348/releases?_pjax=#bodyContent
https://musify.club/artist/ghost-bath-280348/clips?_pjax=#bodyContent
https://musify.club/artist/ghost-bath-280348/photos?_pjax=#bodyContent
POST https://musify.club/artist/filtersongs
ID: 280348
NameForUrl: ghost-bath
Page: 1
IsAllowed: True
SortOrder.Property: dateCreated
SortOrder.IsAscending: false
X-Requested-With: XMLHttpRequest
POST https://musify.club/artist/filteralbums
ArtistID: 280348
SortOrder.Property: dateCreated
SortOrder.IsAscending: false
X-Requested-With: XMLHttpRequest
"""
class MusifyTypes(Enum):
ARTIST = "artist"
RELEASE = "release"
SONG = "track"
@dataclass
class MusifyUrl:
source_type: MusifyTypes
name_without_id: str
name_with_id: str
musify_id: str
url: str
class Musify(Page):
API_SESSION: requests.Session = requests.Session()
API_SESSION.headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:106.0) Gecko/20100101 Firefox/106.0",
"Connection": "keep-alive",
"Referer": "https://musify.club/"
}
API_SESSION.proxies = shared.proxies
TIMEOUT = 5
TRIES = 5
HOST = "https://musify.club"
SOURCE_TYPE = SourcePages.MUSIFY
@classmethod
def parse_url(cls, url: str) -> MusifyUrl:
parsed = urlparse(url)
path = parsed.path.split("/")
split_name = path[2].split("-")
url_id = split_name[-1]
name_for_url = "-".join(split_name[:-1])
try:
type_enum = MusifyTypes(path[1])
except ValueError as e:
print(f"{path[1]} is not yet implemented, add it to MusifyTypes")
raise e
return MusifyUrl(
source_type=type_enum,
name_without_id=name_for_url,
name_with_id=path[2],
musify_id=url_id,
url=url
)
@classmethod
def search_by_query(cls, query: str) -> Options:
query_obj = cls.Query(query)
if query_obj.is_raw:
return cls.plaintext_search(query_obj.query)
return cls.plaintext_search(cls.get_plaintext_query(query_obj))
@classmethod
def get_plaintext_query(cls, query: Page.Query) -> str:
if query.album is None:
return f"{query.artist or '*'} - {query.song or '*'}"
return f"{query.artist or '*'} - {query.album or '*'} - {query.song or '*'}"
@classmethod
def parse_artist_contact(cls, contact: BeautifulSoup) -> Artist:
source_list: List[Source] = []
name = None
_id = None
# source
anchor = contact.find("a")
if anchor is not None:
href = anchor.get("href")
name = anchor.get("title")
if "-" in href:
_id = href.split("-")[-1]
source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))
# artist image
image_soup = contact.find("img")
if image_soup is not None:
alt = image_soup.get("alt")
if alt is not None:
name = alt
artist_thumbnail = image_soup.get("src")
return Artist(
_id=_id,
name=name,
source_list=source_list
)
@classmethod
def parse_album_contact(cls, contact: BeautifulSoup) -> Album:
"""
<div class="contacts__item">
<a href="/release/ghost-bath-ghost-bath-2013-602489" title="Ghost Bath - 2013">
<div class="contacts__img release">
<img alt="Ghost Bath" class="lozad" data-src="https://37s.musify.club/img/69/9060265/24178833.jpg"/>
<noscript><img alt="Ghost Bath" src="https://37s.musify.club/img/69/9060265/24178833.jpg"/></noscript>
</div>
<div class="contacts__info">
<strong>Ghost Bath - 2013</strong>
<small>Ghost Bath</small>
<small>Треков: 4</small> <!--tracks-->
<small><i class="zmdi zmdi-star zmdi-hc-fw"></i> 9,04</small>
</div>
</a>
</div>
"""
source_list: List[Source] = []
title = None
_id = None
year = None
artist_list: List[Artist] = []
def parse_title_date(title_date: Optional[str], delimiter: str = " - "):
nonlocal year
nonlocal title
if title_date is None:
return
title_date = title_date.strip()
split_attr = title_date.split(delimiter)
if len(split_attr) < 2:
return
if not split_attr[-1].isdigit():
return
year = int(split_attr[-1])
title = delimiter.join(split_attr[:-1])
# source
anchor = contact.find("a")
if anchor is not None:
href = anchor.get("href")
# get the title and year
parse_title_date(anchor.get("title"))
if "-" in href:
_id = href.split("-")[-1]
source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))
# cover art
image_soup = contact.find("img")
if image_soup is not None:
alt = image_soup.get("alt")
if alt is not None:
title = alt
cover_art = image_soup.get("src")
contact_info_soup = contact.find("div", {"class": "contacts__info"})
if contact_info_soup is not None:
"""
<strong>Ghost Bath - 2013</strong>
<small>Ghost Bath</small>
<small>Треков: 4</small> <!--tracks-->
<small><i class="zmdi zmdi-star zmdi-hc-fw"></i> 9,04</small>
"""
title_soup = contact_info_soup.find("strong")
if title_soup is None:
parse_title_date(title_soup)
small_list = contact_info_soup.find_all("small")
if len(small_list) == 3:
# artist
artist_soup: BeautifulSoup = small_list[0]
raw_artist_str = artist_soup.text
for artist_str in raw_artist_str.split("&\r\n"):
artist_str = artist_str.rstrip("& ...\r\n")
artist_str = artist_str.strip()
if artist_str.endswith("]") and "[" in artist_str:
artist_str = artist_str.rsplit("[", maxsplit=1)[0]
artist_list.append(Artist(name=artist_str))
track_count_soup: BeautifulSoup = small_list[1]
rating_soup: BeautifulSoup = small_list[2]
else:
LOGGER.warning("got an unequal ammount than 3 small elements")
return cls.ALBUM_CACHE.append(Album(
_id=_id,
title=title,
source_list=source_list,
date=ID3Timestamp(year=year),
artist_list=artist_list
))
@classmethod
def parse_contact_container(cls, contact_container_soup: BeautifulSoup) -> List[Union[Artist, Album]]:
# print(contact_container_soup.prettify)
contacts = []
# print(contact_container_soup)
contact: BeautifulSoup
for contact in contact_container_soup.find_all("div", {"class": "contacts__item"}):
anchor_soup = contact.find("a")
if anchor_soup is not None:
url = anchor_soup.get("href")
if url is not None:
# print(url)
if "artist" in url:
contacts.append(cls.parse_artist_contact(contact))
elif "release" in url:
contacts.append(cls.parse_album_contact(contact))
return contacts
@classmethod
def parse_playlist_item(cls, playlist_item_soup: BeautifulSoup) -> Song:
_id = None
song_title = playlist_item_soup.get("data-name")
artist_list: List[Artist] = []
source_list: List[Source] = []
# details
playlist_details: BeautifulSoup = playlist_item_soup.find("div", {"class", "playlist__heading"})
if playlist_details is not None:
anchor_list = playlist_details.find_all("a")
if len(anchor_list) >= 2:
print(anchor_list)
# artists
artist_anchor: BeautifulSoup
for artist_anchor in anchor_list[:-1]:
_id = None
href = artist_anchor.get("href")
artist_source: Source = Source(cls.SOURCE_TYPE, cls.HOST + href)
if "-" in href:
_id = href.split("-")[-1]
artist_list.append(Artist(
_id=_id,
name=artist_anchor.get_text(strip=True),
source_list=[artist_source]
))
# track
track_soup: BeautifulSoup = anchor_list[-1]
"""
TODO
this anchor text may have something like (feat. some artist)
which is not acceptable
"""
href = track_soup.get("href")
if href is not None:
if "-" in href:
raw_id: str = href.split("-")[-1]
if raw_id.isdigit():
_id = raw_id
source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))
else:
LOGGER.warning("there are not enough anchors (2) for artist and track")
LOGGER.warning(str(artist_list))
"""
artist_name = playlist_item_soup.get("data-artist")
if artist_name is not None:
artist_list.append(Artist(name=artist_name))
"""
id_attribute = playlist_item_soup.get("id")
if id_attribute is not None:
raw_id = id_attribute.replace("playerDiv", "")
if raw_id.isdigit():
_id = raw_id
return Song(
_id=_id,
title=song_title,
main_artist_list=artist_list,
source_list=source_list
)
@classmethod
def parse_playlist_soup(cls, playlist_soup: BeautifulSoup) -> List[Song]:
song_list = []
for playlist_item_soup in playlist_soup.find_all("div", {"class": "playlist__item"}):
song_list.append(cls.parse_playlist_item(playlist_item_soup))
return song_list
@classmethod
def plaintext_search(cls, query: str) -> Options:
search_results = []
r = cls.get_request(f"https://musify.club/search?searchText={query}")
if r is None:
return Options()
search_soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")
# album and songs
# child of div class: contacts row
for contact_container_soup in search_soup.find_all("div", {"class": "contacts"}):
search_results.extend(cls.parse_contact_container(contact_container_soup))
# song
# div class: playlist__item
for playlist_soup in search_soup.find_all("div", {"class": "playlist"}):
search_results.extend(cls.parse_playlist_soup(playlist_soup))
return Options(search_results)
@classmethod
def parse_album_card(cls, album_card: BeautifulSoup, artist_name: str = None) -> Album:
"""
<div class="card release-thumbnail" data-type="2">
<a href="/release/ghost-bath-self-loather-2021-1554266">
<img alt="Self Loather" class="card-img-top lozad" data-src="https://40s-a.musify.club/img/70/24826582/62624396.jpg"/>
<noscript><img alt="Self Loather" src="https://40s-a.musify.club/img/70/24826582/62624396.jpg"/></noscript>
</a>
<div class="card-body">
<h4 class="card-subtitle">
<a href="/release/ghost-bath-self-loather-2021-1554266">Self Loather</a>
</h4>
</div>
<div class="card-footer"><p class="card-text"><a href="/albums/2021">2021</a></p></div>
<div class="card-footer">
<p class="card-text genre__labels">
<a href="/genre/depressive-black-132">Depressive Black</a><a href="/genre/post-black-metal-295">Post-Black Metal</a> </p>
</div>
<div class="card-footer">
<small><i class="zmdi zmdi-calendar" title="Добавлено"></i> 13.11.2021</small>
<small><i class="zmdi zmdi-star zmdi-hc-fw" title="Рейтинг"></i> 5,88</small>
</div>
</div>
"""
album_type_map = defaultdict(lambda: AlbumType.OTHER, {
1: AlbumType.OTHER, # literally other xD
2: AlbumType.STUDIO_ALBUM,
3: AlbumType.EP,
4: AlbumType.SINGLE,
5: AlbumType.OTHER, # BOOTLEG
6: AlbumType.LIVE_ALBUM,
7: AlbumType.COMPILATION_ALBUM, # compilation of different artists
8: AlbumType.MIXTAPE,
9: AlbumType.DEMO,
10: AlbumType.MIXTAPE, # DJ Mixes
11: AlbumType.COMPILATION_ALBUM, # compilation of only this artist
13: AlbumType.COMPILATION_ALBUM, # unofficial
14: AlbumType.MIXTAPE # "Soundtracks"
})
_id: Optional[str] = None
name: str = None
source_list: List[Source] = []
timestamp: Optional[ID3Timestamp] = None
album_status = None
def set_name(new_name: str):
nonlocal name
nonlocal artist_name
# example of just setting not working: https://musify.club/release/unjoy-eurythmie-psychonaut-4-tired-numb-still-alive-2012-324067
if new_name.count(" - ") != 1:
name = new_name
return
potential_artist_list, potential_name = new_name.split(" - ")
unified_artist_list = string_processing.unify(potential_artist_list)
if artist_name is not None:
if string_processing.unify(artist_name) not in unified_artist_list:
name = new_name
return
name = potential_name
return
name = new_name
album_status_id = album_card.get("data-type")
if album_status_id.isdigit():
album_status_id = int(album_status_id)
album_type = album_type_map[album_status_id]
if album_status_id == 5:
album_status = AlbumStatus.BOOTLEG
def parse_release_anchor(_anchor: BeautifulSoup, text_is_name=False):
nonlocal _id
nonlocal name
nonlocal source_list
if _anchor is None:
return
href = _anchor.get("href")
if href is not None:
# add url to sources
source_list.append(Source(
cls.SOURCE_TYPE,
cls.HOST + href
))
# split id from url
split_href = href.split("-")
if len(split_href) > 1:
_id = split_href[-1]
if not text_is_name:
return
set_name(_anchor.text)
anchor_list = album_card.find_all("a", recursive=False)
if len(anchor_list) > 0:
anchor = anchor_list[0]
parse_release_anchor(anchor)
thumbnail: BeautifulSoup = anchor.find("img")
if thumbnail is not None:
alt = thumbnail.get("alt")
if alt is not None:
set_name(alt)
image_url = thumbnail.get("src")
else:
LOGGER.debug("the card has no thumbnail or url")
card_body = album_card.find("div", {"class": "card-body"})
if card_body is not None:
parse_release_anchor(card_body.find("a"), text_is_name=True)
def parse_small_date(small_soup: BeautifulSoup):
"""
<small>
<i class="zmdi zmdi-calendar" title="Добавлено"></i>
13.11.2021
</small>
"""
nonlocal timestamp
italic_tagging_soup: BeautifulSoup = small_soup.find("i")
if italic_tagging_soup is None:
return
if italic_tagging_soup.get("title") != "Добавлено":
# "Добавлено" can be translated to "Added (at)"
return
raw_time = small_soup.text.strip()
timestamp = ID3Timestamp.strptime(raw_time, "%d.%m.%Y")
# parse small date
card_footer_list = album_card.find_all("div", {"class": "card-footer"})
if len(card_footer_list) != 3:
LOGGER.debug("there are not exactly 3 card footers in a card")
if len(card_footer_list) > 0:
for any_small_soup in card_footer_list[-1].find_all("small"):
parse_small_date(any_small_soup)
else:
LOGGER.debug("there is not even 1 footer in the album card")
return cls.ALBUM_CACHE.append(Album(
_id=_id,
title=name,
source_list=source_list,
date=timestamp,
album_type=album_type,
album_status=album_status
))
@classmethod
def get_discography(cls, url: MusifyUrl, artist_name: str = None, flat=False) -> List[Album]:
"""
POST https://musify.club/artist/filteralbums
ArtistID: 280348
SortOrder.Property: dateCreated
SortOrder.IsAscending: false
X-Requested-With: XMLHttpRequest
"""
endpoint = cls.HOST + "/" + url.source_type.value + "/filteralbums"
r = cls.post_request(url=endpoint, json={
"ArtistID": str(url.musify_id),
"SortOrder.Property": "dateCreated",
"SortOrder.IsAscending": False,
"X-Requested-With": "XMLHttpRequest"
})
if r is None:
return []
soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")
discography: List[Album] = []
for card_soup in soup.find_all("div", {"class": "card"}):
new_album: Album = cls.parse_album_card(card_soup, artist_name)
album_source: Source
if not flat:
for album_source in new_album.source_collection.get_sources_from_page(cls.SOURCE_TYPE):
new_album.merge(cls.fetch_album_from_source(album_source))
discography.append(new_album)
return discography
@classmethod
def get_artist_attributes(cls, url: MusifyUrl) -> Artist:
"""
fetches the main Artist attributes from this endpoint
https://musify.club/artist/ghost-bath-280348?_pjax=#bodyContent
it needs to parse html
:param url:
:return:
"""
r = cls.get_request(f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent")
if r is None:
return Artist(_id=url.musify_id)
soup = BeautifulSoup(r.content, "html.parser")
"""
<ol class="breadcrumb" itemscope="" itemtype="http://schema.org/BreadcrumbList">
<li class="breadcrumb-item" itemprop="itemListElement" itemscope="" itemtype="http://schema.org/ListItem"><a href="/" itemprop="item"><span itemprop="name">Главная</span><meta content="1" itemprop="position"/></a></li>
<li class="breadcrumb-item" itemprop="itemListElement" itemscope="" itemtype="http://schema.org/ListItem"><a href="/artist" itemprop="item"><span itemprop="name">Исполнители</span><meta content="2" itemprop="position"/></a></li>
<li class="breadcrumb-item active">Ghost Bath</li>
</ol>
<ul class="nav nav-tabs nav-fill">
<li class="nav-item"><a class="active nav-link" href="/artist/ghost-bath-280348">песни (41)</a></li>
<li class="nav-item"><a class="nav-link" href="/artist/ghost-bath-280348/releases">альбомы (12)</a></li>
<li class="nav-item"><a class="nav-link" href="/artist/ghost-bath-280348/clips">видеоклипы (23)</a></li>
<li class="nav-item"><a class="nav-link" href="/artist/ghost-bath-280348/photos">фото (38)</a></li>
</ul>
<header class="content__title">
<h1>Ghost Bath</h1>
<div class="actions">
...
</div>
</header>
<ul class="icon-list">
<li>
<i class="zmdi zmdi-globe zmdi-hc-fw" title="Страна"></i>
<i class="flag-icon US shadow"></i>
Соединенные Штаты
</li>
</ul>
"""
name = None
source_list: List[Source] = []
country = None
notes: FormattedText = FormattedText()
breadcrumbs: BeautifulSoup = soup.find("ol", {"class": "breadcrumb"})
if breadcrumbs is not None:
breadcrumb_list: List[BeautifulSoup] = breadcrumbs.find_all("li", {"class": "breadcrumb-item"}, recursive=False)
if len(breadcrumb_list) == 3:
name = breadcrumb_list[-1].get_text(strip=True)
else:
LOGGER.debug("breadcrumb layout on artist page changed")
nav_tabs: BeautifulSoup = soup.find("ul", {"class": "nav-tabs"})
if nav_tabs is not None:
list_item: BeautifulSoup
for list_item in nav_tabs.find_all("li", {"class": "nav-item"}, recursive=False):
if not list_item.get_text(strip=True).startswith("песни"):
# "песни" translates to "songs"
continue
anchor: BeautifulSoup = list_item.find("a")
if anchor is None:
continue
href = anchor.get("href")
if href is None:
continue
source_list.append(Source(
cls.SOURCE_TYPE,
cls.HOST + href
))
content_title: BeautifulSoup = soup.find("header", {"class": "content__title"})
if content_title is not None:
h1_name: BeautifulSoup = content_title.find("h1", recursive=False)
if h1_name is not None:
name = h1_name.get_text(strip=True)
# country and sources
icon_list: BeautifulSoup = soup.find("ul", {"class": "icon-list"})
if icon_list is not None:
country_italic: BeautifulSoup = icon_list.find("i", {"class", "flag-icon"})
if country_italic is not None:
style_classes: set = {'flag-icon', 'shadow'}
classes: set = set(country_italic.get("class"))
country_set: set = classes.difference(style_classes)
if len(country_set) != 1:
LOGGER.debug("the country set contains multiple values")
if len(country_set) != 0:
"""
This is the css file, where all flags that can be used on musify
are laid out and styled.
Every flag has two upper case letters, thus I assume they follow the alpha_2
standard, though I haven't checked.
https://musify.club/content/flags.min.css
"""
country = pycountry.countries.get(alpha_2=list(country_set)[0])
# get all additional sources
additional_source: BeautifulSoup
for additional_source in icon_list.find_all("a", {"class", "link"}):
href = additional_source.get("href")
if href is None:
continue
new_src = Source.match_url(href)
if new_src is None:
continue
source_list.append(new_src)
note_soup: BeautifulSoup = soup.find(id="text-main")
if note_soup is not None:
notes.html = note_soup.decode_contents()
return Artist(
_id=url.musify_id,
name=name,
country=country,
source_list=source_list,
notes=notes
)
@classmethod
def fetch_artist_from_source(cls, source: Source, flat: bool = False) -> Artist:
"""
fetches artist from source
[x] discography
[x] attributes
[] picture gallery
Args:
source (Source): the source to fetch
flat (bool, optional): if it is false, every album from discograohy will be fetched. Defaults to False.
Returns:
Artist: the artist fetched
"""
url = cls.parse_url(source.url)
artist = cls.get_artist_attributes(url)
discography: List[Album] = cls.get_discography(url, artist.name)
artist.main_album_collection.extend(discography)
return artist
@classmethod
def parse_song_card(cls, song_card: BeautifulSoup) -> Song:
"""
<div id="playerDiv3051" class="playlist__item" itemprop="track" itemscope="itemscope" itemtype="http://schema.org/MusicRecording" data-artist="Linkin Park" data-name="Papercut">
<div id="play_3051" class="playlist__control play" data-url="/track/play/3051/linkin-park-papercut.mp3" data-position="1" data-title="Linkin Park - Papercut" title="Слушать Linkin Park - Papercut">
<span class="ico-play"><i class="zmdi zmdi-play-circle-outline zmdi-hc-2-5x"></i></span>
<span class="ico-pause"><i class="zmdi zmdi-pause-circle-outline zmdi-hc-2-5x"></i></span>
</div>
<div class="playlist__position">
1
</div>
<div class="playlist__details">
<div class="playlist__heading">
<a href="/artist/linkin-park-5" rel="nofollow">Linkin Park</a> - <a class="strong" href="/track/linkin-park-papercut-3051">Papercut</a>
<span itemprop="byArtist" itemscope="itemscope" itemtype="http://schema.org/MusicGroup">
<meta content="/artist/linkin-park-5" itemprop="url" />
<meta content="Linkin Park" itemprop="name" />
</span>
</div>
</div>
<div>
<div class="track__details track__rating hidden-xs-down">
<span class="text-muted">
<i class="zmdi zmdi-star-circle zmdi-hc-1-3x" title="Рейтинг"></i>
326,3K
</span>
</div>
</div>
<div class="track__details hidden-xs-down">
<span class="text-muted">03:05</span>
<span class="text-muted">320 Кб/с</span>
</div>
<div class="track__details hidden-xs-down">
<span title='Есть видео Linkin Park - Papercut'><i class='zmdi zmdi-videocam zmdi-hc-1-3x'></i></span>
<span title='Есть текст Linkin Park - Papercut'><i class='zmdi zmdi-file-text zmdi-hc-1-3x'></i></span>
</div>
<div class="playlist__actions">
<span class="pl-btn save-to-pl" id="add_3051" title="Сохранить в плейлист"><i class="zmdi zmdi-plus zmdi-hc-1-5x"></i></span>
<a target="_blank" itemprop="audio" download="Linkin Park - Papercut.mp3" href="/track/dl/3051/linkin-park-papercut.mp3" class="no-ajaxy yaBrowser" id="dl_3051" title='Скачать Linkin Park - Papercut'>
<span><i class="zmdi zmdi-download zmdi-hc-2-5x"></i></span>
</a>
</div>
</div>
"""
song_name = song_card.get("data-name")
artist_list: List[Artist] = []
source_list: List[Source] = []
tracksort = None
def parse_title(_title: str) -> str:
return _title
"""
# get from parent div
_artist_name = song_card.get("data-artist")
if _artist_name is not None:
artist_list.append(Artist(name=_artist_name))
"""
# get tracksort
tracksort_soup: BeautifulSoup = song_card.find("div", {"class": "playlist__position"})
if tracksort_soup is not None:
raw_tracksort: str = tracksort_soup.get_text(strip=True)
if raw_tracksort.isdigit():
tracksort = int(raw_tracksort)
# playlist details
playlist_details: BeautifulSoup = song_card.find("div", {"class": "playlist__details"})
if playlist_details is not None:
"""
<div class="playlist__heading">
<a href="/artist/tamas-141317" rel="nofollow">Tamas</a> ft.<a href="/artist/zombiez-630767" rel="nofollow">Zombiez</a> - <a class="strong" href="/track/tamas-zombiez-voodoo-feat-zombiez-16185276">Voodoo (Feat. Zombiez)</a>
<span itemprop="byArtist" itemscope="itemscope" itemtype="http://schema.org/MusicGroup">
<meta content="/artist/tamas-141317" itemprop="url" />
<meta content="Tamas" itemprop="name" />
</span>
<span itemprop="byArtist" itemscope="itemscope" itemtype="http://schema.org/MusicGroup">
<meta content="/artist/zombiez-630767" itemprop="url" />
<meta content="Zombiez" itemprop="name" />
</span>
</div>
"""
# track
anchor_list: List[BeautifulSoup] = playlist_details.find_all("a")
if len(anchor_list) > 1:
track_anchor: BeautifulSoup = anchor_list[-1]
href: str = track_anchor.get("href")
if href is not None:
source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))
song_name = parse_title(track_anchor.get_text(strip=True))
# artist
artist_span: BeautifulSoup
for artist_span in playlist_details.find_all("span", {"itemprop": "byArtist"}):
_artist_src = None
_artist_name = None
meta_artist_src = artist_span.find("meta", {"itemprop": "url"})
if meta_artist_src is not None:
meta_artist_url = meta_artist_src.get("content")
if meta_artist_url is not None:
_artist_src = [Source(cls.SOURCE_TYPE, cls.HOST + meta_artist_url)]
meta_artist_name = artist_span.find("meta", {"itemprop": "name"})
if meta_artist_name is not None:
meta_artist_name_text = meta_artist_name.get("content")
_artist_name = meta_artist_name_text
if _artist_name is not None or _artist_src is not None:
artist_list.append(Artist(name=_artist_name, source_list=_artist_src))
return Song(
title=song_name,
tracksort=tracksort,
main_artist_list=artist_list
)
@classmethod
def fetch_album_from_source(cls, source: Source, flat: bool = False) -> Album:
"""
fetches album from source:
eg. 'https://musify.club/release/linkin-park-hybrid-theory-2000-188'
/html/musify/album_overview.html
[] tracklist
[] attributes
[] ratings
:param source:
:param flat:
:return:
"""
album = Album(title="Hi :)")
url = cls.parse_url(source.url)
endpoint = cls.HOST + "/release/" + url.name_with_id
r = cls.get_request(endpoint)
if r is None:
return album
soup = BeautifulSoup(r.content, "html.parser")
# <div class="card"><div class="card-body">...</div></div>
cards_soup: BeautifulSoup = soup.find("div", {"class": "card-body"})
if cards_soup is not None:
card_soup: BeautifulSoup
for card_soup in cards_soup.find_all("div", {"class": "playlist__item"}):
album.song_collection.append(cls.parse_song_card(card_soup))
album.update_tracksort()
return album