2023-03-17 12:58:58 +00:00
from collections import defaultdict
2023-04-04 18:58:22 +00:00
from dataclasses import dataclass
from enum import Enum
2024-04-17 12:15:56 +00:00
from typing import List , Optional , Type , Union , Generator
2023-04-04 18:58:22 +00:00
from urllib . parse import urlparse
import pycountry
2023-03-13 14:47:38 +00:00
from bs4 import BeautifulSoup
2023-04-20 20:30:45 +00:00
from . . connection import Connection
2023-03-13 14:47:38 +00:00
from . abstract import Page
2023-04-18 10:00:25 +00:00
from . . utils . enums . source import SourcePages
2023-04-18 10:14:34 +00:00
from . . utils . enums . album import AlbumType , AlbumStatus
2023-03-13 14:47:38 +00:00
from . . objects import (
Artist ,
Source ,
Song ,
Album ,
ID3Timestamp ,
FormattedText ,
Label ,
2023-05-24 15:32:22 +00:00
Target ,
2023-06-20 10:03:11 +00:00
DatabaseObject ,
2024-04-11 18:29:05 +00:00
Lyrics ,
Artwork
2023-03-13 14:47:38 +00:00
)
2023-09-10 14:27:09 +00:00
from . . utils . config import logging_settings
2023-04-04 18:58:22 +00:00
from . . utils import string_processing , shared
2024-04-25 22:23:04 +00:00
from . . utils . string_processing import clean_song_title
2023-10-23 14:21:44 +00:00
from . . utils . support_classes . query import Query
from . . utils . support_classes . download_result import DownloadResult
2023-03-13 14:47:38 +00:00
2023-03-16 21:52:47 +00:00
"""
https://musify.club/artist/ghost-bath-280348?_pjax=#bodyContent
https://musify.club/artist/ghost-bath-280348/releases?_pjax=#bodyContent
https://musify.club/artist/ghost-bath-280348/clips?_pjax=#bodyContent
https://musify.club/artist/ghost-bath-280348/photos?_pjax=#bodyContent
POST https://musify.club/artist/filtersongs
ID: 280348
NameForUrl: ghost-bath
Page: 1
IsAllowed: True
SortOrder.Property: dateCreated
SortOrder.IsAscending: false
X-Requested-With: XMLHttpRequest
POST https://musify.club/artist/filteralbums
ArtistID: 280348
SortOrder.Property: dateCreated
SortOrder.IsAscending: false
X-Requested-With: XMLHttpRequest
"""
2023-03-17 12:11:18 +00:00
2023-03-17 11:31:56 +00:00
class MusifyTypes(Enum):
    """
    The kinds of musify pages that can be told apart by their url.

    The value of each member is compared against the page-type element of the
    url path when a url is parsed.
    """

    # artist overview page
    ARTIST = " artist "
    # album / release page
    RELEASE = " release "
    # page of a single song
    SONG = " track "
2023-03-17 12:11:18 +00:00
2023-03-17 11:31:56 +00:00
@dataclass
class MusifyUrl:
    """The individual parts of a musify url, as filled in by ``parse_url``."""

    # kind of page the url points to
    source_type: MusifyTypes
    # slug of the url with its last separator-delimited part (the id) removed
    name_without_id: str
    # complete slug of the url, id included
    name_with_id: str
    # last separator-delimited part of the slug
    musify_id: str
    # the url exactly as it was handed to the parser
    url: str
2023-05-24 15:32:22 +00:00
2023-03-17 11:31:56 +00:00
2023-05-24 15:32:22 +00:00
# Maps the numeric release-type ids used by musify onto AlbumType.
# Any id that is not listed here resolves to AlbumType.OTHER.
ALBUM_TYPE_MAP = defaultdict(lambda: AlbumType.OTHER)
ALBUM_TYPE_MAP.update({
    1: AlbumType.OTHER,               # musify itself calls this one "other"
    2: AlbumType.STUDIO_ALBUM,
    3: AlbumType.EP,
    4: AlbumType.SINGLE,
    5: AlbumType.OTHER,               # bootleg
    6: AlbumType.LIVE_ALBUM,
    7: AlbumType.COMPILATION_ALBUM,   # compilation of different artists
    8: AlbumType.MIXTAPE,
    9: AlbumType.DEMO,
    10: AlbumType.MIXTAPE,            # DJ mixes
    11: AlbumType.COMPILATION_ALBUM,  # compilation of only this artist
    12: AlbumType.STUDIO_ALBUM,       # split
    13: AlbumType.COMPILATION_ALBUM,  # unofficial
    14: AlbumType.MIXTAPE,            # "Soundtracks"
})
2023-03-13 14:47:38 +00:00
2023-05-24 15:32:22 +00:00
def parse_url(url: str) -> MusifyUrl:
    """
    Take a musify url apart into page type, slug and id.

    The url path is split on the path separator; element 1 selects the
    ``MusifyTypes`` member and element 2 is the slug. The part of the slug behind
    its last separator is used as the id, everything in front of it as the name.

    :param url: the url to parse
    :return: a ``MusifyUrl`` describing the url
    :raises IndexError: if the split path has fewer than three elements
    :raises ValueError: if the page type is not a value of ``MusifyTypes``
    """
    segments = urlparse(url).path.split(" / ")
    slug = segments[2]
    page_type = segments[1]

    # with no separator in the slug the name is empty and the id is the whole slug
    name_for_url, _, url_id = slug.rpartition(" - ")

    try:
        type_enum = MusifyTypes(page_type)
    except ValueError:
        # log which page type is missing, then let the caller deal with the error
        logging_settings[" musify_logger "].warning(f" {page_type} is not yet implemented, add it to MusifyTypes ")
        raise

    return MusifyUrl(
        source_type=type_enum,
        name_without_id=name_for_url,
        name_with_id=slug,
        musify_id=url_id,
        url=url,
    )
2023-05-24 15:32:22 +00:00
2023-05-24 16:02:19 +00:00
class Musify ( Page ) :
2023-05-24 15:32:22 +00:00
# CHANGE
2023-03-13 14:47:38 +00:00
SOURCE_TYPE = SourcePages . MUSIFY
2023-09-10 14:27:09 +00:00
LOGGER = logging_settings [ " musify_logger " ]
2023-04-03 10:14:58 +00:00
2023-05-24 15:32:22 +00:00
HOST = " https://musify.club "
2023-05-25 11:46:47 +00:00
def __init__(self, *args, **kwargs):
    """
    Create the two connections of this page and initialise the parent class.

    All positional and keyword arguments are passed on unchanged to the
    parent initialiser, which runs after both connections exist.
    """
    musify_host = " https://musify.club/ "

    # connection the page requests of this class are sent through
    self.connection: Connection = Connection(
        host=musify_host,
        logger=self.LOGGER,
        module=" musify ",
    )

    # second connection, created with semantic_not_found switched off
    self.stream_connection: Connection = Connection(
        host=musify_host,
        logger=self.LOGGER,
        semantic_not_found=False,
    )

    super().__init__(*args, **kwargs)
2023-03-20 20:50:19 +00:00
2023-05-24 15:32:22 +00:00
def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
    """
    Tell which kind of object the url of a source points to.

    :param source: the source whose url is inspected
    :return: ``Song``, ``Album`` or ``Artist``; ``None`` if the source has no url
        or the page type has no matching class
    """
    if source.url is None:
        return None

    page_type = parse_url(source.url).source_type

    # there is no branch for labels, because afaik musify has no labels
    if page_type is MusifyTypes.SONG:
        return Song
    if page_type is MusifyTypes.RELEASE:
        return Album
    if page_type is MusifyTypes.ARTIST:
        return Artist
    return None
def _parse_artist_contact(self, contact: BeautifulSoup) -> Artist:
    """
    Build an Artist from one contact element of a result listing.

    The name is read from the title of the first anchor and replaced by the
    alt text of the first image if the image has one. The href of the anchor,
    appended to ``HOST``, becomes the source of the artist.

    :param contact: the contact element to parse
    :return: an Artist with the name and source that could be found
    """
    source_list: List[Source] = []
    name = None

    # source and first guess for the name
    anchor = contact.find(" a ")
    if anchor is not None:
        name = anchor.get(" title ")

        # an anchor without href yields no source instead of raising a TypeError
        href = anchor.get(" href ")
        if href is not None:
            source_list.append(Source(self.SOURCE_TYPE, self.HOST + href))

    # the alt text of the artist image takes precedence over the anchor title
    image_soup = contact.find(" img ")
    if image_soup is not None:
        alt = image_soup.get(" alt ")
        if alt is not None:
            name = alt

    return Artist(
        name=name,
        source_list=source_list
    )
2023-05-24 15:32:22 +00:00
def _parse_album_contact(self, contact: BeautifulSoup) -> Album:
    """
    Build an Album from one contact element of a result listing.

    Example of the markup this parser was written against::

        <div class="contacts__item">
            <a href="/release/ghost-bath-ghost-bath-2013-602489" title="Ghost Bath - 2013">
                <div class="contacts__img release">
                    <img alt="Ghost Bath" class="lozad" data-src="https://37s.musify.club/img/69/9060265/24178833.jpg"/>
                </div>
                <div class="contacts__info">
                    <strong>Ghost Bath - 2013</strong>
                    <small>Ghost Bath</small>
                    <small>Треков: 4</small>  <!-- number of tracks -->
                    <small><i class="zmdi zmdi-star zmdi-hc-fw"></i> 9,04</small>
                </div>
            </a>
        </div>

    Title and year are taken, in this order, from the title of the anchor, the
    alt text of the image (title only) and the text of the ``<strong>`` element;
    a later hit replaces an earlier one.

    :param contact: the contact element to parse
    :return: an Album with the title, year, artists and source that could be found
    """
    source_list: List[Source] = []
    artist_list: List[Artist] = []
    title = None
    year = None

    def parse_title_date(title_date: Optional[str], delimiter: str = " - "):
        """Set title and year from "<title><delimiter><year>"; leave both alone if the last part is no number."""
        nonlocal year
        nonlocal title

        if title_date is None:
            return

        split_attr = title_date.strip().split(delimiter)
        if len(split_attr) < 2 or not split_attr[-1].isdigit():
            return

        year = int(split_attr[-1])
        title = delimiter.join(split_attr[:-1])

    # source, title and year from the anchor
    anchor = contact.find(" a ")
    if anchor is not None:
        parse_title_date(anchor.get(" title "))

        # an anchor without href yields no source instead of raising a TypeError
        href = anchor.get(" href ")
        if href is not None:
            source_list.append(Source(self.SOURCE_TYPE, self.HOST + href))

    # the alt text of the cover image replaces the title found so far
    image_soup = contact.find(" img ")
    if image_soup is not None:
        alt = image_soup.get(" alt ")
        if alt is not None:
            title = alt

    contact_info_soup = contact.find(" div ", {" class ": " contacts__info "})
    if contact_info_soup is not None:
        # The condition used to be inverted (`is None`) and passed the tag itself,
        # so the <strong> element was never evaluated.
        title_soup = contact_info_soup.find(" strong ")
        if title_soup is not None:
            parse_title_date(title_soup.text)

        small_list = contact_info_soup.find_all(" small ")
        if len(small_list) == 3:
            # in the example markup the first <small> holds the artists, followed by
            # the track count and the rating; only the artists are evaluated
            for artist_str in small_list[0].text.split(" & \r \n "):
                artist_str = artist_str.rstrip(" & ... \r \n ").strip()

                # cut off a trailing part in square brackets
                if artist_str.endswith(" ] ") and " [ " in artist_str:
                    artist_str = artist_str.rsplit(" [ ", maxsplit=1)[0]

                artist_list.append(Artist(name=artist_str))
        else:
            self.LOGGER.warning(" got an unequal ammount than 3 small elements ")

    return Album(
        title=title,
        source_list=source_list,
        date=ID3Timestamp(year=year),
        artist_list=artist_list
    )
2023-05-24 15:32:22 +00:00
def _parse_contact_container(self, contact_container_soup: BeautifulSoup) -> List[Union[Artist, Album]]:
    """
    Parse every contact item of a container into an Artist or an Album.

    The href of the first anchor of an item decides which parser is used;
    items without an anchor, without href, or with an href matching neither
    kind are skipped.

    :param contact_container_soup: the element holding the contact items
    :return: the parsed artists and albums in document order
    """
    parsed = []

    for item in contact_container_soup.find_all(" div ", {" class ": " contacts__item "}):
        link = item.find(" a ")
        if link is None:
            continue

        target = link.get(" href ")
        if target is None:
            continue

        if " artist " in target:
            parsed.append(self._parse_artist_contact(item))
        elif " release " in target:
            parsed.append(self._parse_album_contact(item))

    return parsed
2023-03-17 12:11:18 +00:00
2023-05-24 15:32:22 +00:00
def _parse_playlist_item(self, playlist_item_soup: BeautifulSoup) -> Song:
    """
    Build a Song from one playlist item of a result listing.

    The title comes from the data-name attribute of the item. If the heading of
    the item contains at least two anchors, the last one is treated as the link
    to the track and all anchors in front of it as links to its artists.

    :param playlist_item_soup: the playlist item to parse
    :return: a Song with the title, artists and source that could be found
    """
    song_title = playlist_item_soup.get(" data-name ")
    artist_list: List[Artist] = []
    source_list: List[Source] = []

    # attrs has to be a dict; the set literal used before only worked by accident
    playlist_details: BeautifulSoup = playlist_item_soup.find(" div ", {" class ": " playlist__heading "})
    if playlist_details is not None:
        anchor_list = playlist_details.find_all(" a ")

        if len(anchor_list) >= 2:
            # artists
            artist_anchor: BeautifulSoup
            for artist_anchor in anchor_list[:-1]:
                # an anchor without href gives an artist without source
                # instead of raising a TypeError
                href = artist_anchor.get(" href ")
                artist_source_list = []
                if href is not None:
                    artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + href))

                artist_list.append(Artist(
                    name=artist_anchor.get_text(strip=True),
                    source_list=artist_source_list
                ))

            # track
            # TODO: the text of this anchor may contain something like
            # "(feat. some artist)", which is not acceptable
            track_soup: BeautifulSoup = anchor_list[-1]
            href = track_soup.get(" href ")
            if href is not None:
                source_list.append(Source(self.SOURCE_TYPE, self.HOST + href))
        else:
            self.LOGGER.debug(" there are not enough anchors (2) for artist and track ")
            self.LOGGER.debug(str(artist_list))

    # NOTE: the data-artist attribute of the item is currently not used as a
    # fallback for the artist.

    return Song(
        title=clean_song_title(song_title, artist_name=artist_list[0].name if artist_list else None),
        main_artist_list=artist_list,
        source_list=source_list
    )
2023-03-17 12:11:18 +00:00
2023-05-24 15:32:22 +00:00
def _parse_playlist_soup(self, playlist_soup: BeautifulSoup) -> List[Song]:
    """
    Parse all playlist items of a playlist element.

    :param playlist_soup: the element holding the playlist items
    :return: one Song per playlist item, in document order
    """
    return [
        self._parse_playlist_item(item)
        for item in playlist_soup.find_all(" div ", {" class ": " playlist__item "})
    ]
2023-05-24 15:32:22 +00:00
def general_search(self, search_query: str) -> List[DatabaseObject]:
    """
    Run a free-text search on musify and parse the result page.

    :param search_query: the text to search for
    :return: the objects from the contact containers first, followed by the songs
        from the playlists; an empty list if the request yields no response
    """
    r = self.connection.get(
        f" https://musify.club/search?searchText= {search_query} ",
        name=" search_ " + search_query,
    )
    if r is None:
        return []

    search_soup: BeautifulSoup = self.get_soup_from_response(r)
    found = []

    # artists and albums: contact items inside the "contacts" containers
    for contacts_div in search_soup.find_all(" div ", {" class ": " contacts "}):
        found.extend(self._parse_contact_container(contacts_div))

    # songs: playlist items inside the "playlist" containers
    for playlist_div in search_soup.find_all(" div ", {" class ": " playlist "}):
        found.extend(self._parse_playlist_soup(playlist_div))

    return found
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
    """
    Fetch the page of a song and parse it into a Song.

    Besides the returned object this sets ``source.audio_url`` if the page
    contains a download anchor.

    :param source: source pointing to the song page
    :param stop_at_level: not used by this method
    :return: the parsed Song; an empty Song if the request yields no response
    """
    musify_url = parse_url(source.url)

    r = self.connection.get(source.url, name=" track_ " + musify_url.name_with_id)
    if r is None:
        return Song()

    soup = self.get_soup_from_response(r)

    track_name: Optional[str] = None
    source_list: List[Source] = [source]
    lyrics_list: List[Lyrics] = []
    artist_list: List[Artist] = []
    album_list: List[Album] = []

    def _read_breadcrumb_anchor(anchor_soup: BeautifulSoup):
        """Return the name and the source list found in one breadcrumb anchor."""
        anchor_source_list = []
        anchor_name = None

        # .get() instead of ["href"], so a missing attribute is skipped, not a KeyError
        anchor_href = anchor_soup.get(" href ")
        if anchor_href is not None:
            anchor_source_list.append(Source(self.SOURCE_TYPE, self.HOST + anchor_href))

        name_elem: BeautifulSoup = anchor_soup.find(" span ", {" itemprop ": " name "})
        if name_elem is not None:
            anchor_name = name_elem.text.strip()

        return anchor_name, anchor_source_list

    # download url
    anchor: BeautifulSoup
    for anchor in soup.find_all(" a ", {" itemprop ": " audio "}):
        href = anchor.get(" href ")
        if href is not None:
            source.audio_url = self.HOST + href

    # song detail: the anchors of the first list entry are the artists
    album_info: BeautifulSoup
    for album_info in soup.find_all(" ul ", {" class ": " album-info "}):
        list_element: BeautifulSoup = album_info.find(" li ")
        if list_element is None:
            continue

        artist_soup: BeautifulSoup
        for artist_soup in list_element.find_all(" a "):
            artist_source_list = []
            href = artist_soup.get(" href ")
            if href is not None:
                artist_source_list = [Source(self.SOURCE_TYPE, self.HOST + href)]

            artist_list.append(Artist(
                name=artist_soup.text.strip(),
                source_list=artist_source_list
            ))

    # breadcrumbs: the third entry is the artist, the fourth the album, the fifth the track
    for breadcrumb_list_element in soup.find_all(" ol ", {" class ": " breadcrumb "}):
        list_points: List[BeautifulSoup] = breadcrumb_list_element.find_all(" li ", " breadcrumb-item ")
        if len(list_points) != 5:
            self.LOGGER.warning(f" breadcrumbs of song doesn ' t have 5 items: {breadcrumb_list_element.prettify()} ")
            break

        artist_anchor: BeautifulSoup = list_points[2].find(" a ")
        if artist_anchor is not None:
            artist_name, artist_src_list = _read_breadcrumb_anchor(artist_anchor)
            artist_list.append(Artist(name=artist_name, source_list=artist_src_list))

        # The helper used before tested the enclosing variable instead of its own
        # parameter; the check now happens on the anchor that is actually parsed.
        album_anchor: BeautifulSoup = list_points[3].find(" a ")
        if album_anchor is not None:
            album_name, album_source_list = _read_breadcrumb_anchor(album_anchor)
            album_list.append(Album(title=album_name, source_list=album_source_list))

        track_name = list_points[4].text.strip()

    # artwork
    artwork: Artwork = Artwork()
    for album_image_element in soup.find_all(" img ", {" class ": " album-img "}):
        artwork.append(url=album_image_element.get(" data-src ", album_image_element.get(" src ")))

    # lyrics
    for lyrics in soup.find_all(" div ", {" id ": " tabLyrics "}):
        lyrics_soup = lyrics.find(" div ", {" style ": " white-space: pre-line "})
        if lyrics_soup is None:
            # a lyrics tab without the text element is skipped instead of
            # raising an AttributeError
            continue
        lyrics_list.append(Lyrics(text=FormattedText(html=lyrics_soup.text.strip())))

    # youtube video
    for video_container in soup.find_all(" div ", {" id ": " tabVideo "}):
        for iframe in video_container.find_all(" iframe "):
            iframe_src = iframe.get(" src ")
            if iframe_src is None:
                continue
            source_list.append(Source(
                SourcePages.YOUTUBE,
                iframe_src,
                referer_page=self.SOURCE_TYPE
            ))

    return Song(
        title=clean_song_title(track_name, artist_name=artist_list[0].name if artist_list else None),
        source_list=source_list,
        lyrics_list=lyrics_list,
        main_artist_list=artist_list,
        album_list=album_list,
        artwork=artwork,
    )
2023-03-17 12:11:18 +00:00
2023-05-24 15:32:22 +00:00
def _parse_song_card(self, song_card: BeautifulSoup) -> Song:
    """
    Build a Song from one playlist item of a release page.

    Abridged example of the markup this parser was written against::

        <div id="playerDiv3051" class="playlist__item" data-artist="Linkin Park" data-name="Papercut">
            <div class="playlist__position">1</div>
            <div class="playlist__details">
                <div class="playlist__heading">
                    <a href="/artist/linkin-park-5" rel="nofollow">Linkin Park</a> -
                    <a class="strong" href="/track/linkin-park-papercut-3051">Papercut</a>
                    <span itemprop="byArtist" itemscope="itemscope" itemtype="http://schema.org/MusicGroup">
                        <meta content="/artist/linkin-park-5" itemprop="url"/>
                        <meta content="Linkin Park" itemprop="name"/>
                    </span>
                </div>
            </div>
            <div class="playlist__actions">
                <a itemprop="audio" href="/track/dl/3051/linkin-park-papercut.mp3" id="dl_3051">...</a>
            </div>
        </div>

    A track can carry several byArtist spans, one per artist.

    :param song_card: the playlist item to parse
    :return: a Song with the title, track position, artists and sources that could be found
    """
    title = song_card.get(" data-name ")
    artists: List[Artist] = []
    sources: List[Source] = []
    track_url = None

    # position of the track within the listing
    tracksort = None
    position_div: BeautifulSoup = song_card.find(" div ", {" class ": " playlist__position "})
    if position_div is not None:
        position_text: str = position_div.get_text(strip=True)
        if position_text.isdigit():
            tracksort = int(position_text)

    details_div: BeautifulSoup = song_card.find(" div ", {" class ": " playlist__details "})
    if details_div is not None:
        # track: the last anchor, provided there is more than one
        links: List[BeautifulSoup] = details_div.find_all(" a ")
        if len(links) > 1:
            track_link: BeautifulSoup = links[-1]
            track_href: str = track_link.get(" href ")
            if track_href is not None:
                track_url = self.HOST + track_href
                sources.append(Source(self.SOURCE_TYPE, self.HOST + track_href))
            title = track_link.get_text(strip=True)

        # artists: one byArtist span each, url and name sit in meta elements
        by_artist: BeautifulSoup
        for by_artist in details_div.find_all(" span ", {" itemprop ": " byArtist "}):
            artist_sources = None
            artist_name = None

            url_meta = by_artist.find(" meta ", {" itemprop ": " url "})
            if url_meta is not None:
                artist_href = url_meta.get(" content ")
                if artist_href is not None:
                    artist_sources = [Source(self.SOURCE_TYPE, self.HOST + artist_href)]

            name_meta = by_artist.find(" meta ", {" itemprop ": " name "})
            if name_meta is not None:
                artist_name = name_meta.get(" content ")

            # a span that gave neither a name nor a source is ignored
            if artist_name is None and artist_sources is None:
                continue
            artists.append(Artist(name=artist_name, source_list=artist_sources))

    # the download anchor holds the actual audio url
    actions_div: BeautifulSoup = song_card.find(" div ", {" class ": " playlist__actions "})
    if actions_div is not None:
        download_link = actions_div.find(" a ", {" itemprop ": " audio "})
        if download_link is not None:
            download_href = download_link.get(" href ")
            if download_href is not None and track_url is not None:
                # second source for the same page url, this time carrying the audio url
                sources.append(Source(
                    self.SOURCE_TYPE,
                    url=track_url,
                    audio_url=self.HOST + download_href
                ))

    first_artist_name = artists[0].name if len(artists) > 0 else None
    return Song(
        title=clean_song_title(title, artist_name=first_artist_name),
        tracksort=tracksort,
        main_artist_list=artists,
        source_list=sources
    )
2023-03-17 12:11:18 +00:00
2023-05-24 15:32:22 +00:00
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
    """
    Fetch the page of a release and parse it into an Album.

    eg. 'https://musify.club/release/linkin-park-hybrid-theory-2000-188'

    /html/musify/album_overview.html
    - [x] tracklist
    - [x] attributes
    - [ ] ratings

    :param source: source pointing to the release page
    :param stop_at_level: if greater than 1, every song of the album is fetched
        from its own page as well and merged into the song from the track list
    :return: the parsed Album; an empty Album if the request yields no response
    """
    musify_url = parse_url(source.url)

    release_url = self.HOST + " /release/ " + musify_url.name_with_id
    r = self.connection.get(release_url, name=musify_url.name_with_id)
    if r is None:
        return Album()

    soup = BeautifulSoup(r.content, " html.parser ")
    album = self._parse_album(soup)

    # the track list is searched in the first <div class="card-body">
    card_body: BeautifulSoup = soup.find(" div ", {" class ": " card-body "})
    if card_body is not None:
        song_card: BeautifulSoup
        for song_card in card_body.find_all(" div ", {" class ": " playlist__item "}):
            album.song_collection.append(self._parse_song_card(song_card))

    if stop_at_level > 1:
        song: Song
        for song in album.song_collection:
            for song_source in song.source_collection.get_sources_from_page(self.SOURCE_TYPE):
                song.merge(self.fetch_song(source=song_source))

    album.update_tracksort()

    return album
def _get_artist_attributes(self, url: MusifyUrl) -> Artist:
    """
    Fetch the main artist attributes by parsing the html of the artist page.

    endpoint: https://musify.club/artist/ghost-bath-280348?_pjax=#bodyContent

    The parts of the page that are read (abridged sample):

        <ol class="breadcrumb" itemscope="" itemtype="http://schema.org/BreadcrumbList">
            <li class="breadcrumb-item" ...><a href="/" itemprop="item"><span itemprop="name">Главная</span>...</a></li>
            <li class="breadcrumb-item" ...><a href="/artist" itemprop="item"><span itemprop="name">Исполнители</span>...</a></li>
            <li class="breadcrumb-item active">Ghost Bath</li>
        </ol>
        <ul class="nav nav-tabs nav-fill">
            <li class="nav-item"><a class="active nav-link" href="/artist/ghost-bath-280348">песни (41)</a></li>
            <li class="nav-item"><a class="nav-link" href="/artist/ghost-bath-280348/releases">альбомы (12)</a></li>
            <li class="nav-item"><a class="nav-link" href="/artist/ghost-bath-280348/clips">видеоклипы (23)</a></li>
            <li class="nav-item"><a class="nav-link" href="/artist/ghost-bath-280348/photos">фото (38)</a></li>
        </ul>
        <header class="content__title">
            <h1>Ghost Bath</h1>
            <div class="actions">...</div>
        </header>
        <ul class="icon-list">
            <li>
                <i class="zmdi zmdi-globe zmdi-hc-fw" title="Страна"></i>
                <i class="flag-icon US shadow"></i>
                Соединенные Штаты
            </li>
        </ul>

    :param url: parsed musify url of the artist
    :return: the artist with name, country, sources and notes as far as they
        were found on the page; an empty ``Artist()`` if the request returned nothing
    """
    r = self.connection.get(
        f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent",
        name="artist_attributes_" + url.name_with_id
    )
    if r is None:
        return Artist()

    soup = self.get_soup_from_response(r)

    name = None
    source_list: List[Source] = []
    country = None
    notes: FormattedText = FormattedText()

    # first guess for the name: the last breadcrumb item
    breadcrumbs: BeautifulSoup = soup.find("ol", {"class": "breadcrumb"})
    if breadcrumbs is not None:
        breadcrumb_list: List[BeautifulSoup] = breadcrumbs.find_all(
            "li", {"class": "breadcrumb-item"}, recursive=False
        )
        if len(breadcrumb_list) == 3:
            name = breadcrumb_list[-1].get_text(strip=True)
        else:
            self.LOGGER.debug("breadcrumb layout on artist page changed")

    # the "songs" tab links to the canonical artist url
    nav_tabs: BeautifulSoup = soup.find("ul", {"class": "nav-tabs"})
    if nav_tabs is not None:
        list_item: BeautifulSoup
        for list_item in nav_tabs.find_all("li", {"class": "nav-item"}, recursive=False):
            if not list_item.get_text(strip=True).startswith("песни"):
                # "песни" translates to "songs"
                continue

            anchor: BeautifulSoup = list_item.find("a")
            if anchor is None:
                continue
            href = anchor.get("href")
            if href is None:
                continue

            source_list.append(Source(self.SOURCE_TYPE, self.HOST + href))

    # the headline, if present, takes precedence over the breadcrumb name
    content_title: BeautifulSoup = soup.find("header", {"class": "content__title"})
    if content_title is not None:
        h1_name: BeautifulSoup = content_title.find("h1", recursive=False)
        if h1_name is not None:
            name = h1_name.get_text(strip=True)

    # country and sources
    icon_list: BeautifulSoup = soup.find("ul", {"class": "icon-list"})
    if icon_list is not None:
        # the attribute filter has to be a dict; a set literal would be read as
        # "class is any of these values" and would also match class="class"
        country_italic: BeautifulSoup = icon_list.find("i", {"class": "flag-icon"})
        if country_italic is not None:
            style_classes: set = {"flag-icon", "shadow"}
            classes: set = set(country_italic.get("class") or [])
            country_set: set = classes.difference(style_classes)

            if len(country_set) != 1:
                self.LOGGER.debug("the country set contains multiple values")
            if len(country_set) != 0:
                # This is the css file, where all flags that can be used on musify
                # are laid out and styled.
                # Every flag has two upper case letters, thus I assume they follow the alpha_2
                # https://musify.club/content/flags.min.css
                # sorted() makes the pick deterministic if several classes remain
                country = pycountry.countries.get(alpha_2=sorted(country_set)[0])

        # get all additional sources
        additional_source: BeautifulSoup
        for additional_source in icon_list.find_all("a", {"class": "link"}):
            href = additional_source.get("href")
            if href is None:
                continue

            new_src = Source.match_url(href, referer_page=self.SOURCE_TYPE)
            if new_src is None:
                continue
            source_list.append(new_src)

    note_soup: BeautifulSoup = soup.find(id="text-main")
    if note_soup is not None:
        notes.html = note_soup.decode_contents()

    return Artist(
        name=name,
        country=country,
        source_list=source_list,
        notes=notes
    )
2023-03-17 17:16:06 +00:00
2023-05-24 15:32:22 +00:00
def _parse_album_card(self, album_card: BeautifulSoup, artist_name: str = None) -> Album:
    """
    Parse one release card of the discography listing into an ``Album``.

    <div class="card release-thumbnail" data-type="2">
        <a href="/release/ghost-bath-self-loather-2021-1554266">
            <img alt="Self Loather" class="card-img-top lozad" data-src="https://40s-a.musify.club/img/70/24826582/62624396.jpg"/>
            <noscript><img alt="Self Loather" src="https://40s-a.musify.club/img/70/24826582/62624396.jpg"/></noscript>
        </a>
        <div class="card-body">
            <h4 class="card-subtitle">
                <a href="/release/ghost-bath-self-loather-2021-1554266">Self Loather</a>
            </h4>
        </div>
        <div class="card-footer"><p class="card-text"><a href="/albums/2021">2021</a></p></div>
        <div class="card-footer">
            <p class="card-text genre__labels">
                <a href="/genre/depressive-black-132">Depressive Black</a><a href="/genre/post-black-metal-295">Post-Black Metal</a></p>
        </div>
        <div class="card-footer">
            <small><i class="zmdi zmdi-calendar" title="Добавлено"></i> 13.11.2021</small>
            <small><i class="zmdi zmdi-star zmdi-hc-fw" title="Рейтинг"></i> 5,88</small>
        </div>
    </div>

    :param album_card: the ``div.card`` element of one release
    :param artist_name: name of the artist the card belongs to; used to strip
        an "<artist> - " prefix from the title
    :return: album with title, sources, date and, when ``data-type`` is
        numeric, album type (and bootleg status for type 5)
    """
    name: Optional[str] = None
    source_list: List[Source] = []
    timestamp: Optional[ID3Timestamp] = None
    # stays None when the card carries no numeric data-type
    album_type = None
    album_status = None

    def set_name(new_name: str):
        nonlocal name

        # example of just setting not working:
        # https://musify.club/release/unjoy-eurythmie-psychonaut-4-tired-numb-still-alive-2012-324067
        if new_name.count(" - ") != 1:
            name = new_name
            return

        potential_artist_list, potential_name = new_name.split(" - ")
        unified_artist_list = string_processing.unify(potential_artist_list)

        if artist_name is not None:
            # only drop the prefix if it really names the expected artist
            if string_processing.unify(artist_name) not in unified_artist_list:
                name = new_name
                return

            name = potential_name
            return

        name = new_name

    raw_type = album_card.get("data-type")
    if raw_type is not None and raw_type.isdigit():
        album_status_id = int(raw_type)
        album_type = ALBUM_TYPE_MAP[album_status_id]

        if album_status_id == 5:
            album_status = AlbumStatus.BOOTLEG

    def parse_release_anchor(_anchor: BeautifulSoup, text_is_name=False):
        if _anchor is None:
            return

        href = _anchor.get("href")
        if href is not None:
            # add url to sources
            source_list.append(Source(
                self.SOURCE_TYPE,
                self.HOST + href
            ))

        if not text_is_name:
            return

        set_name(_anchor.text)

    anchor_list = album_card.find_all("a", recursive=False)
    if len(anchor_list) > 0:
        anchor = anchor_list[0]
        parse_release_anchor(anchor)

        thumbnail: BeautifulSoup = anchor.find("img")
        if thumbnail is not None:
            alt = thumbnail.get("alt")
            if alt is not None:
                set_name(alt)
    else:
        self.LOGGER.debug("the card has no thumbnail or url")

    card_body = album_card.find("div", {"class": "card-body"})
    if card_body is not None:
        parse_release_anchor(card_body.find("a"), text_is_name=True)

    def parse_small_date(small_soup: BeautifulSoup):
        """
        <small>
            <i class="zmdi zmdi-calendar" title="Добавлено"></i>
            13.11.2021
        </small>
        """
        nonlocal timestamp

        italic_tagging_soup: BeautifulSoup = small_soup.find("i")
        if italic_tagging_soup is None:
            return
        if italic_tagging_soup.get("title") != "Добавлено":
            # "Добавлено" can be translated to "Added (at)"
            return

        raw_time = small_soup.text.strip()
        try:
            timestamp = ID3Timestamp.strptime(raw_time, "%d.%m.%Y")
        except ValueError:
            # a date in an unexpected format must not abort parsing the whole card
            self.LOGGER.debug(f"Raw datetime doesn't match time format %d.%m.%Y: {raw_time}")

    # parse small date
    card_footer_list = album_card.find_all("div", {"class": "card-footer"})
    if len(card_footer_list) != 3:
        self.LOGGER.debug("there are not exactly 3 card footers in a card")

    if len(card_footer_list) > 0:
        for any_small_soup in card_footer_list[-1].find_all("small"):
            parse_small_date(any_small_soup)
    else:
        self.LOGGER.debug("there is not even 1 footer in the album card")

    album_kwargs = {
        "title": name,
        "source_list": source_list,
        "date": timestamp,
        "album_status": album_status,
    }
    if album_type is not None:
        # without a known type, Album falls back to its own default
        album_kwargs["album_type"] = album_type

    return Album(**album_kwargs)
2023-05-24 15:32:22 +00:00
def _parse_album(self, soup: BeautifulSoup) -> Album:
    """
    Parse title, sources, artists and release date from a release page.

    The values are collected from three places of the page: the breadcrumb,
    the ``<meta itemprop=...>`` tags and the ``ul.album-info`` list.

    :param soup: the parsed html of the release page
    :return: album built from whatever could be found; missing parts are
        left as ``None`` or empty lists
    """
    name: Optional[str] = None
    source_list: List[Source] = []
    artist_list: List[Artist] = []
    date: Optional[ID3Timestamp] = None

    # breadcrumb
    # if the breadcrumb list has 4 elements, then
    # the -2 is the artist link,
    # the -1 is the album
    breadcrumb_soup: BeautifulSoup = soup.find("ol", {"class": "breadcrumb"})
    breadcrumb_elements: List[BeautifulSoup] = []
    if breadcrumb_soup is not None:
        breadcrumb_elements = breadcrumb_soup.find_all("li", {"class": "breadcrumb-item"})

    if len(breadcrumb_elements) == 4:
        # album
        album_crumb: BeautifulSoup = breadcrumb_elements[-1]
        name = album_crumb.text.strip()

        # artist
        artist_crumb: BeautifulSoup = breadcrumb_elements[-2]
        anchor: BeautifulSoup = artist_crumb.find("a")
        if anchor is not None:
            href = anchor.get("href")
            artist_source_list: List[Source] = []

            if href is not None:
                artist_source_list.append(Source(self.SOURCE_TYPE, self.HOST + href.strip()))

            span: BeautifulSoup = anchor.find("span")
            if span is not None:
                artist_list.append(Artist(
                    name=span.get_text(strip=True),
                    source_list=artist_source_list
                ))
    else:
        # also reached when the page has no breadcrumb at all
        self.LOGGER.debug("there are not 4 breadcrumb items, which shouldn't be the case")

    # meta
    meta_url: BeautifulSoup = soup.find("meta", {"itemprop": "url"})
    if meta_url is not None:
        url = meta_url.get("content")
        if url is not None:
            source_list.append(Source(self.SOURCE_TYPE, self.HOST + url))

    # the meta name, if present, takes precedence over the breadcrumb name
    meta_name: BeautifulSoup = soup.find("meta", {"itemprop": "name"})
    if meta_name is not None:
        _name = meta_name.get("content")
        if _name is not None:
            name = _name

    # album info
    album_info_ul: BeautifulSoup = soup.find("ul", {"class": "album-info"})
    if album_info_ul is not None:
        artist_anchor: BeautifulSoup
        for artist_anchor in album_info_ul.find_all("a", {"itemprop": "byArtist"}):
            artist_source_list: List[Source] = []

            artist_url_meta = artist_anchor.find("meta", {"itemprop": "url"})
            if artist_url_meta is not None:
                artist_href = artist_url_meta.get("content")
                if artist_href is not None:
                    artist_source_list.append(Source(self.SOURCE_TYPE, url=self.HOST + artist_href))

            artist_meta_name = artist_anchor.find("meta", {"itemprop": "name"})
            if artist_meta_name is not None:
                artist_name = artist_meta_name.get("content")
                if artist_name is not None:
                    artist_list.append(Artist(
                        name=artist_name,
                        source_list=artist_source_list
                    ))

        time_soup: BeautifulSoup = album_info_ul.find("time", {"itemprop": "datePublished"})
        if time_soup is not None:
            raw_datetime = time_soup.get("datetime")
            if raw_datetime is not None:
                try:
                    date = ID3Timestamp.strptime(raw_datetime, "%Y-%m-%d")
                except ValueError:
                    self.LOGGER.debug(f"Raw datetime doesn't match time format %Y-%m-%d: {raw_datetime}")

    return Album(
        title=name,
        source_list=source_list,
        artist_list=artist_list,
        date=date
    )
2024-04-17 12:15:56 +00:00
def _get_discography(self, url: MusifyUrl, artist_name: str = None, stop_at_level: int = 1) -> Generator[Album, None, None]:
    """
    Lazily yield every album card of an artist's discography.

    POST https://musify.club/artist/filteralbums
    ArtistID: 280348
    SortOrder.Property: dateCreated
    SortOrder.IsAscending: false
    X-Requested-With: XMLHttpRequest

    :param url: parsed musify url of the artist
    :param artist_name: forwarded to ``_parse_album_card``
    :param stop_at_level: currently not read by this method
    :return: generator of albums; it yields nothing if the request returned nothing
    """
    endpoint = self.HOST + "/" + url.source_type.value + "/filteralbums"

    r = self.connection.post(url=endpoint, json={
        "ArtistID": str(url.musify_id),
        "SortOrder.Property": "dateCreated",
        "SortOrder.IsAscending": False,
        "X-Requested-With": "XMLHttpRequest"
    }, name="discography_" + url.name_with_id)
    if r is None:
        # this is a generator: a bare return simply ends the iteration
        return

    soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")

    for card_soup in soup.find_all("div", {"class": "card"}):
        yield self._parse_album_card(card_soup, artist_name)
2023-05-24 15:32:22 +00:00
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
    """
    Fetch an artist together with its discography.

    [x] discography
    [x] attributes
    [ ] picture gallery

    Args:
        source (Source): the source to fetch
        stop_at_level (int): part of the fetch interface; this method does not read it. Defaults to 1.

    Returns:
        Artist: the artist fetched
    """
    artist_url = parse_url(source.url)

    fetched_artist = self._get_artist_attributes(artist_url)

    # the discography is produced lazily and consumed by extend()
    discography = self._get_discography(artist_url, fetched_artist.name)
    fetched_artist.main_album_collection.extend(discography)

    return fetched_artist
def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:
    """
    Return an empty label; neither ``source`` nor ``stop_at_level`` is read.
    """
    empty_label = Label()
    return empty_label
2023-03-30 14:10:48 +00:00
2023-06-12 17:46:46 +00:00
def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult:
    """
    Stream the audio of a song into ``target``.

    If the source carries no audio url, the download url is derived from the
    track url:

    https://musify.club/track/im-in-a-coffin-life-never-was-waste-of-skin-16360302
    https://musify.club/track/dl/16360302/im-in-a-coffin-life-never-was-waste-of-skin.mp3

    :param source: source of the song
    :param target: where the audio is streamed into
    :param desc: passed on as ``name`` to the stream connection
    :return: the result of the stream, or a ``DownloadResult`` carrying an
        error message if the fallback url is not a song url
    """
    audio_endpoint = source.audio_url

    if audio_endpoint is None:
        track_url: MusifyUrl = parse_url(source.url)
        if track_url.source_type != MusifyTypes.SONG:
            return DownloadResult(error_message=f"The url is not of the type Song: {source.url}")

        audio_endpoint = f"https://musify.club/track/dl/{track_url.musify_id}/{track_url.name_without_id}.mp3"
        self.LOGGER.warning(f"The source has no audio link. Falling back to {audio_endpoint}.")

    return self.stream_connection.stream_into(
        audio_endpoint,
        target,
        raw_url=True,
        exclude_headers=["Host"],
        name=desc,
    )