2023-03-17 12:58:58 +00:00
from collections import defaultdict
2023-04-04 18:58:22 +00:00
from dataclasses import dataclass
from enum import Enum
2023-03-30 10:00:39 +00:00
from typing import List , Optional , Type , Union
2023-04-04 18:58:22 +00:00
from urllib . parse import urlparse
import pycountry
2023-03-13 14:47:38 +00:00
import requests
from bs4 import BeautifulSoup
2023-04-20 20:30:45 +00:00
from . . connection import Connection
2023-03-13 14:47:38 +00:00
from . abstract import Page
2023-04-18 10:00:25 +00:00
from . . utils . enums . source import SourcePages
2023-04-18 10:14:34 +00:00
from . . utils . enums . album import AlbumType , AlbumStatus
2023-03-13 14:47:38 +00:00
from . . objects import (
Artist ,
Source ,
Song ,
Album ,
ID3Timestamp ,
FormattedText ,
Label ,
2023-03-17 12:58:58 +00:00
Options ,
2023-03-30 14:10:48 +00:00
Target
2023-03-13 14:47:38 +00:00
)
2023-04-20 20:30:45 +00:00
from . . utils . shared import MUSIFY_LOGGER
2023-04-04 18:58:22 +00:00
from . . utils import string_processing , shared
from . support_classes . download_result import DownloadResult
2023-03-13 14:47:38 +00:00
2023-03-16 21:52:47 +00:00
"""
https : / / musify . club / artist / ghost - bath - 280348 ? _pjax = #bodyContent
https : / / musify . club / artist / ghost - bath - 280348 / releases ? _pjax = #bodyContent
https : / / musify . club / artist / ghost - bath - 280348 / clips ? _pjax = #bodyContent
https : / / musify . club / artist / ghost - bath - 280348 / photos ? _pjax = #bodyContent
POST https : / / musify . club / artist / filtersongs
ID : 280348
NameForUrl : ghost - bath
Page : 1
IsAllowed : True
SortOrder . Property : dateCreated
SortOrder . IsAscending : false
X - Requested - With : XMLHttpRequest
POST https : / / musify . club / artist / filteralbums
ArtistID : 280348
SortOrder . Property : dateCreated
SortOrder . IsAscending : false
X - Requested - With : XMLHttpRequest
"""
2023-03-17 12:11:18 +00:00
2023-03-17 11:31:56 +00:00
class MusifyTypes(Enum):
    """Kinds of musify.club pages; each value is the first path segment of the page URL."""

    ARTIST = "artist"
    RELEASE = "release"
    SONG = "track"
2023-03-17 12:11:18 +00:00
2023-03-17 11:31:56 +00:00
@dataclass
class MusifyUrl:
    """A musify.club URL split into the parts the scraper needs."""

    # page kind, taken from the first path segment
    source_type: MusifyTypes
    # slug without the trailing id, e.g. "ghost-bath"
    name_without_id: str
    # full second path segment, e.g. "ghost-bath-280348"
    name_with_id: str
    # last "-"-separated part of the slug, e.g. "280348"
    musify_id: str
    # the url exactly as it was passed in
    url: str
2023-03-13 14:47:38 +00:00
2023-03-15 19:55:28 +00:00
class Musify ( Page ) :
2023-03-13 14:47:38 +00:00
# requests session shared by the whole class, with browser-like default headers
API_SESSION: requests.Session = requests.Session()
API_SESSION.headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:106.0) Gecko/20100101 Firefox/106.0",
    "Connection": "keep-alive",
    "Referer": "https://musify.club/"
}
API_SESSION.proxies = shared.proxies

# NOTE(review): presumably seconds and retry count consumed by the base Page class - confirm
TIMEOUT = 7
POST_TIMEOUT = 15
TRIES = 5

# no trailing slash: hrefs scraped from the site ("/artist/...") are appended directly
HOST = "https://musify.club"

CONNECTION = Connection(
    host="https://musify.club/",
    logger=MUSIFY_LOGGER
)

SOURCE_TYPE = SourcePages.MUSIFY

LOGGER = shared.MUSIFY_LOGGER
2023-03-17 12:11:18 +00:00
2023-03-20 20:50:19 +00:00
@classmethod
def parse_url(cls, url: str) -> MusifyUrl:
    """
    Split a musify url of the form ``<host>/<type>/<name>-<id>`` into its parts.

    :param url: url of an artist, release or track page
    :return: the decomposed url
    :raises IndexError: if the path has fewer than two segments
    :raises ValueError: if ``<type>`` is not a value of MusifyTypes
    """
    path = urlparse(url).path.split("/")

    name_with_id = path[2]
    # everything before the last "-" is the name, the rest is the id;
    # without any "-" the name is empty and the whole segment is the id
    name_without_id, _, musify_id = name_with_id.rpartition("-")

    try:
        type_enum = MusifyTypes(path[1])
    except ValueError:
        cls.LOGGER.warning(f"{path[1]} is not yet implemented, add it to MusifyTypes")
        raise

    return MusifyUrl(
        source_type=type_enum,
        name_without_id=name_without_id,
        name_with_id=name_with_id,
        musify_id=musify_id,
        url=url
    )
2023-03-13 14:47:38 +00:00
@classmethod
def search_by_query(cls, query: str) -> Options:
    """
    Search musify for ``query``.

    A raw query is sent as it is; a structured one is first flattened
    with ``get_plaintext_query``.

    :param query: the query string, parsed by ``cls.Query``
    :return: the search results
    """
    parsed_query = cls.Query(query)

    if parsed_query.is_raw:
        search_text = parsed_query.query
    else:
        search_text = cls.get_plaintext_query(parsed_query)

    return cls.plaintext_search(search_text)
2023-03-13 14:47:38 +00:00
@classmethod
def get_plaintext_query(cls, query: Page.Query) -> str:
    """
    Flatten a structured query into "artist - [album - ]song".

    Missing or empty parts are replaced by "*"; the album part is left
    out completely if ``query.album`` is None.

    :param query: object exposing ``artist``, ``album`` and ``song``
    :return: the search text
    """
    parts = [query.artist or '*']
    if query.album is not None:
        parts.append(query.album or '*')
    parts.append(query.song or '*')

    return " - ".join(f"{part}" for part in parts)
2023-03-13 14:47:38 +00:00
2023-03-14 13:58:54 +00:00
@classmethod
def parse_artist_contact(cls, contact: BeautifulSoup) -> Artist:
    """
    Build an Artist from one ``contacts__item`` of the search page.

    The name is the ``title`` of the first anchor, overridden by the
    ``alt`` text of the first image if there is one. The id is the
    part of the href after the last "-".

    :param contact: the ``contacts__item`` element
    :return: the artist; id and name stay None if they could not be found
    """
    source_list: List[Source] = []
    name = None
    _id = None

    # source
    anchor = contact.find("a")
    if anchor is not None:
        name = anchor.get("title")

        href = anchor.get("href")
        # an anchor without href yields neither an id nor a source
        if href is not None:
            if "-" in href:
                _id = href.split("-")[-1]

            source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))

    # artist image
    image_soup = contact.find("img")
    if image_soup is not None:
        alt = image_soup.get("alt")
        if alt is not None:
            name = alt

    return Artist(
        _id=_id,
        name=name,
        source_list=source_list
    )
2023-03-17 12:11:18 +00:00
2023-03-15 19:55:28 +00:00
@classmethod
def parse_album_contact(cls, contact: BeautifulSoup) -> Album:
    """
    Build an Album from one ``contacts__item`` of the search page.

    Layout the parser relies on:

        <div class="contacts__item">
            <a href="/release/ghost-bath-ghost-bath-2013-602489" title="Ghost Bath - 2013">
                <div class="contacts__img release">
                    <img alt="Ghost Bath" class="lozad" data-src="https://37s.musify.club/img/69/9060265/24178833.jpg"/>
                    <noscript><img alt="Ghost Bath" src="https://37s.musify.club/img/69/9060265/24178833.jpg"/></noscript>
                </div>
                <div class="contacts__info">
                    <strong>Ghost Bath - 2013</strong>
                    <small>Ghost Bath</small>
                    <small>Треков: 4</small> <!--tracks-->
                    <small><i class="zmdi zmdi-star zmdi-hc-fw"></i> 9,04</small>
                </div>
            </a>
        </div>

    :param contact: the ``contacts__item`` element
    :return: the album with id, title, year, sources and artists as far as they were found
    """
    source_list: List[Source] = []
    title = None
    _id = None
    year = None
    artist_list: List[Artist] = []

    def parse_title_date(title_date: Optional[str], delimiter: str = " - "):
        """Set title and year from "<title><delimiter><year>"; do nothing if the text doesn't match."""
        nonlocal year
        nonlocal title

        if title_date is None:
            return

        split_attr = title_date.strip().split(delimiter)

        if len(split_attr) < 2:
            return
        if not split_attr[-1].isdigit():
            return

        year = int(split_attr[-1])
        title = delimiter.join(split_attr[:-1])

    # source
    anchor = contact.find("a")
    if anchor is not None:
        # get the title and year
        parse_title_date(anchor.get("title"))

        href = anchor.get("href")
        # an anchor without href yields neither an id nor a source
        if href is not None:
            if "-" in href:
                _id = href.split("-")[-1]

            source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))

    # cover art: only the alt text is used, as title
    image_soup = contact.find("img")
    if image_soup is not None:
        alt = image_soup.get("alt")
        if alt is not None:
            title = alt

    contact_info_soup = contact.find("div", {"class": "contacts__info"})
    if contact_info_soup is not None:
        # <strong> holds "<title> - <year>"; the text has to be passed, not the tag
        title_soup = contact_info_soup.find("strong")
        if title_soup is not None:
            parse_title_date(title_soup.text)

        small_list = contact_info_soup.find_all("small")
        if len(small_list) == 3:
            # small_list[0] holds the artists, [1] the track count and [2] the rating;
            # only the artists are parsed
            raw_artist_str = small_list[0].text

            for artist_str in raw_artist_str.split("&\r\n"):
                artist_str = artist_str.rstrip("& ...\r\n")
                artist_str = artist_str.strip()

                # cut off a trailing "[...]" suffix
                if artist_str.endswith("]") and "[" in artist_str:
                    artist_str = artist_str.rsplit("[", maxsplit=1)[0]

                artist_list.append(Artist(name=artist_str))
        else:
            cls.LOGGER.warning("got an unequal ammount than 3 small elements")

    return Album(
        _id=_id,
        title=title,
        source_list=source_list,
        date=ID3Timestamp(year=year),
        artist_list=artist_list
    )
2023-03-17 12:11:18 +00:00
2023-03-15 19:55:28 +00:00
@classmethod
def parse_contact_container(cls, contact_container_soup: BeautifulSoup) -> List[Union[Artist, Album]]:
    """
    Parse every ``contacts__item`` inside a contact container.

    Whether an item is an artist or a release is decided by the first
    path segment of its link. Testing for a substring of the whole href
    would misclassify releases whose slug contains "artist"
    (e.g. "/release/various-artists-...").

    :param contact_container_soup: element containing the ``contacts__item`` divs
    :return: the parsed artists and albums in document order
    """
    contacts = []

    contact: BeautifulSoup
    for contact in contact_container_soup.find_all("div", {"class": "contacts__item"}):
        anchor_soup = contact.find("a")
        if anchor_soup is None:
            continue

        url = anchor_soup.get("href")
        if url is None:
            continue

        # urlparse makes this work for relative ("/artist/...") and absolute hrefs
        page_type = urlparse(url).path.strip("/").split("/")[0]

        if page_type == "artist":
            contacts.append(cls.parse_artist_contact(contact))
        elif page_type == "release":
            contacts.append(cls.parse_album_contact(contact))

    return contacts
2023-03-17 12:11:18 +00:00
2023-03-16 15:57:43 +00:00
@classmethod
def parse_playlist_item(cls, playlist_item_soup: BeautifulSoup) -> Song:
    """
    Build a Song from one ``playlist__item`` of the search page.

    Inside ``playlist__heading`` the last anchor links the track and all
    anchors before it link the artists. The song id is taken from the
    track href and, if present, overridden by the numeric part of the
    item's ``id`` attribute ("playerDiv<id>").

    :param playlist_item_soup: the ``playlist__item`` element
    :return: the song with its artists and sources
    """
    song_id = None
    song_title = playlist_item_soup.get("data-name")
    artist_list: List[Artist] = []
    source_list: List[Source] = []

    # details
    playlist_details: BeautifulSoup = playlist_item_soup.find("div", {"class": "playlist__heading"})
    if playlist_details is not None:
        anchor_list = playlist_details.find_all("a")

        if len(anchor_list) >= 2:
            # artists
            artist_anchor: BeautifulSoup
            for artist_anchor in anchor_list[:-1]:
                artist_name = artist_anchor.get_text(strip=True)

                href = artist_anchor.get("href")
                if href is None:
                    # keep the artist by name even though it can't be linked
                    artist_list.append(Artist(name=artist_name))
                    continue

                # a separate variable, so the artist id can't end up as the song id
                artist_id = href.split("-")[-1] if "-" in href else None

                artist_list.append(Artist(
                    _id=artist_id,
                    name=artist_name,
                    source_list=[Source(cls.SOURCE_TYPE, cls.HOST + href)]
                ))

            # track
            # TODO: this anchor text may have something like (feat. some artist),
            #  which is not acceptable
            track_soup: BeautifulSoup = anchor_list[-1]
            href = track_soup.get("href")
            if href is not None:
                if "-" in href:
                    raw_id: str = href.split("-")[-1]
                    if raw_id.isdigit():
                        song_id = raw_id
                source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href))

        else:
            cls.LOGGER.debug("there are not enough anchors (2) for artist and track")
            cls.LOGGER.debug(str(artist_list))

    id_attribute = playlist_item_soup.get("id")
    if id_attribute is not None:
        raw_id = id_attribute.replace("playerDiv", "")
        if raw_id.isdigit():
            song_id = raw_id

    return Song(
        _id=song_id,
        title=song_title,
        main_artist_list=artist_list,
        source_list=source_list
    )
2023-03-17 12:11:18 +00:00
2023-03-15 19:55:28 +00:00
@classmethod
def parse_playlist_soup(cls, playlist_soup: BeautifulSoup) -> List[Song]:
    """
    Parse every ``playlist__item`` inside a playlist element.

    :param playlist_soup: element containing the ``playlist__item`` divs
    :return: one Song per item, in document order
    """
    return [
        cls.parse_playlist_item(playlist_item_soup)
        for playlist_item_soup in playlist_soup.find_all("div", {"class": "playlist__item"})
    ]
2023-03-13 14:47:38 +00:00
@classmethod
def plaintext_search(cls, query: str) -> Options:
    """
    Run a text search on musify and parse the result page.

    :param query: the text to search for
    :return: the artists, albums and songs found; empty Options if the request failed
    """
    from urllib.parse import quote

    # percent-encode the text, otherwise "&" or "#" in it would cut the parameter short
    r = cls.CONNECTION.get(f"https://musify.club/search?searchText={quote(query, safe='')}")
    if r is None:
        return Options()
    search_soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")

    search_results = []

    # artists and albums
    # div class: contacts
    for contact_container_soup in search_soup.find_all("div", {"class": "contacts"}):
        search_results.extend(cls.parse_contact_container(contact_container_soup))

    # songs
    # div class: playlist
    for playlist_soup in search_soup.find_all("div", {"class": "playlist"}):
        search_results.extend(cls.parse_playlist_soup(playlist_soup))

    return Options(search_results)
2023-03-17 12:11:18 +00:00
2023-03-17 11:31:56 +00:00
@classmethod
def parse_album_card(cls, album_card: BeautifulSoup, artist_name: str = None) -> Album:
    """
    Build an Album from one release card of an artist's discography.

    Layout the parser relies on:

        <div class="card release-thumbnail" data-type="2">
            <a href="/release/ghost-bath-self-loather-2021-1554266">
                <img alt="Self Loather" class="card-img-top lozad" data-src="https://40s-a.musify.club/img/70/24826582/62624396.jpg"/>
                <noscript><img alt="Self Loather" src="https://40s-a.musify.club/img/70/24826582/62624396.jpg"/></noscript>
            </a>
            <div class="card-body">
                <h4 class="card-subtitle">
                    <a href="/release/ghost-bath-self-loather-2021-1554266">Self Loather</a>
                </h4>
            </div>
            <div class="card-footer"><p class="card-text"><a href="/albums/2021">2021</a></p></div>
            <div class="card-footer">
                <p class="card-text genre__labels">
                    <a href="/genre/depressive-black-132">Depressive Black</a><a href="/genre/post-black-metal-295">Post-Black Metal</a></p>
            </div>
            <div class="card-footer">
                <small><i class="zmdi zmdi-calendar" title="Добавлено"></i> 13.11.2021</small>
                <small><i class="zmdi zmdi-star zmdi-hc-fw" title="Рейтинг"></i> 5,88</small>
            </div>
        </div>

    :param album_card: the card element
    :param artist_name: if given, a leading "<artist> - " is cut off names of the
        form "<artists> - <name>" when this artist is among the artists
    :return: the album; its type is AlbumType.OTHER if the card has no numeric data-type
    """
    album_type_map = defaultdict(lambda: AlbumType.OTHER, {
        1: AlbumType.OTHER,  # literally other xD
        2: AlbumType.STUDIO_ALBUM,
        3: AlbumType.EP,
        4: AlbumType.SINGLE,
        5: AlbumType.OTHER,  # BOOTLEG
        6: AlbumType.LIVE_ALBUM,
        7: AlbumType.COMPILATION_ALBUM,  # compilation of different artists
        8: AlbumType.MIXTAPE,
        9: AlbumType.DEMO,
        10: AlbumType.MIXTAPE,  # DJ Mixes
        11: AlbumType.COMPILATION_ALBUM,  # compilation of only this artist
        12: AlbumType.STUDIO_ALBUM,  # split
        13: AlbumType.COMPILATION_ALBUM,  # unofficial
        14: AlbumType.MIXTAPE  # "Soundtracks"
    })

    _id: Optional[str] = None
    name: Optional[str] = None
    source_list: List[Source] = []
    timestamp: Optional[ID3Timestamp] = None
    album_status = None
    # same fallback the map uses for unknown ids, so album_type is always bound
    album_type = AlbumType.OTHER

    def set_name(new_name: str):
        """Set the album name, without the artist prefix if it can be told apart safely."""
        nonlocal name

        # example of just setting not working:
        # https://musify.club/release/unjoy-eurythmie-psychonaut-4-tired-numb-still-alive-2012-324067
        if artist_name is None or new_name.count(" - ") != 1:
            name = new_name
            return

        potential_artist_list, potential_name = new_name.split(" - ")
        unified_artist_list = string_processing.unify(potential_artist_list)

        if string_processing.unify(artist_name) in unified_artist_list:
            name = potential_name
        else:
            name = new_name

    raw_type_id = album_card.get("data-type")
    if raw_type_id is not None and raw_type_id.isdigit():
        type_id = int(raw_type_id)
        album_type = album_type_map[type_id]
        if type_id == 5:
            album_status = AlbumStatus.BOOTLEG

    def parse_release_anchor(_anchor: BeautifulSoup, text_is_name=False):
        """Add the anchor's href as source, take the id from it and optionally the name from its text."""
        nonlocal _id

        if _anchor is None:
            return

        href = _anchor.get("href")
        if href is not None:
            # add url to sources
            source_list.append(Source(
                cls.SOURCE_TYPE,
                cls.HOST + href
            ))

            # split id from url
            split_href = href.split("-")
            if len(split_href) > 1:
                _id = split_href[-1]

        if text_is_name:
            set_name(_anchor.text)

    # the anchor directly below the card wraps the thumbnail
    anchor_list = album_card.find_all("a", recursive=False)
    if len(anchor_list) > 0:
        anchor = anchor_list[0]
        parse_release_anchor(anchor)

        thumbnail: BeautifulSoup = anchor.find("img")
        if thumbnail is not None:
            alt = thumbnail.get("alt")
            if alt is not None:
                set_name(alt)
    else:
        cls.LOGGER.debug("the card has no thumbnail or url")

    card_body = album_card.find("div", {"class": "card-body"})
    if card_body is not None:
        parse_release_anchor(card_body.find("a"), text_is_name=True)

    def parse_small_date(small_soup: BeautifulSoup):
        """
        Set the timestamp from a <small> whose icon is titled "Добавлено":

            <small>
                <i class="zmdi zmdi-calendar" title="Добавлено"></i>
                13.11.2021
            </small>
        """
        nonlocal timestamp

        italic_tagging_soup: BeautifulSoup = small_soup.find("i")
        if italic_tagging_soup is None:
            return

        if italic_tagging_soup.get("title") != "Добавлено":
            # "Добавлено" can be translated to "Added (at)"
            return

        raw_time = small_soup.text.strip()
        timestamp = ID3Timestamp.strptime(raw_time, "%d.%m.%Y")

    # parse small date; it is searched in the last footer only
    card_footer_list = album_card.find_all("div", {"class": "card-footer"})
    if len(card_footer_list) != 3:
        cls.LOGGER.debug("there are not exactly 3 card footers in a card")

    if len(card_footer_list) > 0:
        for any_small_soup in card_footer_list[-1].find_all("small"):
            parse_small_date(any_small_soup)
    else:
        cls.LOGGER.debug("there is not even 1 footer in the album card")

    return Album(
        _id=_id,
        title=name,
        source_list=source_list,
        date=timestamp,
        album_type=album_type,
        album_status=album_status
    )
2023-03-17 12:11:18 +00:00
2023-03-17 11:31:56 +00:00
@classmethod
def get_discography(cls, url: MusifyUrl, artist_name: str = None, stop_at_level: int = 1) -> List[Album]:
    """
    Fetch the release cards of an artist and parse them into albums.

    POST https://musify.club/artist/filteralbums
    ArtistID: 280348
    SortOrder.Property: dateCreated
    SortOrder.IsAscending: false
    X-Requested-With: XMLHttpRequest

    :param url: parsed url of the artist page
    :param artist_name: passed on to ``parse_album_card``
    :param stop_at_level: if greater than 1, every album is additionally fetched
        from its musify sources with ``stop_at_level - 1`` and merged in
    :return: the albums; an empty list if the request failed
    """
    endpoint = f"{cls.HOST}/{url.source_type.value}/filteralbums"

    r = cls.post_request(url=endpoint, json={
        "ArtistID": str(url.musify_id),
        "SortOrder.Property": "dateCreated",
        "SortOrder.IsAscending": False,
        "X-Requested-With": "XMLHttpRequest"
    })
    if r is None:
        return []
    soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")

    fetch_albums = stop_at_level > 1

    discography: List[Album] = []
    for card_soup in soup.find_all("div", {"class": "card"}):
        new_album: Album = cls.parse_album_card(card_soup, artist_name)

        if fetch_albums:
            album_source: Source
            for album_source in new_album.source_collection.get_sources_from_page(cls.SOURCE_TYPE):
                new_album.merge(cls._fetch_album_from_source(album_source, stop_at_level=stop_at_level - 1))

        discography.append(new_album)

    return discography
2023-03-17 12:11:18 +00:00
2023-03-17 17:16:06 +00:00
@classmethod
def get_artist_attributes(cls, url: MusifyUrl) -> Artist:
    """
    Fetch the main artist attributes by parsing the html of the artist page,
    e.g. https://musify.club/artist/ghost-bath-280348?_pjax=#bodyContent

    Parsed are the name, the country, the notes, the url of the song tab
    and the additional links listed on the page.

    :param url: parsed url of the artist page
    :return: the artist; only the id is set if the request failed
    """
    r = cls.CONNECTION.get(f"https://musify.club/{url.source_type.value}/{url.name_with_id}?_pjax=#bodyContent")
    if r is None:
        return Artist(_id=url.musify_id)

    soup = BeautifulSoup(r.content, "html.parser")

    name = None
    source_list: List[Source] = []
    country = None
    notes: FormattedText = FormattedText()

    # name from the breadcrumbs, the last of three items:
    # <ol class="breadcrumb" ...>
    #     <li class="breadcrumb-item" ...><a href="/" ...><span itemprop="name">Главная</span>...</a></li>
    #     <li class="breadcrumb-item" ...><a href="/artist" ...><span itemprop="name">Исполнители</span>...</a></li>
    #     <li class="breadcrumb-item active">Ghost Bath</li>
    # </ol>
    breadcrumbs: BeautifulSoup = soup.find("ol", {"class": "breadcrumb"})
    if breadcrumbs is not None:
        breadcrumb_list: List[BeautifulSoup] = breadcrumbs.find_all("li", {"class": "breadcrumb-item"}, recursive=False)
        if len(breadcrumb_list) == 3:
            name = breadcrumb_list[-1].get_text(strip=True)
        else:
            cls.LOGGER.debug("breadcrumb layout on artist page changed")

    # source from the song tab:
    # <ul class="nav nav-tabs nav-fill">
    #     <li class="nav-item"><a class="active nav-link" href="/artist/ghost-bath-280348">песни (41)</a></li>
    #     <li class="nav-item"><a class="nav-link" href="/artist/ghost-bath-280348/releases">альбомы (12)</a></li>
    #     ...
    # </ul>
    nav_tabs: BeautifulSoup = soup.find("ul", {"class": "nav-tabs"})
    if nav_tabs is not None:
        list_item: BeautifulSoup
        for list_item in nav_tabs.find_all("li", {"class": "nav-item"}, recursive=False):
            if not list_item.get_text(strip=True).startswith("песни"):
                # "песни" translates to "songs"
                continue

            anchor: BeautifulSoup = list_item.find("a")
            if anchor is None:
                continue
            href = anchor.get("href")
            if href is None:
                continue

            source_list.append(Source(
                cls.SOURCE_TYPE,
                cls.HOST + href
            ))

    # the headline overrides the name from the breadcrumbs:
    # <header class="content__title"><h1>Ghost Bath</h1>...</header>
    content_title: BeautifulSoup = soup.find("header", {"class": "content__title"})
    if content_title is not None:
        h1_name: BeautifulSoup = content_title.find("h1", recursive=False)
        if h1_name is not None:
            name = h1_name.get_text(strip=True)

    # country and sources:
    # <ul class="icon-list">
    #     <li>
    #         <i class="zmdi zmdi-globe zmdi-hc-fw" title="Страна"></i>
    #         <i class="flag-icon US shadow"></i>
    #         Соединенные Штаты
    #     </li>
    # </ul>
    icon_list: BeautifulSoup = soup.find("ul", {"class": "icon-list"})
    if icon_list is not None:
        country_italic: BeautifulSoup = icon_list.find("i", {"class": "flag-icon"})
        if country_italic is not None:
            # whatever class is left besides the styling ones is the country code
            style_classes: set = {'flag-icon', 'shadow'}
            classes: set = set(country_italic.get("class"))
            country_set: set = classes.difference(style_classes)

            if len(country_set) != 1:
                cls.LOGGER.debug("the country set contains multiple values")
            if len(country_set) != 0:
                # This is the css file, where all flags that can be used on musify
                # are laid out and styled.
                # Every flag has two upper case letters, thus I assume they follow the alpha_2
                # https://musify.club/content/flags.min.css
                country = pycountry.countries.get(alpha_2=list(country_set)[0])

        # get all additional sources
        additional_source: BeautifulSoup
        for additional_source in icon_list.find_all("a", {"class": "link"}):
            href = additional_source.get("href")
            if href is None:
                continue

            new_src = Source.match_url(href, referer_page=cls.SOURCE_TYPE)
            if new_src is None:
                continue

            source_list.append(new_src)

    note_soup: BeautifulSoup = soup.find(id="text-main")
    if note_soup is not None:
        notes.html = note_soup.decode_contents()

    return Artist(
        _id=url.musify_id,
        name=name,
        country=country,
        source_list=source_list,
        notes=notes
    )
2023-03-17 17:16:06 +00:00
2023-03-17 11:31:56 +00:00
@classmethod
def _fetch_artist_from_source(cls, source: Source, stop_at_level: int = 1) -> Artist:
    """
    fetches artist from source

    [x] discography
    [x] attributes
    [] picture gallery

    Args:
        source (Source): the source to fetch
        stop_at_level (int): passed on to ``get_discography``; with a value
            greater than 1 every album of the discography is fetched as well.
            Defaults to 1.

    Returns:
        Artist: the artist fetched
    """
    url = cls.parse_url(source.url)

    artist = cls.get_artist_attributes(url)

    # forward stop_at_level, otherwise the parameter has no effect at all
    discography: List[Album] = cls.get_discography(url, artist.name, stop_at_level=stop_at_level)
    artist.main_album_collection.extend(discography)

    return artist
2023-03-20 21:27:05 +00:00
@classmethod
def parse_song_card ( cls , song_card : BeautifulSoup ) - > Song :
"""
< div id = " playerDiv3051 " class = " playlist__item " itemprop = " track " itemscope = " itemscope " itemtype = " http://schema.org/MusicRecording " data - artist = " Linkin Park " data - name = " Papercut " >
< div id = " play_3051 " class = " playlist__control play " data - url = " /track/play/3051/linkin-park-papercut.mp3 " data - position = " 1 " data - title = " Linkin Park - Papercut " title = " Слушать Linkin Park - Papercut " >
< span class = " ico-play " > < i class = " zmdi zmdi-play-circle-outline zmdi-hc-2-5x " > < / i > < / span >
< span class = " ico-pause " > < i class = " zmdi zmdi-pause-circle-outline zmdi-hc-2-5x " > < / i > < / span >
< / div >
< div class = " playlist__position " >
1
< / div >
< div class = " playlist__details " >
< div class = " playlist__heading " >
< a href = " /artist/linkin-park-5 " rel = " nofollow " > Linkin Park < / a > - < a class = " strong " href = " /track/linkin-park-papercut-3051 " > Papercut < / a >
< span itemprop = " byArtist " itemscope = " itemscope " itemtype = " http://schema.org/MusicGroup " >
< meta content = " /artist/linkin-park-5 " itemprop = " url " / >
< meta content = " Linkin Park " itemprop = " name " / >
< / span >
< / div >
< / div >
< div >
< div class = " track__details track__rating hidden-xs-down " >
< span class = " text-muted " >
< i class = " zmdi zmdi-star-circle zmdi-hc-1-3x " title = " Рейтинг " > < / i >
326 , 3 K
< / span >
< / div >
< / div >
< div class = " track__details hidden-xs-down " >
< span class = " text-muted " > 03 : 05 < / span >
< span class = " text-muted " > 320 К б / с < / span >
< / div >
< div class = " track__details hidden-xs-down " >
< span title = ' Есть видео Linkin Park - Papercut ' > < i class = ' zmdi zmdi-videocam zmdi-hc-1-3x ' > < / i > < / span >
< span title = ' Есть текст Linkin Park - Papercut ' > < i class = ' zmdi zmdi-file-text zmdi-hc-1-3x ' > < / i > < / span >
< / div >
< div class = " playlist__actions " >
< span class = " pl-btn save-to-pl " id = " add_3051 " title = " Сохранить в плейлист " > < i class = " zmdi zmdi-plus zmdi-hc-1-5x " > < / i > < / span >
< a target = " _blank " itemprop = " audio " download = " Linkin Park - Papercut.mp3 " href = " /track/dl/3051/linkin-park-papercut.mp3 " class = " no-ajaxy yaBrowser " id = " dl_3051 " title = ' Скачать Linkin Park - Papercut ' >
< span > < i class = " zmdi zmdi-download zmdi-hc-2-5x " > < / i > < / span >
< / a >
< / div >
< / div >
"""
song_name = song_card . get ( " data-name " )
artist_list : List [ Artist ] = [ ]
2023-03-20 22:11:55 +00:00
source_list : List [ Source ] = [ ]
2023-03-20 21:27:05 +00:00
tracksort = None
2023-04-18 13:24:39 +00:00
current_url = None
2023-03-20 22:11:55 +00:00
def parse_title ( _title : str ) - > str :
return _title
"""
2023-03-20 21:27:05 +00:00
# get from parent div
_artist_name = song_card . get ( " data-artist " )
if _artist_name is not None :
artist_list . append ( Artist ( name = _artist_name ) )
2023-03-20 22:11:55 +00:00
"""
2023-03-20 21:27:05 +00:00
# get tracksort
tracksort_soup : BeautifulSoup = song_card . find ( " div " , { " class " : " playlist__position " } )
if tracksort_soup is not None :
raw_tracksort : str = tracksort_soup . get_text ( strip = True )
if raw_tracksort . isdigit ( ) :
tracksort = int ( raw_tracksort )
# playlist details
2023-03-20 22:11:55 +00:00
playlist_details : BeautifulSoup = song_card . find ( " div " , { " class " : " playlist__details " } )
if playlist_details is not None :
"""
< div class = " playlist__heading " >
< a href = " /artist/tamas-141317 " rel = " nofollow " > Tamas < / a > ft . < a href = " /artist/zombiez-630767 " rel = " nofollow " > Zombiez < / a > - < a class = " strong " href = " /track/tamas-zombiez-voodoo-feat-zombiez-16185276 " > Voodoo ( Feat . Zombiez ) < / a >
< span itemprop = " byArtist " itemscope = " itemscope " itemtype = " http://schema.org/MusicGroup " >
< meta content = " /artist/tamas-141317 " itemprop = " url " / >
< meta content = " Tamas " itemprop = " name " / >
< / span >
< span itemprop = " byArtist " itemscope = " itemscope " itemtype = " http://schema.org/MusicGroup " >
< meta content = " /artist/zombiez-630767 " itemprop = " url " / >
< meta content = " Zombiez " itemprop = " name " / >
< / span >
< / div >
"""
# track
anchor_list : List [ BeautifulSoup ] = playlist_details . find_all ( " a " )
if len ( anchor_list ) > 1 :
track_anchor : BeautifulSoup = anchor_list [ - 1 ]
href : str = track_anchor . get ( " href " )
if href is not None :
2023-04-18 13:24:39 +00:00
current_url = cls . HOST + href
2023-03-20 22:11:55 +00:00
source_list . append ( Source ( cls . SOURCE_TYPE , cls . HOST + href ) )
song_name = parse_title ( track_anchor . get_text ( strip = True ) )
# artist
artist_span : BeautifulSoup
for artist_span in playlist_details . find_all ( " span " , { " itemprop " : " byArtist " } ) :
_artist_src = None
_artist_name = None
meta_artist_src = artist_span . find ( " meta " , { " itemprop " : " url " } )
if meta_artist_src is not None :
meta_artist_url = meta_artist_src . get ( " content " )
if meta_artist_url is not None :
_artist_src = [ Source ( cls . SOURCE_TYPE , cls . HOST + meta_artist_url ) ]
meta_artist_name = artist_span . find ( " meta " , { " itemprop " : " name " } )
if meta_artist_name is not None :
meta_artist_name_text = meta_artist_name . get ( " content " )
_artist_name = meta_artist_name_text
if _artist_name is not None or _artist_src is not None :
2023-03-22 11:58:11 +00:00
artist_list . append ( Artist ( name = _artist_name , source_list = _artist_src ) )
2023-03-20 21:27:05 +00:00
2023-04-18 13:13:18 +00:00
# playlist actions
playlist_actions : BeautifulSoup = song_card . find ( " div " , { " class " : " playlist__actions " } )
if playlist_actions is not None :
"""
< div class = " playlist__actions " >
< span class = " pl-btn save-to-pl " id = " add_3051 " title = " Сохранить в плейлист " > < i class = " zmdi zmdi-plus zmdi-hc-1-5x " > < / i > < / span >
< a target = " _blank " itemprop = " audio " download = " Linkin Park - Papercut.mp3 " href = " /track/dl/3051/linkin-park-papercut.mp3 " class = " no-ajaxy yaBrowser " id = " dl_3051 " title = ' Скачать Linkin Park - Papercut ' >
< span > < i class = " zmdi zmdi-download zmdi-hc-2-5x " > < / i > < / span >
< / a >
< / div >
"""
# getting the actual download link:
download_anchor = playlist_actions . find ( " a " , { " itemprop " : " audio " } )
if download_anchor is not None :
download_href = download_anchor . get ( " href " )
2023-04-18 13:24:39 +00:00
if download_href is not None and current_url is not None :
2023-04-18 13:13:18 +00:00
source_list . append ( Source (
cls . SOURCE_TYPE ,
2023-04-18 13:24:39 +00:00
url = current_url ,
2023-04-18 13:13:18 +00:00
adio_url = cls . HOST + download_href
) )
2023-03-20 21:27:05 +00:00
return Song (
title = song_name ,
tracksort = tracksort ,
2023-03-31 08:46:56 +00:00
main_artist_list = artist_list ,
source_list = source_list
2023-03-20 21:27:05 +00:00
)
2023-04-03 17:59:31 +00:00
@classmethod
def _parse_album(cls, soup: BeautifulSoup) -> Album:
    """
    Build an Album from the parsed html of a release page.

    Three parts of the page are read, in this order:
    1. the breadcrumb list: the album title and, from the crumb before it, one artist
    2. the first meta tags with itemprop "url" and "name": the album source and
       the album title (a title found here replaces the one from the breadcrumb)
    3. the "album-info" list: further artists and the publishing date

    Every part is optional. Whatever is missing on the page is simply left out.

    :param soup: the parsed release page
    :return: an Album holding the title, sources, artists and date that were found
    """
    name: Optional[str] = None
    source_list: List[Source] = []
    artist_list: List[Artist] = []
    date: Optional[ID3Timestamp] = None

    # breadcrumb
    # if the breadcrumb list has 4 elements, then
    # the -2 is the artist link,
    # the -1 is the album
    breadcrumb_soup: Optional[BeautifulSoup] = soup.find("ol", {"class": "breadcrumb"})
    # a page without a breadcrumb list is treated like one with no crumbs at all,
    # so it ends up in the debug branch below instead of raising an AttributeError
    breadcrumb_elements: List[BeautifulSoup] = []
    if breadcrumb_soup is not None:
        breadcrumb_elements = breadcrumb_soup.find_all("li", {"class": "breadcrumb-item"})

    if len(breadcrumb_elements) == 4:
        # album
        album_crumb: BeautifulSoup = breadcrumb_elements[-1]
        name = album_crumb.text.strip()

        # artist
        artist_crumb: BeautifulSoup = breadcrumb_elements[-2]
        anchor: BeautifulSoup = artist_crumb.find("a")
        if anchor is not None:
            href = anchor.get("href")
            artist_source_list: List[Source] = []

            if href is not None:
                artist_source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + href.strip()))

            # the artist is only added if the anchor contains a span with the name
            span: BeautifulSoup = anchor.find("span")
            if span is not None:
                artist_list.append(Artist(
                    name=span.get_text(strip=True),
                    source_list=artist_source_list
                ))
    else:
        cls.LOGGER.debug("there are not 4 breadcrumb items, which shouldn't be the case")

    # meta
    meta_url: BeautifulSoup = soup.find("meta", {"itemprop": "url"})
    if meta_url is not None:
        url = meta_url.get("content")
        if url is not None:
            source_list.append(Source(cls.SOURCE_TYPE, cls.HOST + url))

    meta_name: BeautifulSoup = soup.find("meta", {"itemprop": "name"})
    if meta_name is not None:
        _name = meta_name.get("content")
        if _name is not None:
            name = _name

    # album info
    album_info_ul: BeautifulSoup = soup.find("ul", {"class": "album-info"})
    if album_info_ul is not None:
        artist_anchor: BeautifulSoup
        for artist_anchor in album_info_ul.find_all("a", {"itemprop": "byArtist"}):
            artist_source_list: List[Source] = []

            artist_url_meta = artist_anchor.find("meta", {"itemprop": "url"})
            if artist_url_meta is not None:
                artist_href = artist_url_meta.get("content")
                if artist_href is not None:
                    artist_source_list.append(Source(cls.SOURCE_TYPE, url=cls.HOST + artist_href))

            # an artist without a name is skipped, even if a url was found
            artist_meta_name = artist_anchor.find("meta", {"itemprop": "name"})
            if artist_meta_name is not None:
                artist_name = artist_meta_name.get("content")
                if artist_name is not None:
                    artist_list.append(Artist(
                        name=artist_name,
                        source_list=artist_source_list
                    ))

        time_soup: BeautifulSoup = album_info_ul.find("time", {"itemprop": "datePublished"})
        if time_soup is not None:
            raw_datetime = time_soup.get("datetime")
            if raw_datetime is not None:
                try:
                    date = ID3Timestamp.strptime(raw_datetime, "%Y-%m-%d")
                except ValueError:
                    # an unparsable date is not fatal, the album just gets no date
                    cls.LOGGER.debug(f"Raw datetime doesn't match time format %Y-%m-%d: {raw_datetime}")

    return Album(
        title=name,
        source_list=source_list,
        artist_list=artist_list,
        date=date
    )
2023-03-17 11:31:56 +00:00
@classmethod
def _fetch_album_from_source(cls, source: Source, stop_at_level: int = 1) -> Album:
    """
    Request a release page and turn it into an Album.

    eg. 'https://musify.club/release/linkin-park-hybrid-theory-2000-188'

    /html/musify/album_overview.html
    - [x] tracklist
    - [x] attributes
    - [ ] ratings

    :param source: the source whose url points at the release
    :param stop_at_level: if greater than 1, every song of the tracklist is
        additionally fetched from its own sources of this page and merged in
    :return: the parsed album, or an empty Album if the connection returned None
    """
    parsed_url = cls.parse_url(source.url)

    response = cls.CONNECTION.get(cls.HOST + "/release/" + parsed_url.name_with_id)
    if response is None:
        return Album()

    page = BeautifulSoup(response.content, "html.parser")
    album = cls._parse_album(page)

    # the tracklist lives in
    # <div class="card"><div class="card-body">...</div></div>
    track_container: BeautifulSoup = page.find("div", {"class": "card-body"})
    if track_container is not None:
        track_card: BeautifulSoup
        for track_card in track_container.find_all("div", {"class": "playlist__item"}):
            album.song_collection.append(cls.parse_song_card(track_card))

    if stop_at_level > 1:
        song: Song
        for song in album.song_collection:
            for song_source in song.source_collection.get_sources_from_page(cls.SOURCE_TYPE):
                song.merge(cls._fetch_song_from_source(source=song_source))

    album.update_tracksort()
    return album
2023-03-30 10:00:39 +00:00
@classmethod
def _get_type_of_url(cls, url: str) -> Optional[Union[Type[Song], Type[Album], Type[Artist], Type[Label]]]:
    """
    Map a url onto the data object class it stands for.

    :param url: the url to classify
    :return: Artist, Album or Song, depending on the source type of the parsed
        url, and None for every other source type
    """
    source_type = cls.parse_url(url).source_type

    # checked from top to bottom, the first equal source type wins
    object_class_by_type = (
        (MusifyTypes.ARTIST, Artist),
        (MusifyTypes.RELEASE, Album),
        (MusifyTypes.SONG, Song),
    )
    for musify_type, object_class in object_class_by_type:
        if source_type == musify_type:
            return object_class

    return None
2023-03-30 14:10:48 +00:00
@classmethod
def _download_song_to_targets(cls, source: Source, target: Target, desc: str = None) -> DownloadResult:
    """
    Stream the audio file belonging to a song source into the target.

    song page:  https://musify.club/track/im-in-a-coffin-life-never-was-waste-of-skin-16360302
    audio file: https://musify.club/track/dl/16360302/im-in-a-coffin-life-never-was-waste-of-skin.mp3

    If the source carries no audio url, the download url is built from the id
    and the name in the url of the song page, as shown above.

    :param source: the source of the song, its audio_url is preferred over its url
    :param target: the target the response is streamed into
    :param desc: handed over to target.stream_into
    :return: DownloadResult(total=1) if streaming succeeded, otherwise a
        DownloadResult carrying an error message
    """
    endpoint = source.audio_url

    if endpoint is None:
        parsed_url: MusifyUrl = cls.parse_url(source.url)
        if parsed_url.source_type != MusifyTypes.SONG:
            return DownloadResult(error_message=f"The url is not of the type Song: {source.url}")

        endpoint = f"https://musify.club/track/dl/{parsed_url.musify_id}/{parsed_url.name_without_id}.mp3"
        cls.LOGGER.warning(f"The source has no audio link. Falling back to {endpoint}.")

    # NOTE(review): a url as the value of the "Connection" header looks unintended,
    # presumably "Referer" was meant. Kept as is, confirm against the server behaviour.
    request_headers = {"Connection": "https://musify.club/"}
    response = cls.CONNECTION.get(endpoint, stream=True, allow_redirects=True, headers=request_headers)
    if response is None:
        return DownloadResult(error_message=f"couldn't connect to {endpoint}")

    if not target.stream_into(response, desc=desc):
        return DownloadResult(error_message=f"Streaming to the file went wrong: {endpoint}, {str(target.file_path)}")

    return DownloadResult(total=1)