From 05c96106833a4c0ddbb9570a644904e65da87c30 Mon Sep 17 00:00:00 2001
From: Hellow
Date: Fri, 17 Mar 2023 18:16:06 +0100
Subject: [PATCH] moved error handling of get and post request into abstract.py instead do it new in every function

---
 src/music_kraken/pages/abstract.py | 74 +++++++++++++++++++++++++++++++++++---
 src/music_kraken/pages/musify.py   | 60 ++++++++++++++----------------
 2 files changed, 97 insertions(+), 37 deletions(-)

diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py
index a3ca28a..6438129 100644
--- a/src/music_kraken/pages/abstract.py
+++ b/src/music_kraken/pages/abstract.py
@@ -1,6 +1,10 @@
-from typing import (
-    List
-)
+from typing import Optional
+import requests
+import logging
+
+LOGGER = logging.getLogger("this shouldn't be used")
+
+from ..utils import shared
 
 from ..objects import (
     Song,
@@ -20,6 +24,66 @@ class Page:
     functionality for every other class fetching something
     """
 
+    API_SESSION: requests.Session = requests.Session()
+    API_SESSION.proxies = shared.proxies
+    TIMEOUT = 5
+    TRIES = 5
+
+    @classmethod
+    def get_request(cls, url: str, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[
+        requests.Response]:
+        """
+        sends a GET request and tries again if the status code is not accepted
+
+        :param url: the url to request
+        :param accepted_response_codes: status codes which count as success
+        :param trie: how many attempts were already made, only set by the recursion
+        :return: the response, or None on a timeout or after cls.TRIES retries
+        """
+        try:
+            r = cls.API_SESSION.get(url, timeout=cls.TIMEOUT)
+        except requests.exceptions.Timeout:
+            return None
+
+        if r.status_code in accepted_response_codes:
+            return r
+
+        LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at {url}. ({trie}-{cls.TRIES})")
+        LOGGER.debug(r.content)
+
+        if trie >= cls.TRIES:  # retry budget used up, stop instead of recursing forever
+            LOGGER.warning("to many tries. Aborting.")
+            return None
+        return cls.get_request(url, accepted_response_codes, trie + 1)
+
+    @classmethod
+    def post_request(cls, url: str, json: dict, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[
+        requests.Response]:
+        """
+        sends a POST request and tries again if the status code is not accepted
+
+        :param url: the url to request
+        :param json: the payload, sent as the json body on every attempt
+        :param accepted_response_codes: status codes which count as success
+        :param trie: how many attempts were already made, only set by the recursion
+        :return: the response, or None on a timeout or after cls.TRIES retries
+        """
+        try:
+            r = cls.API_SESSION.post(url, json=json, timeout=cls.TIMEOUT)
+        except requests.exceptions.Timeout:
+            return None
+
+        if r.status_code in accepted_response_codes:
+            return r
+
+        LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at {url}. ({trie}-{cls.TRIES})")
+        LOGGER.debug(r.content)
+
+        if trie >= cls.TRIES:  # retry budget used up, stop instead of recursing forever
+            LOGGER.warning("to many tries. Aborting.")
+            return None
+        return cls.post_request(url, json, accepted_response_codes, trie + 1)
+
     class Query:
         def __init__(self, query: str):
             self.query = query
@@ -70,7 +134,7 @@ class Page:
         song_str = property(fget=lambda self: self.get_str(self.song))
 
     @classmethod
-    def search_by_query(cls, query: str) -> Options: 
+    def search_by_query(cls, query: str) -> Options:
         """
         # The Query
         You can define a new parameter with "#",
@@ -106,7 +170,7 @@ class Page:
             song = cls.fetch_song_details(music_object, flat=flat)
             song.compile()
             return song
-            
+
         if type(music_object) == Album:
             album = cls.fetch_album_details(music_object, flat=flat)
             album.compile()
diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py
index 8a629b8..fbedc62 100644
--- a/src/music_kraken/pages/musify.py
+++ b/src/music_kraken/pages/musify.py
@@ -79,11 +79,12 @@ class Musify(Page):
         "Referer": "https://musify.club/"
     }
     API_SESSION.proxies = shared.proxies
+    TIMEOUT = 5
+    TRIES = 5
+    HOST = "https://musify.club"
 
     SOURCE_TYPE = SourcePages.MUSIFY
 
-    HOST = "https://musify.club"
-
     @classmethod
     def search_by_query(cls, query: str) -> Options:
         query_obj = cls.Query(query)
@@ -98,25 +99,6 @@ class Musify(Page):
             return f"{query.artist or '*'} - {query.song or '*'}"
         return f"{query.artist or '*'} - {query.album or '*'} - {query.song or '*'}"
 
-    @classmethod
-    def get_soup_of_search(cls, query: str, trie=0) -> Optional[BeautifulSoup]:
-        url = f"https://musify.club/search?searchText={query}"
-        LOGGER.debug(f"Trying to get soup from {url}")
-        try:
-            r = cls.API_SESSION.get(url, timeout=15)
-        except requests.exceptions.Timeout:
-            return None
-        if r.status_code != 200:
-            if r.status_code in [503] and trie < cls.TRIES:
-                LOGGER.warning(f"{cls.__name__} blocked downloading. ({trie}-{cls.TRIES})")
-                LOGGER.warning(f"retrying in {cls.TIMEOUT} seconds again")
-                time.sleep(cls.TIMEOUT)
-                return cls.get_soup_of_search(query, trie=trie + 1)
-
-            LOGGER.warning("too many tries, returning")
-            return None
-        return BeautifulSoup(r.content, features="html.parser")
-
     @classmethod
     def parse_artist_contact(cls, contact: BeautifulSoup) -> Artist:
         source_list: List[Source] = []
@@ -356,9 +338,10 @@ class Musify(Page):
     def plaintext_search(cls, query: str) -> Options:
         search_results = []
 
-        search_soup = cls.get_soup_of_search(query=query)
-        if search_soup is None:
-            return None
+        r = cls.get_request(f"https://musify.club/search?searchText={query}")
+        if r is None:
+            return Options()
+        search_soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")
 
         # album and songs
         # child of div class: contacts row
@@ -541,13 +524,14 @@ class Musify(Page):
 
         endpoint = cls.HOST + "/" + url.source_type.value + "/filteralbums"
 
-        r = cls.API_SESSION.post(url=endpoint, json={
+        r = cls.post_request(url=endpoint, json={
             "ArtistID": str(url.musify_id),
             "SortOrder.Property": "dateCreated",
             "SortOrder.IsAscending": False,
             "X-Requested-With": "XMLHttpRequest"
         })
-
+        if r is None:
+            return []
         soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")
 
         discography: List[Album] = []
@@ -556,6 +540,20 @@ class Musify(Page):
 
         return discography
 
+    @classmethod
+    def get_artist_attributes(cls, url: MusifyUrl) -> Artist:
+        """
+        fetches the main Artist attributes from this endpoint
+        https://musify.club/artist/ghost-bath-280348?_pjax=#bodyContent
+        it needs to parse html
+
+        :param url:
+        :return:
+        """
+        return Artist(
+            name=""
+        )
+
     @classmethod
     def get_artist_from_source(cls, source: Source, flat: bool = False) -> Artist:
         """
@@ -573,16 +571,14 @@ class Musify(Page):
             Artist: the artist fetched
         """
 
-        print(source)
         url = cls.parse_url(source.url)
-        print(url)
+
+        artist = cls.get_artist_attributes(url)
 
         discography: List[Album] = cls.get_discography(url)
+        artist.main_album_collection.extend(discography)
 
-        return Artist(
-            name="",
-            main_album_list=discography
-        )
+        return artist
 
     @classmethod
     def fetch_artist_details(cls, artist: Artist, flat: bool = False) -> Artist: