Moved the error handling of GET and POST requests into abstract.py instead of reimplementing it in every function

2023-03-17 18:16:06 +01:00
parent 924bd01e1d
commit 05c9610683
2 changed files with 80 additions and 37 deletions
--- a/src/music_kraken/pages/abstract.py
+++ b/src/music_kraken/pages/abstract.py
@@ -1,6 +1,10 @@
-from typing import (
-    List
-)
+from typing import Optional
+import requests
+import logging
+
+LOGGER = logging.getLogger("this shouldn't be used")
+
+from ..utils import shared

 from ..objects import (
    Song,
@@ -20,6 +24,49 @@ class Page:
    functionality for every other class fetching something
    """

+    # session used by get_request and post_request (accessed through cls.API_SESSION)
+    API_SESSION: requests.Session = requests.Session()
+    API_SESSION.proxies = shared.proxies
+    # passed as the `timeout` argument to every get/post call made on the session
+    TIMEOUT = 5
+    # retry budget referenced by get_request and post_request
+    TRIES = 5
+
+    @classmethod
+    def get_request(cls, url: str, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[
+        requests.Response]:
+        """
+        Sends a GET request through the class session and retries when the
+        status code is not one of the accepted ones.
+
+        :param url: the url to request
+        :param accepted_response_codes: status codes treated as success (the default set is only read, never mutated)
+        :param trie: number of retries already done for this url, callers leave it at 0
+        :return: the response if its status code was accepted, None on a timeout or once cls.TRIES retries are used up
+        """
+        try:
+            r = cls.API_SESSION.get(url, timeout=cls.TIMEOUT)
+        except requests.exceptions.Timeout:
+            # a timed out request is given up immediately, it is not retried
+            return None
+
+        if r.status_code in accepted_response_codes:
+            return r
+
+        LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at {url}. ({trie}-{cls.TRIES})")
+        LOGGER.debug(r.content)
+
+        # stop once the retry budget is used up; without returning here the
+        # recursion would only end in a RecursionError
+        if trie >= cls.TRIES:
+            LOGGER.warning("to many tries. Aborting.")
+            return None
+
+        return cls.get_request(url, accepted_response_codes, trie + 1)
+
+    @classmethod
+    def post_request(cls, url: str, json: dict, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[
+        requests.Response]:
+        """
+        Sends a POST request with a json body through the class session and
+        retries when the status code is not one of the accepted ones.
+
+        :param url: the url to post to
+        :param json: the payload, handed to the session as the `json` argument
+        :param accepted_response_codes: status codes treated as success (the default set is only read, never mutated)
+        :param trie: number of retries already done for this url, callers leave it at 0
+        :return: the response if its status code was accepted, None on a timeout or once cls.TRIES retries are used up
+        """
+        try:
+            r = cls.API_SESSION.post(url, json=json, timeout=cls.TIMEOUT)
+        except requests.exceptions.Timeout:
+            # a timed out request is given up immediately, it is not retried
+            return None
+
+        if r.status_code in accepted_response_codes:
+            return r
+
+        LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at {url}. ({trie}-{cls.TRIES})")
+        LOGGER.debug(r.content)
+
+        # stop once the retry budget is used up; without returning here the
+        # recursion would only end in a RecursionError
+        if trie >= cls.TRIES:
+            LOGGER.warning("to many tries. Aborting.")
+            return None
+
+        # the payload has to be passed along again, otherwise the accepted
+        # codes would be sent as the json body of the retry
+        return cls.post_request(url, json, accepted_response_codes, trie + 1)
+
    class Query:
        def __init__(self, query: str):
            self.query = query
--- a/src/music_kraken/pages/musify.py
+++ b/src/music_kraken/pages/musify.py
@@ -79,11 +79,12 @@ class Musify(Page):
        "Referer": "https://musify.club/"
    }
    API_SESSION.proxies = shared.proxies
+    TIMEOUT = 5
+    TRIES = 5
+    HOST = "https://musify.club"

    SOURCE_TYPE = SourcePages.MUSIFY

-    HOST = "https://musify.club"
-
    @classmethod
    def search_by_query(cls, query: str) -> Options:
        query_obj = cls.Query(query)
@@ -98,25 +99,6 @@ class Musify(Page):
            return f"{query.artist or '*'} - {query.song or '*'}"
        return f"{query.artist or '*'} - {query.album or '*'} - {query.song or '*'}"

-    @classmethod
-    def get_soup_of_search(cls, query: str, trie=0) -> Optional[BeautifulSoup]:
-        url = f"https://musify.club/search?searchText={query}"
-        LOGGER.debug(f"Trying to get soup from {url}")
-        try:
-            r = cls.API_SESSION.get(url, timeout=15)
-        except requests.exceptions.Timeout:
-            return None
-        if r.status_code != 200:
-            if r.status_code in [503] and trie < cls.TRIES:
-                LOGGER.warning(f"{cls.__name__} blocked downloading. ({trie}-{cls.TRIES})")
-                LOGGER.warning(f"retrying in {cls.TIMEOUT} seconds again")
-                time.sleep(cls.TIMEOUT)
-                return cls.get_soup_of_search(query, trie=trie + 1)
-
-            LOGGER.warning("too many tries, returning")
-            return None
-        return BeautifulSoup(r.content, features="html.parser")
-
    @classmethod
    def parse_artist_contact(cls, contact: BeautifulSoup) -> Artist:
        source_list: List[Source] = []
@@ -356,9 +338,10 @@ class Musify(Page):
    def plaintext_search(cls, query: str) -> Options:
        search_results = []

-        search_soup = cls.get_soup_of_search(query=query)
-        if search_soup is None:
-            return None
+        r = cls.get_request(f"https://musify.club/search?searchText={query}")
+        if r is None:
+            return Options()
+        search_soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")

        # album and songs
        # child of div class: contacts row
@@ -541,13 +524,14 @@ class Musify(Page):

        endpoint = cls.HOST + "/" + url.source_type.value + "/filteralbums"

-        r = cls.API_SESSION.post(url=endpoint, json={
+        r = cls.post_request(url=endpoint, json={
            "ArtistID": str(url.musify_id),
            "SortOrder.Property": "dateCreated",
            "SortOrder.IsAscending": False,
            "X-Requested-With": "XMLHttpRequest"
        })
-
+        if r is None:
+            return []
        soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")

        discography: List[Album] = []
@@ -556,6 +540,20 @@ class Musify(Page):

        return discography

+    @classmethod
+    def get_artist_attributes(cls, url: MusifyUrl) -> Artist:
+        """
+        Placeholder for fetching the main artist attributes.
+
+        The attributes are meant to come from an endpoint such as
+        https://musify.club/artist/ghost-bath-280348?_pjax=#bodyContent
+        whose html needs to be parsed. Nothing is requested yet: `url` is
+        unused and an artist with an empty name is returned.
+
+        :param url: the parsed musify url of the artist (currently unused)
+        :return: an Artist whose name is the empty string
+        """
+        empty_artist = Artist(name="")
+        return empty_artist
+
    @classmethod
    def get_artist_from_source(cls, source: Source, flat: bool = False) -> Artist:
        """
@@ -573,16 +571,14 @@ class Musify(Page):
            Artist: the artist fetched
        """

-        print(source)
        url = cls.parse_url(source.url)
-        print(url)
+
+        artist = cls.get_artist_attributes(url)

        discography: List[Album] = cls.get_discography(url)
+        artist.main_album_collection.extend(discography)

-        return Artist(
-            name="",
-            main_album_list=discography
-        )
+        return artist

    @classmethod
    def fetch_artist_details(cls, artist: Artist, flat: bool = False) -> Artist: