diff --git a/src/actual_donwload.py b/src/actual_donwload.py index 35ab53f..5341779 100644 --- a/src/actual_donwload.py +++ b/src/actual_donwload.py @@ -53,7 +53,7 @@ def real_download(): if __name__ == "__main__": music_kraken.cli(genre="test", command_list=[ - "#a Molchat Doma", - "0", + # "https://musify.club/release/molchat-doma-etazhi-2018-1092949", + "https://musify.club/release/ghost-bath-self-loather-2021-1554266", "ok" ]) diff --git a/src/music_kraken/connection/connection.py b/src/music_kraken/connection/connection.py index b9b1363..8c1a1e2 100644 --- a/src/music_kraken/connection/connection.py +++ b/src/music_kraken/connection/connection.py @@ -36,9 +36,9 @@ class Connection: self.ACCEPTED_RESPONSE_CODES = accepted_response_codes or {200} self.SEMANTIC_NOT_FOUND = semantic_not_found - self._session_map: Dict[str] = { - self.HOST.netloc: self.new_session() - } + self.session = requests.Session() + self.session.headers = self.get_header(**self.HEADER_VALUES) + self.session.proxies = self.rotating_proxy.current_proxy def base_url(self, url: ParseResult = None): if url is None: @@ -46,31 +46,9 @@ class Connection: return urlunsplit((url.scheme, url.netloc, "", "", "")) - def _register_session(self, session: requests.Session, **header_values): - session.headers = self.get_header(**header_values) - self.rotating_proxy.register_session(session) - - def new_session( - self, - url: ParseResult = None, - refer_from_origin: bool = True - ) -> requests.Session: - - header_values = self.HEADER_VALUES.copy() - if url is not None: - header_values["Host"] = url.netloc - - if not refer_from_origin: - header_values["Referer"] = self.base_url(url=url) - - session = requests.Session() - self._register_session(session=session, **header_values) - - return session - def get_header(self, **header_values) -> Dict[str, str]: return { - "user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:106.0) Gecko/20100101 Firefox/106.0", + "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36", "Connection": "keep-alive", "Host": self.HOST.netloc, "Referer": self.base_url(), @@ -78,20 +56,21 @@ class Connection: } def rotate(self): - self.rotating_proxy.rotate() + self.session.proxies = self.rotating_proxy.rotate() - def get_session_from_url(self, url: str, refer_from_origin: bool = True) -> requests.Session: - parsed_url = urlparse(url) + def _update_headers( + self, + headers: Optional[dict], + refer_from_origin: bool, + url: ParseResult + ) -> Dict[str, str]: + if headers is None: + headers = dict() - if parsed_url.netloc in self._session_map: - print("saved session") - return self._session_map[parsed_url.netloc] + if not refer_from_origin: + headers["Referer"] = self.base_url(url=url) - self._session_map[parsed_url.netloc] = self.new_session( - url=parsed_url, - refer_from_origin=refer_from_origin - ) - return self._session_map[parsed_url.netloc] + return headers def _request( self, @@ -100,6 +79,8 @@ class Connection: accepted_response_code: set, url: str, timeout: float, + headers: dict, + refer_from_origin: bool = True, **kwargs ) -> Optional[requests.Response]: if try_count >= self.TRIES: @@ -108,9 +89,20 @@ class Connection: if timeout is None: timeout = self.TIMEOUT + parsed_url = urlparse(url) + + print(url) + print(parsed_url) + + headers = self._update_headers( + headers=headers, + refer_from_origin=refer_from_origin, + url=parsed_url + ) + retry = False try: - r = request(url=url, timeout=timeout, **kwargs) + r: requests.Response = request(url=parsed_url.geturl(), timeout=timeout, headers=headers, **kwargs) except requests.exceptions.Timeout: self.LOGGER.warning(f"Request timed out at \"{url}\": ({try_count}-{self.TRIES})") retry = True @@ -121,19 +113,19 @@ class Connection: if not retry: if self.SEMANTIC_NOT_FOUND and r.status_code == 404: self.LOGGER.warning(f"Couldn't find url (404): {url}") + print(r.headers) + print(r.request.headers) return if r.status_code in accepted_response_code: return r - print(r.content) - print(r.headers) - if not retry: self.LOGGER.warning(f"{self.HOST.netloc} responded wit {r.status_code} " f"at {url}. ({try_count}-{self.TRIES})") self.LOGGER.debug(r.content) self.rotate() + print(r.headers) return self._request( request=request, @@ -151,16 +143,18 @@ class Connection: stream: bool = False, accepted_response_codes: set = None, timeout: float = None, + headers: dict = None, **kwargs ) -> Optional[requests.Response]: - s = self.get_session_from_url(url, refer_from_origin) r = self._request( - request=s.get, + request=self.session.get, try_count=0, accepted_response_code=accepted_response_codes or self.ACCEPTED_RESPONSE_CODES, url=url, stream=stream, timeout=timeout, + headers=headers, + refer_from_origin=refer_from_origin, **kwargs ) if r is None: @@ -175,14 +169,17 @@ class Connection: stream: bool = False, accepted_response_codes: set = None, timeout: float = None, + headers: dict = None, **kwargs ) -> Optional[requests.Response]: r = self._request( - request=self.get_session_from_url(url, refer_from_origin).post, + request=self.session.post, try_count=0, accepted_response_code=accepted_response_codes or self.ACCEPTED_RESPONSE_CODES, url=url, timeout=timeout, + headers=headers, + refer_from_origin=refer_from_origin, json=json, stream=stream, **kwargs diff --git a/src/music_kraken/connection/rotating.py b/src/music_kraken/connection/rotating.py index adb4010..3b9c6bf 100644 --- a/src/music_kraken/connection/rotating.py +++ b/src/music_kraken/connection/rotating.py @@ -30,27 +30,14 @@ class RotatingObject: class RotatingProxy(RotatingObject): - def __init__(self, proxy_list: List[Dict[str, str]], session_list: List[requests.Session] = None): - self._session_list: List[requests.Session] = session_list - if self._session_list is None: - self._session_list = [] + def __init__(self, proxy_list: List[Dict[str, str]]): + super().__init__( + proxy_list if len(proxy_list) > 0 else [None] + ) - super().__init__(proxy_list if len(proxy_list) > 0 else [{}]) - - def register_session(self, session: requests.Session): - self._session_list.append(session) - session.proxies = self.current_proxy - - def rotate(self): - new_proxy = self.next - - for session in self._session_list: - session.proxies = new_proxy + def rotate(self) -> Dict[str, str]: + return self.next @property def current_proxy(self) -> Dict[str, str]: return super().object - - @property - def next(self) -> Dict[str, str]: - return super().object diff --git a/src/music_kraken/pages/download_center/search.py b/src/music_kraken/pages/download_center/search.py index 915fa21..f1c685c 100644 --- a/src/music_kraken/pages/download_center/search.py +++ b/src/music_kraken/pages/download_center/search.py @@ -6,6 +6,7 @@ from .multiple_options import MultiPageOptions from ..abstract import Page from ..support_classes.download_result import DownloadResult from ...objects import DatabaseObject, Source +from ...utils.enums.source import SourcePages class Search(Download): @@ -116,7 +117,7 @@ class Search(Download): can download directly after """ - source = Source.match_url(url=url) + source = Source.match_url(url=url, referer_page=SourcePages.MANUAL) if source is None: return False diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index 049785e..48de5f5 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -1039,7 +1039,7 @@ class Musify(Page): cls.LOGGER.warning(f"The source has no audio link. Falling back to {endpoint}.") - r = cls.CONNECTION.get(endpoint, stream=True, allow_redirects=True, headers={"Host": "40s.musify.club", "Referer": endpoint}) + r = cls.CONNECTION.get(endpoint, stream=True, allow_redirects=True, headers={"Connection": "https://musify.club/"}) if r is None: return DownloadResult(error_message=f"couldn't connect to {endpoint}") diff --git a/src/music_kraken/utils/enums/source.py b/src/music_kraken/utils/enums/source.py index b536e5c..b324f8d 100644 --- a/src/music_kraken/utils/enums/source.py +++ b/src/music_kraken/utils/enums/source.py @@ -25,6 +25,8 @@ class SourcePages(Enum): TWITTER = "twitter" # I will use nitter though lol MYSPACE = "myspace" # Yes somehow this ancient site is linked EVERYWHERE + MANUAL = "manual" + @classmethod def get_homepage(cls, attribute) -> str: homepage_map = {