failed attempts
parent 12ef9eb3dd
commit bd3e042ae2
@@ -53,7 +53,7 @@ def real_download():

 if __name__ == "__main__":
     music_kraken.cli(genre="test", command_list=[
-        "#a Molchat Doma",
-        "0",
+        # "https://musify.club/release/molchat-doma-etazhi-2018-1092949",
+        "https://musify.club/release/ghost-bath-self-loather-2021-1554266",
         "ok"
     ])
@@ -36,9 +36,9 @@ class Connection:
         self.ACCEPTED_RESPONSE_CODES = accepted_response_codes or {200}
         self.SEMANTIC_NOT_FOUND = semantic_not_found

-        self._session_map: Dict[str] = {
-            self.HOST.netloc: self.new_session()
-        }
+        self.session = requests.Session()
+        self.session.headers = self.get_header(**self.HEADER_VALUES)
+        self.session.proxies = self.rotating_proxy.current_proxy

     def base_url(self, url: ParseResult = None):
         if url is None:
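Note: the hunk above swaps the per-host session map for one shared requests.Session whose default headers and proxies are set once in __init__. A minimal sketch of that pattern, assuming placeholder header values and an empty proxy mapping:

    import requests

    def build_session(header_values: dict, current_proxy: dict) -> requests.Session:
        # One session for every host: default headers and the current proxy
        # are applied once and reused by each request.
        session = requests.Session()
        session.headers.update(header_values)
        session.proxies = current_proxy  # e.g. {"http": "...", "https": "..."}
        return session

    session = build_session({"Connection": "keep-alive"}, {})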
@@ -46,31 +46,9 @@ class Connection:

         return urlunsplit((url.scheme, url.netloc, "", "", ""))

-    def _register_session(self, session: requests.Session, **header_values):
-        session.headers = self.get_header(**header_values)
-        self.rotating_proxy.register_session(session)
-
-    def new_session(
-            self,
-            url: ParseResult = None,
-            refer_from_origin: bool = True
-    ) -> requests.Session:
-
-        header_values = self.HEADER_VALUES.copy()
-        if url is not None:
-            header_values["Host"] = url.netloc
-
-            if not refer_from_origin:
-                header_values["Referer"] = self.base_url(url=url)
-
-        session = requests.Session()
-        self._register_session(session=session, **header_values)
-
-        return session
-
     def get_header(self, **header_values) -> Dict[str, str]:
         return {
-            "user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:106.0) Gecko/20100101 Firefox/106.0",
+            "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
             "Connection": "keep-alive",
             "Host": self.HOST.netloc,
             "Referer": self.base_url(),
@@ -78,20 +56,21 @@ class Connection:
         }

     def rotate(self):
-        self.rotating_proxy.rotate()
+        self.session.proxies = self.rotating_proxy.rotate()

-    def get_session_from_url(self, url: str, refer_from_origin: bool = True) -> requests.Session:
-        parsed_url = urlparse(url)
-
-        if parsed_url.netloc in self._session_map:
-            print("saved session")
-            return self._session_map[parsed_url.netloc]
-
-        self._session_map[parsed_url.netloc] = self.new_session(
-            url=parsed_url,
-            refer_from_origin=refer_from_origin
-        )
-        return self._session_map[parsed_url.netloc]
+    def _update_headers(
+            self,
+            headers: Optional[dict],
+            refer_from_origin: bool,
+            url: ParseResult
+    ) -> Dict[str, str]:
+        if headers is None:
+            headers = dict()
+
+        if not refer_from_origin:
+            headers["Referer"] = self.base_url(url=url)
+
+        return headers

     def _request(
             self,
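Note: _update_headers now carries the per-request header tweak that new_session used to bake into a dedicated session; only a Referer is added, and only when refer_from_origin is False. A rough standalone equivalent, with base_url() inlined as an assumption:

    from typing import Dict, Optional
    from urllib.parse import ParseResult, urlparse, urlunsplit

    def update_headers(headers: Optional[dict], refer_from_origin: bool, url: ParseResult) -> Dict[str, str]:
        headers = dict(headers or {})
        if not refer_from_origin:
            # assumed equivalent of Connection.base_url(): scheme + netloc only
            headers["Referer"] = urlunsplit((url.scheme, url.netloc, "", "", ""))
        return headers

    print(update_headers(None, False, urlparse("https://musify.club/release/x")))
    # {'Referer': 'https://musify.club'}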
@@ -100,6 +79,8 @@ class Connection:
             accepted_response_code: set,
             url: str,
             timeout: float,
+            headers: dict,
+            refer_from_origin: bool = True,
             **kwargs
     ) -> Optional[requests.Response]:
         if try_count >= self.TRIES:
@@ -108,9 +89,20 @@ class Connection:
         if timeout is None:
             timeout = self.TIMEOUT

+        parsed_url = urlparse(url)
+
+        print(url)
+        print(parsed_url)
+
+        headers = self._update_headers(
+            headers=headers,
+            refer_from_origin=refer_from_origin,
+            url=parsed_url
+        )
+
         retry = False
         try:
-            r = request(url=url, timeout=timeout, **kwargs)
+            r: requests.Response = request(url=parsed_url.geturl(), timeout=timeout, headers=headers, **kwargs)
         except requests.exceptions.Timeout:
             self.LOGGER.warning(f"Request timed out at \"{url}\": ({try_count}-{self.TRIES})")
             retry = True
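Note: _request keeps its recursive retry: on a timeout or an unaccepted status code it rotates the proxy and calls itself until self.TRIES is exhausted. A stripped-down sketch of that flow (names chosen here only to mirror the diff, not the project's API):

    import requests

    def request_with_retries(get, url, rotate, tries=3, timeout=10.0, try_count=0):
        if try_count >= tries:           # attempt budget spent, give up
            return None
        try:
            r = get(url, timeout=timeout)
            if r.status_code == 200:     # accepted response, done
                return r
        except requests.exceptions.Timeout:
            pass
        rotate()                         # e.g. switch to the next proxy
        return request_with_retries(get, url, rotate, tries, timeout, try_count + 1)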
@@ -121,19 +113,19 @@ class Connection:
         if not retry:
             if self.SEMANTIC_NOT_FOUND and r.status_code == 404:
                 self.LOGGER.warning(f"Couldn't find url (404): {url}")
+                print(r.headers)
+                print(r.request.headers)
                 return
             if r.status_code in accepted_response_code:
                 return r

-        print(r.content)
-        print(r.headers)
-
         if not retry:
             self.LOGGER.warning(f"{self.HOST.netloc} responded wit {r.status_code} "
                                 f"at {url}. ({try_count}-{self.TRIES})")
             self.LOGGER.debug(r.content)

         self.rotate()
+        print(r.headers)

         return self._request(
             request=request,
@@ -151,16 +143,18 @@ class Connection:
             stream: bool = False,
             accepted_response_codes: set = None,
             timeout: float = None,
+            headers: dict = None,
             **kwargs
     ) -> Optional[requests.Response]:
-        s = self.get_session_from_url(url, refer_from_origin)
         r = self._request(
-            request=s.get,
+            request=self.session.get,
             try_count=0,
             accepted_response_code=accepted_response_codes or self.ACCEPTED_RESPONSE_CODES,
             url=url,
             stream=stream,
             timeout=timeout,
+            headers=headers,
+            refer_from_origin=refer_from_origin,
             **kwargs
         )
         if r is None:
@@ -175,14 +169,17 @@ class Connection:
             stream: bool = False,
             accepted_response_codes: set = None,
             timeout: float = None,
+            headers: dict = None,
             **kwargs
     ) -> Optional[requests.Response]:
         r = self._request(
-            request=self.get_session_from_url(url, refer_from_origin).post,
+            request=self.session.post,
             try_count=0,
             accepted_response_code=accepted_response_codes or self.ACCEPTED_RESPONSE_CODES,
             url=url,
             timeout=timeout,
+            headers=headers,
+            refer_from_origin=refer_from_origin,
             json=json,
             stream=stream,
             **kwargs
@@ -30,27 +30,14 @@ class RotatingObject:


 class RotatingProxy(RotatingObject):
-    def __init__(self, proxy_list: List[Dict[str, str]], session_list: List[requests.Session] = None):
-        self._session_list: List[requests.Session] = session_list
-        if self._session_list is None:
-            self._session_list = []
-
-        super().__init__(proxy_list if len(proxy_list) > 0 else [{}])
-
-    def register_session(self, session: requests.Session):
-        self._session_list.append(session)
-        session.proxies = self.current_proxy
-
-    def rotate(self):
-        new_proxy = self.next
-
-        for session in self._session_list:
-            session.proxies = new_proxy
+    def __init__(self, proxy_list: List[Dict[str, str]]):
+        super().__init__(
+            proxy_list if len(proxy_list) > 0 else [None]
+        )
+
+    def rotate(self) -> Dict[str, str]:
+        return self.next

     @property
     def current_proxy(self) -> Dict[str, str]:
         return super().object
-
-    @property
-    def next(self) -> Dict[str, str]:
-        return super().object
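Note: RotatingProxy no longer tracks sessions; it just cycles proxy mappings and leaves the assignment to Connection (session.proxies = rotating_proxy.rotate()). A self-contained approximation, with the base-class bookkeeping replaced here by itertools.cycle:

    from itertools import cycle
    from typing import Dict, List

    class SimpleRotatingProxy:
        # Sketch only: an empty proxy list degenerates to "no proxy".
        def __init__(self, proxy_list: List[Dict[str, str]]):
            self._cycle = cycle(proxy_list if proxy_list else [{}])
            self.current_proxy = next(self._cycle)

        def rotate(self) -> Dict[str, str]:
            self.current_proxy = next(self._cycle)
            return self.current_proxy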
@@ -6,6 +6,7 @@ from .multiple_options import MultiPageOptions
 from ..abstract import Page
 from ..support_classes.download_result import DownloadResult
 from ...objects import DatabaseObject, Source
+from ...utils.enums.source import SourcePages


 class Search(Download):
@@ -116,7 +117,7 @@ class Search(Download):
         can download directly after
         """

-        source = Source.match_url(url=url)
+        source = Source.match_url(url=url, referer_page=SourcePages.MANUAL)
         if source is None:
             return False

@@ -1039,7 +1039,7 @@ class Musify(Page):

         cls.LOGGER.warning(f"The source has no audio link. Falling back to {endpoint}.")

-        r = cls.CONNECTION.get(endpoint, stream=True, allow_redirects=True, headers={"Host": "40s.musify.club", "Referer": endpoint})
+        r = cls.CONNECTION.get(endpoint, stream=True, allow_redirects=True, headers={"Connection": "https://musify.club/"})
         if r is None:
             return DownloadResult(error_message=f"couldn't connect to {endpoint}")

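Note: the replacement header {"Connection": "https://musify.club/"} sends a URL as the Connection header. If the intent was to pass the site as a Referer through the new headers parameter, a call along these lines would do it; the header name and value below are an assumption for illustration, not what the commit does:

    # hypothetical variant, not the committed code
    r = cls.CONNECTION.get(
        endpoint,
        stream=True,
        allow_redirects=True,
        headers={"Referer": "https://musify.club/"},
    )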
@@ -25,6 +25,8 @@ class SourcePages(Enum):
     TWITTER = "twitter"  # I will use nitter though lol
     MYSPACE = "myspace"  # Yes somehow this ancient site is linked EVERYWHERE

+    MANUAL = "manual"
+
     @classmethod
     def get_homepage(cls, attribute) -> str:
         homepage_map = {