failed attempts

Hellow 2023-04-23 12:08:39 +02:00
parent 12ef9eb3dd
commit bd3e042ae2
6 changed files with 54 additions and 67 deletions

View File

@@ -53,7 +53,7 @@ def real_download():
if __name__ == "__main__":
music_kraken.cli(genre="test", command_list=[
"#a Molchat Doma",
"0",
# "https://musify.club/release/molchat-doma-etazhi-2018-1092949",
"https://musify.club/release/ghost-bath-self-loather-2021-1554266",
"ok"
])
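
Note: the scripted run above drives the interactive CLI with a fixed command list — it searches an artist, hands over a release URL, and confirms the download. The reading of the individual commands below is an assumption inferred from the shell syntax, not documented in this commit:

import music_kraken

# assumed reading of the scripted session above
music_kraken.cli(genre="test", command_list=[
    "#a Molchat Doma",                                                    # "#a": search by artist
    "https://musify.club/release/ghost-bath-self-loather-2021-1554266",   # paste a release URL directly
    "ok",                                                                 # confirm and start the download
])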

View File

@@ -36,9 +36,9 @@ class Connection:
self.ACCEPTED_RESPONSE_CODES = accepted_response_codes or {200}
self.SEMANTIC_NOT_FOUND = semantic_not_found
self._session_map: Dict[str] = {
self.HOST.netloc: self.new_session()
}
self.session = requests.Session()
self.session.headers = self.get_header(**self.HEADER_VALUES)
self.session.proxies = self.rotating_proxy.current_proxy
def base_url(self, url: ParseResult = None):
if url is None:
@@ -46,31 +46,9 @@ class Connection:
return urlunsplit((url.scheme, url.netloc, "", "", ""))
def _register_session(self, session: requests.Session, **header_values):
session.headers = self.get_header(**header_values)
self.rotating_proxy.register_session(session)
def new_session(
self,
url: ParseResult = None,
refer_from_origin: bool = True
) -> requests.Session:
header_values = self.HEADER_VALUES.copy()
if url is not None:
header_values["Host"] = url.netloc
if not refer_from_origin:
header_values["Referer"] = self.base_url(url=url)
session = requests.Session()
self._register_session(session=session, **header_values)
return session
def get_header(self, **header_values) -> Dict[str, str]:
return {
"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:106.0) Gecko/20100101 Firefox/106.0",
"user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
"Connection": "keep-alive",
"Host": self.HOST.netloc,
"Referer": self.base_url(),
@@ -78,20 +56,21 @@ class Connection:
}
def rotate(self):
self.rotating_proxy.rotate()
self.session.proxies = self.rotating_proxy.rotate()
def get_session_from_url(self, url: str, refer_from_origin: bool = True) -> requests.Session:
parsed_url = urlparse(url)
def _update_headers(
self,
headers: Optional[dict],
refer_from_origin: bool,
url: ParseResult
) -> Dict[str, str]:
if headers is None:
headers = dict()
if parsed_url.netloc in self._session_map:
print("saved session")
return self._session_map[parsed_url.netloc]
if not refer_from_origin:
headers["Referer"] = self.base_url(url=url)
self._session_map[parsed_url.netloc] = self.new_session(
url=parsed_url,
refer_from_origin=refer_from_origin
)
return self._session_map[parsed_url.netloc]
return headers
def _request(
self,
@@ -100,6 +79,8 @@ class Connection:
accepted_response_code: set,
url: str,
timeout: float,
headers: dict,
refer_from_origin: bool = True,
**kwargs
) -> Optional[requests.Response]:
if try_count >= self.TRIES:
@@ -108,9 +89,20 @@ class Connection:
if timeout is None:
timeout = self.TIMEOUT
parsed_url = urlparse(url)
print(url)
print(parsed_url)
headers = self._update_headers(
headers=headers,
refer_from_origin=refer_from_origin,
url=parsed_url
)
retry = False
try:
r = request(url=url, timeout=timeout, **kwargs)
r: requests.Response = request(url=parsed_url.geturl(), timeout=timeout, headers=headers, **kwargs)
except requests.exceptions.Timeout:
self.LOGGER.warning(f"Request timed out at \"{url}\": ({try_count}-{self.TRIES})")
retry = True
@@ -121,19 +113,19 @@
if not retry:
if self.SEMANTIC_NOT_FOUND and r.status_code == 404:
self.LOGGER.warning(f"Couldn't find url (404): {url}")
print(r.headers)
print(r.request.headers)
return
if r.status_code in accepted_response_code:
return r
print(r.content)
print(r.headers)
if not retry:
self.LOGGER.warning(f"{self.HOST.netloc} responded wit {r.status_code} "
f"at {url}. ({try_count}-{self.TRIES})")
self.LOGGER.debug(r.content)
self.rotate()
print(r.headers)
return self._request(
request=request,
@@ -151,16 +143,18 @@ class Connection:
stream: bool = False,
accepted_response_codes: set = None,
timeout: float = None,
headers: dict = None,
**kwargs
) -> Optional[requests.Response]:
s = self.get_session_from_url(url, refer_from_origin)
r = self._request(
request=s.get,
request=self.session.get,
try_count=0,
accepted_response_code=accepted_response_codes or self.ACCEPTED_RESPONSE_CODES,
url=url,
stream=stream,
timeout=timeout,
headers=headers,
refer_from_origin=refer_from_origin,
**kwargs
)
if r is None:
@@ -175,14 +169,17 @@ class Connection:
stream: bool = False,
accepted_response_codes: set = None,
timeout: float = None,
headers: dict = None,
**kwargs
) -> Optional[requests.Response]:
r = self._request(
request=self.get_session_from_url(url, refer_from_origin).post,
request=self.session.post,
try_count=0,
accepted_response_code=accepted_response_codes or self.ACCEPTED_RESPONSE_CODES,
url=url,
timeout=timeout,
headers=headers,
refer_from_origin=refer_from_origin,
json=json,
stream=stream,
**kwargs
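
Taken together, the Connection changes drop the per-host session map in favour of a single requests.Session: default headers and the current proxy are set once in __init__, per-request headers are merged in via _update_headers, and rotate() reassigns session.proxies from whatever the proxy rotator returns. A minimal, self-contained sketch of that pattern (class and parameter names here are assumptions, not the project's API):

import requests
from typing import Dict, List, Optional

class TinyConnection:
    # sketch only: one shared session, per-request header overrides, proxy rotation on failure
    def __init__(self, base_headers: Dict[str, str], proxies: List[Dict[str, str]]):
        self._proxies = proxies or [{}]
        self._index = 0
        self.session = requests.Session()
        self.session.headers.update(base_headers)            # defaults applied to every request
        self.session.proxies = self._proxies[self._index]

    def rotate(self) -> None:
        # move the shared session over to the next proxy in the list
        self._index = (self._index + 1) % len(self._proxies)
        self.session.proxies = self._proxies[self._index]

    def get(self, url: str, headers: Optional[Dict[str, str]] = None, **kwargs) -> Optional[requests.Response]:
        # requests merges the per-request headers over the session defaults
        try:
            return self.session.get(url, headers=headers or {}, timeout=10, **kwargs)
        except requests.exceptions.RequestException:
            self.rotate()                                     # on failure, try the next proxy
            return None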

View File

@@ -30,27 +30,14 @@ class RotatingObject:
class RotatingProxy(RotatingObject):
def __init__(self, proxy_list: List[Dict[str, str]], session_list: List[requests.Session] = None):
self._session_list: List[requests.Session] = session_list
if self._session_list is None:
self._session_list = []
def __init__(self, proxy_list: List[Dict[str, str]]):
super().__init__(
proxy_list if len(proxy_list) > 0 else [None]
)
super().__init__(proxy_list if len(proxy_list) > 0 else [{}])
def register_session(self, session: requests.Session):
self._session_list.append(session)
session.proxies = self.current_proxy
def rotate(self):
new_proxy = self.next
for session in self._session_list:
session.proxies = new_proxy
def rotate(self) -> Dict[str, str]:
return self.next
@property
def current_proxy(self) -> Dict[str, str]:
return super().object
@property
def next(self) -> Dict[str, str]:
return super().object
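
RotatingProxy no longer keeps a list of registered sessions: rotate() simply returns the next proxy dict, and the single Connection session attaches it itself. A rough equivalent using itertools.cycle (a stand-in, not the project's RotatingObject base class):

import itertools
from typing import Dict, List

import requests

class SimpleRotatingProxy:
    # stand-in for the session-free rotation shown above
    def __init__(self, proxy_list: List[Dict[str, str]]):
        self._cycle = itertools.cycle(proxy_list if proxy_list else [{}])
        self.current_proxy: Dict[str, str] = next(self._cycle)

    def rotate(self) -> Dict[str, str]:
        # hand back the next proxy dict; the caller decides where to attach it
        self.current_proxy = next(self._cycle)
        return self.current_proxy

session = requests.Session()
rotator = SimpleRotatingProxy([{}])        # an empty dict means a direct connection
session.proxies = rotator.current_proxy
session.proxies = rotator.rotate()         # e.g. after a failed request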

View File

@@ -6,6 +6,7 @@ from .multiple_options import MultiPageOptions
from ..abstract import Page
from ..support_classes.download_result import DownloadResult
from ...objects import DatabaseObject, Source
from ...utils.enums.source import SourcePages
class Search(Download):
@@ -116,7 +117,7 @@ class Search(Download):
can download directly after
"""
source = Source.match_url(url=url)
source = Source.match_url(url=url, referer_page=SourcePages.MANUAL)
if source is None:
return False
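
The change here tags URLs pasted into the search shell with referer_page=SourcePages.MANUAL before matching them. A usage sketch, assuming match_url returns None for hosts no page can handle (the URL is the one from the test script above, and the imports are the ones added in this file):

source = Source.match_url(
    url="https://musify.club/release/ghost-bath-self-loather-2021-1554266",
    referer_page=SourcePages.MANUAL,
)
if source is None:
    # mirrors the `return False` branch above: no page can download this URL
    ...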

View File

@@ -1039,7 +1039,7 @@ class Musify(Page):
cls.LOGGER.warning(f"The source has no audio link. Falling back to {endpoint}.")
r = cls.CONNECTION.get(endpoint, stream=True, allow_redirects=True, headers={"Host": "40s.musify.club", "Referer": endpoint})
r = cls.CONNECTION.get(endpoint, stream=True, allow_redirects=True, headers={"Connection": "https://musify.club/"})
if r is None:
return DownloadResult(error_message=f"couldn't connect to {endpoint}")
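
Since Connection.get now forwards a per-request headers dict, page code can override single header fields for one streaming request without touching the shared session. A sketch of that usage, under the assumption that the stream host checks the Referer (the header value here is illustrative, not part of this commit):

# hypothetical per-request override inside the same fallback branch
r = cls.CONNECTION.get(
    endpoint,
    stream=True,
    allow_redirects=True,
    headers={"Referer": "https://musify.club/"},   # assumed value
)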

View File

@@ -25,6 +25,8 @@ class SourcePages(Enum):
TWITTER = "twitter" # I will use nitter though lol
MYSPACE = "myspace" # Yes somehow this ancient site is linked EVERYWHERE
MANUAL = "manual"
@classmethod
def get_homepage(cls, attribute) -> str:
homepage_map = {
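
get_homepage presumably maps each enum member to the homepage of its service and returns it for the given attribute. A sketch of the likely shape, with every entry an assumption rather than the committed content:

@classmethod
def get_homepage(cls, attribute) -> str:
    # assumed entries; the real mapping is not visible in this hunk
    homepage_map = {
        cls.TWITTER: "https://twitter.com/",
        cls.MYSPACE: "https://myspace.com/",
        cls.MANUAL: "",
    }
    return homepage_map.get(attribute, "")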