This commit is contained in:
Hellow 2023-04-22 14:20:19 +02:00
parent ac4911e1fc
commit 12ef9eb3dd
3 changed files with 61 additions and 21 deletions

View File

@ -0,0 +1,7 @@
# Connections
## Functions
A class that gives me the option to make web requests.

View File

@ -1,5 +1,5 @@
from typing import List, Dict, Callable, Optional, Set from typing import List, Dict, Callable, Optional, Set
from urllib.parse import urlparse, urlunsplit from urllib.parse import urlparse, urlunsplit, ParseResult
import logging import logging
import requests import requests
@ -17,7 +17,6 @@ class Connection:
timeout: int = 7, timeout: int = 7,
logger: logging.Logger = logging.getLogger("connection"), logger: logging.Logger = logging.getLogger("connection"),
header_values: Dict[str, str] = None, header_values: Dict[str, str] = None,
session: requests.Session = None,
accepted_response_codes: Set[int] = None, accepted_response_codes: Set[int] = None,
semantic_not_found: bool = True semantic_not_found: bool = True
): ):
@ -26,6 +25,8 @@ class Connection:
if header_values is None: if header_values is None:
header_values = dict() header_values = dict()
self.HEADER_VALUES = header_values
self.LOGGER = logger self.LOGGER = logger
self.HOST = urlparse(host) self.HOST = urlparse(host)
self.TRIES = tries self.TRIES = tries
@ -35,38 +36,63 @@ class Connection:
self.ACCEPTED_RESPONSE_CODES = accepted_response_codes or {200} self.ACCEPTED_RESPONSE_CODES = accepted_response_codes or {200}
self.SEMANTIC_NOT_FOUND = semantic_not_found self.SEMANTIC_NOT_FOUND = semantic_not_found
self.session: requests.Session = session self._session_map: Dict[str] = {
if self.session is None: self.HOST.netloc: self.new_session()
self.new_session(**header_values) }
else:
self.rotating_proxy.register_session(session)
self.set_header()
@property def base_url(self, url: ParseResult = None):
def base_url(self): if url is None:
return urlunsplit((self.HOST.scheme, self.HOST.netloc, "", "", "")) url = self.HOST
def new_session(self, **header_values): return urlunsplit((url.scheme, url.netloc, "", "", ""))
session = requests.Session()
def _register_session(self, session: requests.Session, **header_values):
session.headers = self.get_header(**header_values) session.headers = self.get_header(**header_values)
self.rotating_proxy.register_session(session) self.rotating_proxy.register_session(session)
self.session = session
def new_session(
self,
url: ParseResult = None,
refer_from_origin: bool = True
) -> requests.Session:
header_values = self.HEADER_VALUES.copy()
if url is not None:
header_values["Host"] = url.netloc
if not refer_from_origin:
header_values["Referer"] = self.base_url(url=url)
session = requests.Session()
self._register_session(session=session, **header_values)
return session
def get_header(self, **header_values) -> Dict[str, str]: def get_header(self, **header_values) -> Dict[str, str]:
return { return {
"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:106.0) Gecko/20100101 Firefox/106.0", "user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:106.0) Gecko/20100101 Firefox/106.0",
"Connection": "keep-alive", "Connection": "keep-alive",
"Host": self.HOST.netloc, "Host": self.HOST.netloc,
"Referer": self.base_url, "Referer": self.base_url(),
**header_values **header_values
} }
def set_header(self, **header_values):
self.session.headers = self.get_header(**header_values)
def rotate(self): def rotate(self):
self.rotating_proxy.rotate() self.rotating_proxy.rotate()
def get_session_from_url(self, url: str, refer_from_origin: bool = True) -> requests.Session:
parsed_url = urlparse(url)
if parsed_url.netloc in self._session_map:
print("saved session")
return self._session_map[parsed_url.netloc]
self._session_map[parsed_url.netloc] = self.new_session(
url=parsed_url,
refer_from_origin=refer_from_origin
)
return self._session_map[parsed_url.netloc]
def _request( def _request(
self, self,
request: Callable, request: Callable,
@ -99,6 +125,9 @@ class Connection:
if r.status_code in accepted_response_code: if r.status_code in accepted_response_code:
return r return r
print(r.content)
print(r.headers)
if not retry: if not retry:
self.LOGGER.warning(f"{self.HOST.netloc} responded wit {r.status_code} " self.LOGGER.warning(f"{self.HOST.netloc} responded wit {r.status_code} "
f"at {url}. ({try_count}-{self.TRIES})") f"at {url}. ({try_count}-{self.TRIES})")
@ -111,19 +140,22 @@ class Connection:
try_count=try_count, try_count=try_count,
accepted_response_code=accepted_response_code, accepted_response_code=accepted_response_code,
url=url, url=url,
timeout=timeout,
**kwargs **kwargs
) )
def get( def get(
self, self,
url: str, url: str,
refer_from_origin: bool = True,
stream: bool = False, stream: bool = False,
accepted_response_codes: set = None, accepted_response_codes: set = None,
timeout: float = None, timeout: float = None,
**kwargs **kwargs
) -> Optional[requests.Response]: ) -> Optional[requests.Response]:
s = self.get_session_from_url(url, refer_from_origin)
r = self._request( r = self._request(
request=self.session.get, request=s.get,
try_count=0, try_count=0,
accepted_response_code=accepted_response_codes or self.ACCEPTED_RESPONSE_CODES, accepted_response_code=accepted_response_codes or self.ACCEPTED_RESPONSE_CODES,
url=url, url=url,
@ -139,13 +171,14 @@ class Connection:
self, self,
url: str, url: str,
json: dict, json: dict,
refer_from_origin: bool = True,
stream: bool = False, stream: bool = False,
accepted_response_codes: set = None, accepted_response_codes: set = None,
timeout: float = None, timeout: float = None,
**kwargs **kwargs
) -> Optional[requests.Response]: ) -> Optional[requests.Response]:
r = self._request( r = self._request(
request=self.session.post, request=self.get_session_from_url(url, refer_from_origin).post,
try_count=0, try_count=0,
accepted_response_code=accepted_response_codes or self.ACCEPTED_RESPONSE_CODES, accepted_response_code=accepted_response_codes or self.ACCEPTED_RESPONSE_CODES,
url=url, url=url,

View File

@ -1039,7 +1039,7 @@ class Musify(Page):
cls.LOGGER.warning(f"The source has no audio link. Falling back to {endpoint}.") cls.LOGGER.warning(f"The source has no audio link. Falling back to {endpoint}.")
r = cls.CONNECTION.get(endpoint, stream=True) r = cls.CONNECTION.get(endpoint, stream=True, allow_redirects=True, headers={"Host": "40s.musify.club", "Referer": endpoint})
if r is None: if r is None:
return DownloadResult(error_message=f"couldn't connect to {endpoint}") return DownloadResult(error_message=f"couldn't connect to {endpoint}")