improved function to connect to the internet

This commit is contained in:
Hellow 2023-04-20 18:47:47 +02:00
parent 3892f60c7f
commit d6d20aaf1b
5 changed files with 216 additions and 4 deletions

View File

View File

@ -0,0 +1,153 @@
from typing import List, Dict, Callable, Optional, Set
from urllib.parse import urlparse, urlunsplit
import logging
import requests
from .rotating import RotatingProxy
from ..utils.shared import PROXIES_LIST
# Module-level logger registered under the name "connection"; each Connection
# instance keeps a reference to it as ``self.LOGGER``.
LOGGER = logging.getLogger("connection")
class Connection:
    """
    Wrapper around a ``requests.Session`` that is bound to a single host.

    Requests are retried up to ``tries`` times. After every failed attempt
    (timeout, connection error or a response code that is not accepted) the
    rotating proxy is advanced before the next attempt is made.
    """

    def __init__(
            self,
            host: str,
            proxies: List[dict] = None,
            tries: int = (len(PROXIES_LIST) + 1) * 2,
            timeout: int = 7,
            header_values: Dict[str, str] = None,
            session: requests.Session = None,
            accepted_response_codes: Set[int] = None,
            semantic_not_found: bool = True
    ):
        """
        :param host: url of the host; it is parsed with ``urlparse`` and used
            to build the ``Host`` and ``Referer`` headers.
        :param proxies: list of proxy mappings, defaults to ``PROXIES_LIST``.
        :param tries: maximum number of attempts per request.
        :param timeout: default ``timeout`` handed to every request unless the
            caller passes its own ``timeout`` keyword.
        :param header_values: additional headers; they override the defaults
            built by :meth:`get_header`.
        :param session: an existing session to use; a new one is created if
            omitted.
        :param accepted_response_codes: status codes that count as success,
            defaults to ``{200}``.
        :param semantic_not_found: if true, a 404 is not retried.
        """
        if proxies is None:
            proxies = PROXIES_LIST
        if header_values is None:
            header_values = dict()

        self.LOGGER = LOGGER
        self.HOST = urlparse(host)
        self.TRIES = tries
        self.TIMEOUT = timeout
        self.rotating_proxy = RotatingProxy(proxy_list=proxies)

        self.ACCEPTED_RESPONSE_CODES = accepted_response_codes or {200}
        self.SEMANTIC_NOT_FOUND = semantic_not_found

        self.session: requests.Session = session
        if self.session is None:
            # new_session already installs the headers, including header_values
            self.new_session(**header_values)
        else:
            self.rotating_proxy.register_session(session)
            # forward header_values so they are not silently dropped for a
            # caller supplied session
            self.set_header(**header_values)

    @property
    def base_url(self):
        """Scheme and network location of the host, without path, query or fragment."""
        return urlunsplit((self.HOST.scheme, self.HOST.netloc, "", "", ""))

    def new_session(self, **header_values):
        """Create a fresh session, set its headers and register it with the rotating proxy."""
        session = requests.Session()
        session.headers = self.get_header(**header_values)
        self.rotating_proxy.register_session(session)
        self.session = session

    def get_header(self, **header_values) -> Dict[str, str]:
        """
        Build the header mapping for this host.

        ``header_values`` are merged last, so they override the defaults.
        """
        return {
            "user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:106.0) Gecko/20100101 Firefox/106.0",
            "Connection": "keep-alive",
            "Host": self.HOST.netloc,
            "Referer": self.base_url,
            **header_values
        }

    def set_header(self, **header_values):
        """Replace the headers of the current session with freshly built ones."""
        self.session.headers = self.get_header(**header_values)

    def rotate(self):
        """Advance the rotating proxy."""
        self.rotating_proxy.rotate()

    def _request(
            self,
            request: Callable,
            try_count: int,
            accepted_response_code: set,
            url: str,
            **kwargs
    ) -> Optional[requests.Response]:
        """
        Call ``request(url=url, **kwargs)`` until it succeeds or the attempts run out.

        :param request: the callable performing the request, e.g. ``session.get``.
        :param try_count: number of attempts that were already made.
        :param accepted_response_code: status codes that count as success.
        :param url: the url to request.
        :return: the response on success; ``None`` if all attempts failed or if
            the server answered 404 while ``SEMANTIC_NOT_FOUND`` is set.
        """
        # Apply the configured timeout unless the caller chose one explicitly.
        # Without it the Timeout handler below is effectively never reached.
        kwargs.setdefault("timeout", self.TIMEOUT)

        # A bounded loop instead of recursion: the attempt counter is
        # guaranteed to advance, so this always terminates after TRIES attempts.
        for attempt in range(try_count, self.TRIES):
            try:
                r = request(url=url, **kwargs)
            except requests.exceptions.Timeout:
                self.LOGGER.warning(f"Request timed out at \"{url}\": ({attempt}-{self.TRIES})")
            except requests.exceptions.ConnectionError:
                self.LOGGER.warning(f"Couldn't connect to \"{url}\": ({attempt}-{self.TRIES})")
            else:
                if self.SEMANTIC_NOT_FOUND and r.status_code == 404:
                    # a 404 is treated as a definitive answer, retrying won't help
                    self.LOGGER.warning(f"Couldn't find url (404): {url}")
                    return None

                if r.status_code in accepted_response_code:
                    return r

                self.LOGGER.warning(f"{self.HOST.netloc} responded wit {r.status_code} "
                                    f"at {url}. ({attempt}-{self.TRIES})")
                self.LOGGER.debug(r.content)

            # the attempt failed, switch the proxy before trying again
            self.rotate()

        return None

    def get(
            self,
            url: str,
            stream: bool = False,
            accepted_response_codes: set = None,
            **kwargs
    ) -> Optional[requests.Response]:
        """
        Send a GET request with retries.

        :param url: the url to request.
        :param stream: forwarded to the session's ``get``.
        :param accepted_response_codes: overrides the accepted codes of the
            connection for this request.
        :return: the response, or ``None`` if no acceptable response was received.
        """
        r = self._request(
            request=self.session.get,
            try_count=0,
            accepted_response_code=accepted_response_codes or self.ACCEPTED_RESPONSE_CODES,
            url=url,
            stream=stream,
            **kwargs
        )
        if r is None:
            self.LOGGER.warning(f"Max attempts ({self.TRIES}) exceeded for: GET:{url}")
        return r

    def post(
            self,
            url: str,
            json: dict,
            stream: bool = False,
            accepted_response_codes: set = None,
            **kwargs
    ) -> Optional[requests.Response]:
        """
        Send a POST request with a json payload, with retries.

        :param url: the url to request.
        :param json: the payload, forwarded as ``json`` to the session's ``post``.
        :param stream: forwarded to the session's ``post``.
        :param accepted_response_codes: overrides the accepted codes of the
            connection for this request.
        :return: the response, or ``None`` if no acceptable response was received.
        """
        r = self._request(
            request=self.session.post,
            try_count=0,
            accepted_response_code=accepted_response_codes or self.ACCEPTED_RESPONSE_CODES,
            url=url,
            json=json,
            stream=stream,
            **kwargs
        )
        if r is None:
            self.LOGGER.warning(f"Max attempts ({self.TRIES}) exceeded for: POST:{url}")
            self.LOGGER.warning(f"payload: {json}")
        return r

View File

@ -0,0 +1,56 @@
from typing import Dict, List
import requests
class RotatingObject:
    """
    Cyclic cursor over a non-empty list of items.

    This will be used for RotatingProxies and invidious instances.
    """

    def __init__(self, object_list: list):
        # an empty list has nothing to rotate through
        if len(object_list) < 1:
            raise ValueError("There needs to be at least one item in a Rotating structure.")

        self._object_list: list = object_list
        self._current_index = 0

    def __len__(self):
        return len(self._object_list)

    @property
    def object(self):
        """The item the cursor currently points at."""
        position = self._current_index
        return self._object_list[position]

    @property
    def next(self):
        """Move the cursor one step forward, wrapping around at the end, and return the item it lands on."""
        following = (self._current_index + 1) % len(self._object_list)
        self._current_index = following
        return self._object_list[following]
class RotatingProxy(RotatingObject):
    """
    Rotates through a list of proxy mappings and keeps the ``proxies``
    attribute of all registered sessions in sync with the current one.
    """

    def __init__(self, proxy_list: List[Dict[str, str]], session_list: List[requests.Session] = None):
        """
        :param proxy_list: the proxy mappings to rotate through; an empty list
            is replaced by a single empty mapping.
        :param session_list: sessions that should be updated on every rotation.
        """
        self._session_list: List[requests.Session] = session_list
        if self._session_list is None:
            self._session_list = []

        # the base class requires at least one item, so fall back to one empty mapping
        super().__init__(proxy_list if len(proxy_list) > 0 else [{}])

    def register_session(self, session: requests.Session):
        """Track ``session`` and immediately assign it the current proxy."""
        self._session_list.append(session)
        session.proxies = self.current_proxy

    def rotate(self):
        """Advance to the next proxy and assign it to every registered session."""
        new_proxy = self.next

        for session in self._session_list:
            session.proxies = new_proxy

    @property
    def current_proxy(self) -> Dict[str, str]:
        """The proxy mapping that is currently selected."""
        return super().object

    @property
    def next(self) -> Dict[str, str]:
        """Advance to the following proxy mapping and return it."""
        # Delegate to the base class' ``next`` so the index actually moves.
        # Returning ``super().object`` here would hand back the current proxy
        # without advancing, which makes ``rotate`` a no-op.
        return super().next

View File

@ -14,6 +14,7 @@ class ProxAttribute(ListAttribute):
'ftp': value 'ftp': value
} }
class UrlListAttribute(ListAttribute): class UrlListAttribute(ListAttribute):
def validate(self, value: str): def validate(self, value: str):
v = value.strip() v = value.strip()
@ -29,7 +30,6 @@ class UrlListAttribute(ListAttribute):
return urlparse(value) return urlparse(value)
class ConnectionSection(Section): class ConnectionSection(Section):
def __init__(self): def __init__(self):
self.PROXIES = ProxAttribute( self.PROXIES = ProxAttribute(
@ -80,6 +80,8 @@ class ConnectionSection(Section):
# INVIDIOUS PROXY # INVIDIOUS PROXY
self.INVIDIOUS_PROXY_VIDEOS = BoolAttribute( self.INVIDIOUS_PROXY_VIDEOS = BoolAttribute(
name="invidious_proxy_video", name="invidious_proxy_video",
value="false",
description="Downloads the videos using the given instances."
) )
self.attribute_list = [ self.attribute_list = [

View File

@ -1,7 +1,7 @@
import logging import logging
import random import random
from pathlib import Path from pathlib import Path
from typing import List, Tuple, Set from typing import List, Tuple, Set, Dict
from .path_manager import LOCATIONS from .path_manager import LOCATIONS
from .config import LOGGING_SECTION, AUDIO_SECTION, CONNECTION_SECTION, MISC_SECTION, PATHS_SECTION from .config import LOGGING_SECTION, AUDIO_SECTION, CONNECTION_SECTION, MISC_SECTION, PATHS_SECTION
@ -77,6 +77,7 @@ DEFAULT_VALUES = {
} }
TOR: bool = CONNECTION_SECTION.USE_TOR.object_from_value TOR: bool = CONNECTION_SECTION.USE_TOR.object_from_value
PROXIES_LIST: List[Dict[str, str]] = CONNECTION_SECTION.PROXIES.object_from_value
proxies = {} proxies = {}
if len(CONNECTION_SECTION.PROXIES) > 0: if len(CONNECTION_SECTION.PROXIES) > 0:
""" """