feat: build
This commit is contained in:
1
music_kraken/connection/__init__.py
Normal file
1
music_kraken/connection/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
from .connection import Connection
|
200
music_kraken/connection/cache.py
Normal file
200
music_kraken/connection/cache.py
Normal file
@@ -0,0 +1,200 @@
|
||||
import json
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta
|
||||
from typing import List, Optional
|
||||
from functools import lru_cache
|
||||
import logging
|
||||
|
||||
from ..utils.config import main_settings
|
||||
|
||||
|
||||
@dataclass
class CacheAttribute:
    """
    Metadata record for one cached file.

    ``module`` and ``name`` identify the payload on disk
    (``<cache_directory>/<module>/<name>``); ``created`` and ``expires``
    bound its lifetime in the index.
    """
    module: str
    name: str

    created: datetime
    expires: datetime

    @property
    def id(self) -> str:
        """Unique key of this entry, used for the id -> attribute lookup."""
        return f"{self.module}_{self.name}"

    @property
    def is_valid(self) -> bool:
        """True while the entry has not expired yet."""
        # the index is json, so after loading `expires` may still be an
        # iso formatted string; normalize it lazily on first access
        if isinstance(self.expires, str):
            self.expires = datetime.fromisoformat(self.expires)
        return datetime.now() < self.expires

    def __eq__(self, other):
        # comparing against a foreign type (e.g. an int) must not raise;
        # returning NotImplemented lets Python fall back to identity
        if not isinstance(other, CacheAttribute):
            return NotImplemented
        return self.__dict__ == other.__dict__
|
||||
|
||||
|
||||
class Cache:
|
||||
def __init__(self, module: str, logger: logging.Logger):
|
||||
self.module = module
|
||||
self.logger: logging.Logger = logger
|
||||
|
||||
self._dir = main_settings["cache_directory"]
|
||||
self.index = Path(self._dir, "index.json")
|
||||
|
||||
if not self.index.is_file():
|
||||
with self.index.open("w") as i:
|
||||
i.write(json.dumps([]))
|
||||
|
||||
self.cached_attributes: List[CacheAttribute] = []
|
||||
self._id_to_attribute = {}
|
||||
|
||||
self._time_fields = {"created", "expires"}
|
||||
with self.index.open("r") as i:
|
||||
for c in json.loads(i.read()):
|
||||
for key in self._time_fields:
|
||||
c[key] = datetime.fromisoformat(c[key])
|
||||
|
||||
ca = CacheAttribute(**c)
|
||||
self.cached_attributes.append(ca)
|
||||
self._id_to_attribute[ca.id] = ca
|
||||
|
||||
@lru_cache()
|
||||
def _init_module(self, module: str) -> Path:
|
||||
"""
|
||||
:param module:
|
||||
:return: the module path
|
||||
"""
|
||||
r = Path(self._dir, module)
|
||||
r.mkdir(exist_ok=True)
|
||||
return r
|
||||
|
||||
def _write_index(self, indent: int = 4):
|
||||
_json = []
|
||||
for c in self.cached_attributes:
|
||||
d = c.__dict__
|
||||
for key in self._time_fields:
|
||||
d[key] = d[key].isoformat()
|
||||
|
||||
_json.append(d)
|
||||
|
||||
with self.index.open("w") as f:
|
||||
f.write(json.dumps(_json, indent=indent))
|
||||
|
||||
def _write_attribute(self, cached_attribute: CacheAttribute, write: bool = True) -> bool:
|
||||
existing_attribute: Optional[CacheAttribute] = self._id_to_attribute.get(cached_attribute.id)
|
||||
if existing_attribute is not None:
|
||||
# the attribute exists
|
||||
if existing_attribute == cached_attribute:
|
||||
return True
|
||||
|
||||
if existing_attribute.is_valid:
|
||||
return False
|
||||
|
||||
existing_attribute.__dict__ = cached_attribute.__dict__
|
||||
else:
|
||||
self.cached_attributes.append(cached_attribute)
|
||||
self._id_to_attribute[cached_attribute.id] = cached_attribute
|
||||
|
||||
if write:
|
||||
self._write_index()
|
||||
|
||||
return True
|
||||
|
||||
def set(self, content: bytes, name: str, expires_in: float = 10, module: str = ""):
|
||||
"""
|
||||
:param content:
|
||||
:param module:
|
||||
:param name:
|
||||
:param expires_in: the unit is days
|
||||
:return:
|
||||
"""
|
||||
if name == "":
|
||||
return
|
||||
|
||||
module = self.module if module == "" else module
|
||||
|
||||
module_path = self._init_module(module)
|
||||
|
||||
cache_attribute = CacheAttribute(
|
||||
module=module,
|
||||
name=name,
|
||||
created=datetime.now(),
|
||||
expires=datetime.now() + timedelta(days=expires_in),
|
||||
)
|
||||
self._write_attribute(cache_attribute)
|
||||
|
||||
cache_path = Path(module_path, name)
|
||||
with cache_path.open("wb") as content_file:
|
||||
self.logger.debug(f"writing cache to {cache_path}")
|
||||
content_file.write(content)
|
||||
|
||||
def get(self, name: str) -> Optional[bytes]:
|
||||
path = Path(self._dir, self.module, name)
|
||||
|
||||
if not path.is_file():
|
||||
return None
|
||||
|
||||
# check if it is outdated
|
||||
existing_attribute: CacheAttribute = self._id_to_attribute[f"{self.module}_{name}"]
|
||||
if not existing_attribute.is_valid:
|
||||
return
|
||||
|
||||
with path.open("rb") as f:
|
||||
return f.read()
|
||||
|
||||
def clean(self):
|
||||
keep = set()
|
||||
|
||||
for ca in self.cached_attributes.copy():
|
||||
if ca.name == "":
|
||||
continue
|
||||
|
||||
file = Path(self._dir, ca.module, ca.name)
|
||||
|
||||
if not ca.is_valid:
|
||||
self.logger.debug(f"deleting cache {ca.id}")
|
||||
file.unlink()
|
||||
self.cached_attributes.remove(ca)
|
||||
del self._id_to_attribute[ca.id]
|
||||
|
||||
else:
|
||||
keep.add(file)
|
||||
|
||||
# iterate through every module (folder)
|
||||
for module_path in self._dir.iterdir():
|
||||
if not module_path.is_dir():
|
||||
continue
|
||||
|
||||
# delete all files not in keep
|
||||
for path in module_path.iterdir():
|
||||
if path not in keep:
|
||||
self.logger.info(f"Deleting cache {path}")
|
||||
path.unlink()
|
||||
|
||||
# delete all empty directories
|
||||
for path in module_path.iterdir():
|
||||
if path.is_dir() and not list(path.iterdir()):
|
||||
self.logger.debug(f"Deleting cache directory {path}")
|
||||
path.rmdir()
|
||||
|
||||
self._write_index()
|
||||
|
||||
def clear(self):
|
||||
"""
|
||||
delete every file in the cache directory
|
||||
:return:
|
||||
"""
|
||||
|
||||
for path in self._dir.iterdir():
|
||||
if path.is_dir():
|
||||
for file in path.iterdir():
|
||||
file.unlink()
|
||||
path.rmdir()
|
||||
else:
|
||||
path.unlink()
|
||||
|
||||
self.cached_attributes.clear()
|
||||
self._id_to_attribute.clear()
|
||||
|
||||
self._write_index()
|
||||
|
||||
def __repr__(self):
|
||||
return f"<Cache {self.module}>"
|
341
music_kraken/connection/connection.py
Normal file
341
music_kraken/connection/connection.py
Normal file
@@ -0,0 +1,341 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from typing import List, Dict, Optional, Set
|
||||
from urllib.parse import urlparse, urlunsplit, ParseResult
|
||||
import copy
|
||||
import inspect
|
||||
|
||||
import requests
|
||||
import responses
|
||||
from tqdm import tqdm
|
||||
|
||||
from .cache import Cache
|
||||
from .rotating import RotatingProxy
|
||||
from ..objects import Target
|
||||
from ..utils.config import main_settings
|
||||
from ..utils.support_classes.download_result import DownloadResult
|
||||
from ..utils.hacking import merge_args
|
||||
|
||||
|
||||
class Connection:
    """
    A requests-based HTTP client bound to one host: rotating proxies,
    retry with recursion, an on-disk response cache and an optional
    heartbeat thread.
    """

    def __init__(
            self,
            host: str,
            proxies: List[dict] = None,
            tries: int = (len(main_settings["proxies"]) + 1) * main_settings["tries_per_proxy"],
            timeout: int = 7,
            logger: logging.Logger = logging.getLogger("connection"),
            header_values: Dict[str, str] = None,
            accepted_response_codes: Set[int] = None,
            semantic_not_found: bool = True,
            sleep_after_404: float = 0.0,
            heartbeat_interval=0,
            module: str = "general",
            cache_expiring_duration: float = 10
    ):
        """
        :param host: base url of the service; its netloc/scheme seed the
            default headers and the Referer
        :param proxies: list of requests-style proxy dicts; falls back to
            main_settings["proxies"]
        :param tries: total request attempts before giving up
            (NOTE(review): the default is evaluated once at import time)
        :param timeout: per-request timeout in seconds
        :param accepted_response_codes: status codes treated as success
            (defaults to {200})
        :param semantic_not_found: treat 404 as a definitive "not found"
            instead of retrying
        :param sleep_after_404: seconds to wait before retrying a failure
        :param heartbeat_interval: seconds between heartbeat() calls;
            0 disables the heartbeat
        :param module: cache namespace for this connection
        :param cache_expiring_duration: default cache lifetime in days
        """
        if proxies is None:
            proxies = main_settings["proxies"]

        self.cache: Cache = Cache(module=module, logger=logger)
        self.cache_expiring_duration = cache_expiring_duration

        self.HEADER_VALUES = dict() if header_values is None else header_values

        self.LOGGER = logger
        self.HOST = urlparse(host)
        self.TRIES = tries
        self.TIMEOUT = timeout
        self.rotating_proxy = RotatingProxy(proxy_list=proxies)

        self.ACCEPTED_RESPONSE_CODES = accepted_response_codes or {200}
        self.SEMANTIC_NOT_FOUND = semantic_not_found
        self.sleep_after_404 = sleep_after_404

        self.session = requests.Session()
        self.session.headers = self.get_header(**self.HEADER_VALUES)
        self.session.proxies = self.rotating_proxy.current_proxy

        self.heartbeat_thread = None
        self.heartbeat_interval = heartbeat_interval

        # plain bool used as a cooperative "lock" between heartbeat and
        # regular requests; NOTE(review): not a real threading.Lock, so
        # this is not race-free — confirm whether that is acceptable
        self.lock: bool = False

    def start_heartbeat(self):
        """Spawn the daemon thread that periodically calls heartbeat()."""
        if self.heartbeat_interval <= 0:
            # NOTE(review): only warns — the thread is started anyway;
            # a `return` here looks intended
            self.LOGGER.warning(f"Can't start a heartbeat with {self.heartbeat_interval}s in between.")

        self.heartbeat_thread = threading.Thread(target=self._heartbeat_loop, args=(self.heartbeat_interval,), daemon=True)
        self.heartbeat_thread.start()

    def heartbeat_failed(self):
        """Hook called when a heartbeat request fails; override to react."""
        self.LOGGER.warning(f"The hearth couldn't beat.")

    def heartbeat(self):
        """Subclasses implement the actual keep-alive request here."""
        # Your code to send heartbeat requests goes here
        raise NotImplementedError("please implement the heartbeat function.")

    def _heartbeat_loop(self, interval: float):
        """Run heartbeat() forever, sleeping *interval* seconds between beats."""
        def heartbeat_wrapper():
            self.LOGGER.debug(f"The hearth is beating.")
            self.heartbeat()

        while True:
            heartbeat_wrapper()
            time.sleep(interval)

    def base_url(self, url: ParseResult = None):
        """Return scheme://netloc of *url* (default: the configured host)."""
        if url is None:
            url = self.HOST

        return urlunsplit((url.scheme, url.netloc, "", "", ""))

    def get_header(self, **header_values) -> Dict[str, str]:
        """Build the default header set; **header_values override entries."""
        return {
            "user-agent": main_settings["user_agent"],
            "User-Agent": main_settings["user_agent"],
            "Connection": "keep-alive",
            "Host": self.HOST.netloc,
            "Referer": self.base_url(),
            "Accept-Language": main_settings["language"],
            **header_values
        }

    def rotate(self):
        """Switch the session to the next proxy in the rotation."""
        self.session.proxies = self.rotating_proxy.rotate()

    def _update_headers(
            self,
            headers: Optional[dict],
            refer_from_origin: bool,
            url: ParseResult
    ) -> Dict[str, str]:
        """
        Merge *headers* over the defaults; when refer_from_origin is
        False the Referer is rewritten to the target url's origin.
        """
        headers = self.get_header(**(headers or {}))
        if not refer_from_origin:
            headers["Referer"] = self.base_url(url=url)

        return headers

    def save(self, r: requests.Response, name: str, error: bool = False, **kwargs):
        """
        Persist a response body to the cache under *name*; failed
        responses go to the separate "failed_requests" module.
        """
        n_kwargs = {}
        if error:
            n_kwargs["module"] = "failed_requests"

        self.cache.set(r.content, name, expires_in=kwargs.get("expires_in", self.cache_expiring_duration), **n_kwargs)

    def request(
            self,
            url: str,
            timeout: float = None,
            headers: Optional[dict] = None,
            try_count: int = 0,
            accepted_response_codes: set = None,
            refer_from_origin: bool = True,
            raw_url: bool = False,
            raw_headers: bool = False,
            sleep_after_404: float = None,
            is_heartbeat: bool = False,
            disable_cache: bool = None,
            method: str = None,
            name: str = "",
            **kwargs
    ) -> Optional[requests.Response]:
        """
        Perform one HTTP request with caching, proxy rotation and retry.

        :param raw_url: pass *url* to requests untouched instead of the
            re-serialized parse result
        :param raw_headers: skip merging with the default header set
        :param disable_cache: defaults to True when the caller sent
            Cache-Control: no-cache
        :param name: cache key; "" disables caching for this call
        :return: the response, or None after TRIES attempts / a 404 with
            SEMANTIC_NOT_FOUND
        :raises AttributeError: if *method* is not given
        """
        if method is None:
            raise AttributeError("method is not set.")
        method = method.upper()
        headers = dict() if headers is None else headers
        disable_cache = headers.get("Cache-Control", "").lower() == "no-cache" if disable_cache is None else disable_cache
        accepted_response_codes = self.ACCEPTED_RESPONSE_CODES if accepted_response_codes is None else accepted_response_codes

        # snapshot of all current arguments, used to recurse for retries;
        # must be taken before the locals below are reassigned
        current_kwargs = copy.copy(locals())
        current_kwargs.pop("kwargs")
        current_kwargs.update(**kwargs)

        parsed_url = urlparse(url)

        if not raw_headers:
            _headers = copy.copy(self.HEADER_VALUES)
            _headers.update(headers)

            headers = self._update_headers(
                headers=_headers,
                refer_from_origin=refer_from_origin,
                url=parsed_url
            )
        else:
            headers = headers or {}

        request_url = parsed_url.geturl() if not raw_url else url

        if name != "" and not disable_cache:
            cached = self.cache.get(name)

            if cached is not None:
                # replay the cached body through a responses mock so the
                # caller still receives a real requests.Response object
                with responses.RequestsMock() as resp:
                    resp.add(
                        method=method,
                        url=request_url,
                        body=cached,
                    )
                    return requests.request(method=method, url=url, timeout=timeout, headers=headers, **kwargs)

        if sleep_after_404 is None:
            sleep_after_404 = self.sleep_after_404
        if try_count >= self.TRIES:
            return

        if timeout is None:
            timeout = self.TIMEOUT

        r = None
        connection_failed = False
        try:
            if self.lock:
                self.LOGGER.info(f"Waiting for the heartbeat to finish.")
                # NOTE(review): busy-wait burns a core while the
                # heartbeat holds the flag — confirm this is intentional
                while self.lock and not is_heartbeat:
                    pass

            self.lock = True
            r: requests.Response = requests.request(method=method, url=url, timeout=timeout, headers=headers, **kwargs)

            if r.status_code in accepted_response_codes:
                if not disable_cache:
                    self.save(r, name, **kwargs)
                return r

            if self.SEMANTIC_NOT_FOUND and r.status_code == 404:
                self.LOGGER.warning(f"Couldn't find url (404): {request_url}")
                return None

        # the server rejected the request, or the internet is lacking
        except requests.exceptions.Timeout:
            self.LOGGER.warning(f"Request timed out at \"{request_url}\": ({try_count}-{self.TRIES})")
            connection_failed = True
        except requests.exceptions.ConnectionError:
            self.LOGGER.warning(f"Couldn't connect to \"{request_url}\": ({try_count}-{self.TRIES})")
            connection_failed = True

        # this is important for thread safety
        finally:
            self.lock = False

        if not connection_failed:
            # NOTE(review): "responded wit" typo in the log line — left
            # unchanged here because it is a runtime string
            self.LOGGER.warning(f"{self.HOST.netloc} responded wit {r.status_code} at {url}. ({try_count}-{self.TRIES})")
            if r is not None:
                self.LOGGER.debug("request headers:\n\t"+ "\n\t".join(f"{k}\t=\t{v}" for k, v in r.request.headers.items()))
                self.LOGGER.debug("response headers:\n\t"+ "\n\t".join(f"{k}\t=\t{v}" for k, v in r.headers.items()))
                self.LOGGER.debug(r.content)

            if name != "":
                self.save(r, name, error=True, **kwargs)

        if sleep_after_404 != 0:
            self.LOGGER.warning(f"Waiting for {sleep_after_404} seconds.")
            time.sleep(sleep_after_404)

        self.rotate()

        # retry by recursing with the snapshotted arguments
        current_kwargs["try_count"] = current_kwargs.get("try_count", 0) + 1
        return Connection.request(**current_kwargs)

    @merge_args(request)
    def get(self, *args, **kwargs) -> Optional[requests.Response]:
        """Convenience wrapper for request(method="GET")."""
        return self.request(
            *args,
            method="GET",
            **kwargs
        )

    @merge_args(request)
    def post(
            self,
            *args,
            json: dict = None,
            **kwargs
    ) -> Optional[requests.Response]:
        """Convenience wrapper for request(method="POST") with a json body."""
        r = self.request(
            *args,
            method="POST",
            json=json,
            **kwargs
        )
        if r is None:
            self.LOGGER.warning(f"payload: {json}")
        return r

    @merge_args(request)
    def stream_into(
            self,
            url: str,
            target: Target,
            name: str = "download",
            chunk_size: int = main_settings["chunk_size"],
            progress: int = 0,
            method: str = "GET",
            try_count: int = 0,
            accepted_response_codes: set = None,
            **kwargs
    ) -> DownloadResult:
        """
        Stream *url* into *target*, appending, with a tqdm progress bar
        and resume-on-failure via Range requests.

        :param target: destination file wrapper (opened in "ab" mode)
        :param progress: bytes already downloaded; > 0 requests a resume
        :return: a DownloadResult describing success or the error
        """
        accepted_response_codes = self.ACCEPTED_RESPONSE_CODES if accepted_response_codes is None else accepted_response_codes
        # snapshot for the retry recursion below;
        # NOTE(review): taken before try_count is incremented, so the
        # recursive call restarts with the ORIGINAL try_count — verify
        # this cannot loop forever
        stream_kwargs = copy.copy(locals())
        stream_kwargs.update(stream_kwargs.pop("kwargs"))

        if "description" in kwargs:
            name = kwargs.pop("description")

        if progress > 0:
            # NOTE(review): `headers` is not a parameter of this method;
            # unless the caller passed it via merge_args/kwargs this line
            # raises UnboundLocalError — confirm against callers
            headers = dict() if headers is None else headers
            headers["Range"] = f"bytes={target.size}-"

        r = self.request(
            url=url,
            name=name,
            method=method,
            stream=True,
            accepted_response_codes=accepted_response_codes,
            **kwargs
        )

        if r is None:
            return DownloadResult(error_message=f"Could not establish a stream from: {url}")

        target.create_path()
        total_size = int(r.headers.get('content-length'))
        progress = 0

        retry = False

        with target.open("ab") as f:
            """
            https://en.wikipedia.org/wiki/Kilobyte
            > The internationally recommended unit symbol for the kilobyte is kB.
            """

            with tqdm(total=total_size, initial=target.size, unit='B', unit_scale=True, unit_divisor=1024, desc=name) as t:
                try:
                    for chunk in r.iter_content(chunk_size=chunk_size):
                        size = f.write(chunk)
                        progress += size
                        t.update(size)

                except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, requests.exceptions.ChunkedEncodingError):
                    if try_count >= self.TRIES:
                        self.LOGGER.warning(f"Stream timed out at \"{url}\": to many retries, aborting.")
                        return DownloadResult(error_message=f"Stream timed out from {url}, reducing the chunk_size might help.")

                    self.LOGGER.warning(f"Stream timed out at \"{url}\": ({try_count}-{self.TRIES})")
                    retry = True
                    try_count += 1

        # a short read without an exception also triggers a retry
        if total_size > progress:
            retry = True

        if retry:
            self.LOGGER.warning(f"Retrying stream...")
            # 206 Partial Content must be accepted for the Range resume
            accepted_response_codes.add(206)
            return Connection.stream_into(**stream_kwargs)

        return DownloadResult()
|
43
music_kraken/connection/rotating.py
Normal file
43
music_kraken/connection/rotating.py
Normal file
@@ -0,0 +1,43 @@
|
||||
from typing import Dict, List
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
class RotatingObject:
    """
    Round-robin container over a fixed list of items; the base class
    for rotating proxies and invidious instances.
    """

    def __init__(self, object_list: list):
        self._object_list: list = object_list

        if not self._object_list:
            raise ValueError("There needs to be at least one item in a Rotating structure.")

        self._current_index = 0

    @property
    def object(self):
        """The element the rotation currently points at."""
        return self._object_list[self._current_index]

    def __len__(self):
        return len(self._object_list)

    @property
    def next(self):
        """Advance one step (wrapping around) and return the new element."""
        upcoming = (self._current_index + 1) % len(self._object_list)
        self._current_index = upcoming
        return self._object_list[upcoming]
|
||||
|
||||
|
||||
class RotatingProxy(RotatingObject):
    """Rotation over requests-style proxy mappings."""

    def __init__(self, proxy_list: List[Dict[str, str]]):
        # an empty proxy list would raise in RotatingObject, so rotate
        # over a single None entry ("no proxy") instead
        super().__init__([None] if len(proxy_list) == 0 else proxy_list)

    def rotate(self) -> Dict[str, str]:
        """Switch to the next proxy and return it."""
        return self.next

    @property
    def current_proxy(self) -> Dict[str, str]:
        """The proxy mapping currently in use."""
        return super().object
|
Reference in New Issue
Block a user