feat: made tries per proxy configurable

This commit is contained in:
Hazel 2024-02-28 09:31:00 +01:00
parent 42729cd585
commit 7f6db2781d
9 changed files with 44 additions and 47 deletions

View File

@ -22,3 +22,4 @@ guppy3~=3.1.3
toml~=0.10.2 toml~=0.10.2
typing_extensions~=4.7.1 typing_extensions~=4.7.1
responses~=0.24.1 responses~=0.24.1
youtube_dl

View File

@ -46,7 +46,7 @@ if __name__ == "__main__":
bandcamp_test = [ bandcamp_test = [
"s: #a Only Smile", "s: #a Only Smile",
"d: 18", "d: 7",
] ]

View File

@ -12,7 +12,8 @@ def fetch_artist():
artist = objects.Artist( artist = objects.Artist(
source_list=[ source_list=[
objects.Source(objects.SourcePages.MUSIFY, "https://musify.club/artist/psychonaut-4-83193"), objects.Source(objects.SourcePages.MUSIFY, "https://musify.club/artist/psychonaut-4-83193"),
objects.Source(objects.SourcePages.ENCYCLOPAEDIA_METALLUM, "https://www.metal-archives.com/bands/Ghost_Bath/3540372489") objects.Source(objects.SourcePages.ENCYCLOPAEDIA_METALLUM,
"https://www.metal-archives.com/bands/Ghost_Bath/3540372489")
] ]
) )

View File

@ -174,12 +174,12 @@ class Downloader:
page_count = 0 page_count = 0
for option in self.current_results.formated_generator(max_items_per_page=self.max_displayed_options): for option in self.current_results.formated_generator(max_items_per_page=self.max_displayed_options):
if isinstance(option, Option): if isinstance(option, Option):
color = BColors.BOLD if self.pages.is_downloadable(option.music_object) else BColors.GREY color = BColors.BOLD.value if self.pages.is_downloadable(option.music_object) else BColors.GREY.value
print(f"{color}{option.index:0{self.option_digits}} {option.music_object.option_string}{BColors.ENDC}") print(f"{color}{option.index:0{self.option_digits}} {option.music_object.option_string}{BColors.ENDC.value}")
else: else:
prefix = ALPHABET[page_count % len(ALPHABET)] prefix = ALPHABET[page_count % len(ALPHABET)]
print( print(
f"{BColors.HEADER}({prefix}) ------------------------{option.__name__:{PAGE_NAME_FILL}<{MAX_PAGE_LEN}}------------{BColors.ENDC}") f"{BColors.HEADER.value}({prefix}) ------------------------{option.__name__:{PAGE_NAME_FILL}<{MAX_PAGE_LEN}}------------{BColors.ENDC.value}")
self.page_dict[prefix] = option self.page_dict[prefix] = option
self.page_dict[option.__name__] = option self.page_dict[option.__name__] = option
@ -379,7 +379,7 @@ class Downloader:
return False return False
if processed_input != "help": if processed_input != "help":
print(f"{BColors.WARNING}Invalid input.{BColors.ENDC}") print(f"{BColors.WARNING.value}Invalid input.{BColors.ENDC.value}")
help_message() help_message()
return False return False
@ -402,9 +402,9 @@ def download(
if code == 0: if code == 0:
main_settings["hasnt_yet_started"] = False main_settings["hasnt_yet_started"] = False
write_config() write_config()
print(f"{BColors.OKGREEN}Restart the programm to use it.{BColors.ENDC}") print(f"{BColors.OKGREEN.value}Restart the programm to use it.{BColors.ENDC.value}")
else: else:
print(f"{BColors.FAIL}Something went wrong configuring.{BColors.ENDC}") print(f"{BColors.FAIL.value}Something went wrong configuring.{BColors.ENDC.value}")
shell = Downloader(genre=genre, process_metadata_anyway=process_metadata_anyway) shell = Downloader(genre=genre, process_metadata_anyway=process_metadata_anyway)

View File

@ -3,6 +3,7 @@ import threading
import time import time
from typing import List, Dict, Optional, Set from typing import List, Dict, Optional, Set
from urllib.parse import urlparse, urlunsplit, ParseResult from urllib.parse import urlparse, urlunsplit, ParseResult
import copy
import requests import requests
import responses import responses
@ -20,7 +21,7 @@ class Connection:
self, self,
host: str, host: str,
proxies: List[dict] = None, proxies: List[dict] = None,
tries: int = (len(main_settings["proxies"]) + 1) * 4, tries: int = (len(main_settings["proxies"]) + 1) * main_settings["tries_per_proxy"],
timeout: int = 7, timeout: int = 7,
logger: logging.Logger = logging.getLogger("connection"), logger: logging.Logger = logging.getLogger("connection"),
header_values: Dict[str, str] = None, header_values: Dict[str, str] = None,
@ -55,34 +56,29 @@ class Connection:
self.session.headers = self.get_header(**self.HEADER_VALUES) self.session.headers = self.get_header(**self.HEADER_VALUES)
self.session.proxies = self.rotating_proxy.current_proxy self.session.proxies = self.rotating_proxy.current_proxy
self.session_is_occupied: bool = False
self.heartbeat_thread = None self.heartbeat_thread = None
self.heartbeat_interval = heartbeat_interval self.heartbeat_interval = heartbeat_interval
self.lock: bool = False
def start_heartbeat(self): def start_heartbeat(self):
if self.heartbeat_interval <= 0: if self.heartbeat_interval <= 0:
self.LOGGER.warning(f"Can't start a heartbeat with {self.heartbeat_interval}s in between.") self.LOGGER.warning(f"Can't start a heartbeat with {self.heartbeat_interval}s in between.")
self.heartbeat_thread = threading.Thread(target=self._heartbeat_loop, args=(self.heartbeat_interval,), self.heartbeat_thread = threading.Thread(target=self._heartbeat_loop, args=(self.heartbeat_interval,), daemon=True)
daemon=True)
self.heartbeat_thread.start() self.heartbeat_thread.start()
def heartbeat_failed(self): def heartbeat_failed(self):
self.LOGGER.warning(f"I just died... (The heartbeat failed)") self.LOGGER.warning(f"The hearth couldn't beat.")
def heartbeat(self): def heartbeat(self):
# Your code to send heartbeat requests goes here # Your code to send heartbeat requests goes here
print( raise NotImplementedError("please implement the heartbeat function.")
"the hearth is beating, but it needs to be implemented ;-;\nFuck youuuu for setting heartbeat in the constructor to true, but not implementing the method Connection.hearbeat()")
def _heartbeat_loop(self, interval: float): def _heartbeat_loop(self, interval: float):
def heartbeat_wrapper(): def heartbeat_wrapper():
self.session_is_occupied = True self.LOGGER.debug(f"The hearth is beating.")
self.LOGGER.debug(f"I am living. (sending a heartbeat)")
self.heartbeat() self.heartbeat()
self.LOGGER.debug(f"finished the heartbeat")
self.session_is_occupied = False
while True: while True:
heartbeat_wrapper() heartbeat_wrapper()
@ -100,7 +96,6 @@ class Connection:
"User-Agent": main_settings["user_agent"], "User-Agent": main_settings["user_agent"],
"Connection": "keep-alive", "Connection": "keep-alive",
"Host": self.HOST.netloc, "Host": self.HOST.netloc,
"authority": self.HOST.netloc,
"Referer": self.base_url(), "Referer": self.base_url(),
"Accept-Language": main_settings["language"], "Accept-Language": main_settings["language"],
**header_values **header_values
@ -143,6 +138,8 @@ class Connection:
name: str = "", name: str = "",
**kwargs **kwargs
) -> Optional[requests.Response]: ) -> Optional[requests.Response]:
current_kwargs = copy.copy(locals)
parsed_url = urlparse(url) parsed_url = urlparse(url)
headers = self._update_headers( headers = self._update_headers(
@ -179,12 +176,12 @@ class Connection:
r = None r = None
connection_failed = False connection_failed = False
try: try:
if self.session_is_occupied and not is_heartbeat: if self.lock:
self.LOGGER.info(f"Waiting for the heartbeat to finish.") self.LOGGER.info(f"Waiting for the heartbeat to finish.")
while self.session_is_occupied and not is_heartbeat: while self.lock and not is_heartbeat:
pass pass
print(headers) self.lock = True
r: requests.Response = requests.request(method=method, url=url, timeout=timeout, headers=headers, **kwargs) r: requests.Response = requests.request(method=method, url=url, timeout=timeout, headers=headers, **kwargs)
if r.status_code in accepted_response_codes: if r.status_code in accepted_response_codes:
@ -196,6 +193,7 @@ class Connection:
self.LOGGER.warning(f"Couldn't find url (404): {request_url}") self.LOGGER.warning(f"Couldn't find url (404): {request_url}")
return None return None
# the server rejected the request, or the internet is lacking
except requests.exceptions.Timeout: except requests.exceptions.Timeout:
self.LOGGER.warning(f"Request timed out at \"{request_url}\": ({try_count}-{self.TRIES})") self.LOGGER.warning(f"Request timed out at \"{request_url}\": ({try_count}-{self.TRIES})")
connection_failed = True connection_failed = True
@ -203,6 +201,10 @@ class Connection:
self.LOGGER.warning(f"Couldn't connect to \"{request_url}\": ({try_count}-{self.TRIES})") self.LOGGER.warning(f"Couldn't connect to \"{request_url}\": ({try_count}-{self.TRIES})")
connection_failed = True connection_failed = True
# this is important for thread safety
finally:
self.lock = False
if not connection_failed: if not connection_failed:
self.LOGGER.warning(f"{self.HOST.netloc} responded wit {r.status_code} " self.LOGGER.warning(f"{self.HOST.netloc} responded wit {r.status_code} "
f"at {url}. ({try_count}-{self.TRIES})") f"at {url}. ({try_count}-{self.TRIES})")
@ -210,6 +212,7 @@ class Connection:
self.LOGGER.debug("request headers:\n\t"+ "\n\t".join(f"{k}\t=\t{v}" for k, v in r.request.headers.items())) self.LOGGER.debug("request headers:\n\t"+ "\n\t".join(f"{k}\t=\t{v}" for k, v in r.request.headers.items()))
self.LOGGER.debug("response headers:\n\t"+ "\n\t".join(f"{k}\t=\t{v}" for k, v in r.headers.items())) self.LOGGER.debug("response headers:\n\t"+ "\n\t".join(f"{k}\t=\t{v}" for k, v in r.headers.items()))
self.LOGGER.debug(r.content) self.LOGGER.debug(r.content)
if name != "": if name != "":
self.save(r, name, error=True, **kwargs) self.save(r, name, error=True, **kwargs)
@ -219,21 +222,8 @@ class Connection:
self.rotate() self.rotate()
if self.heartbeat_interval > 0 and self.heartbeat_thread is None: current_kwargs["try_count"] = current_kwargs.get("try_count", 0) + 1
self.start_heartbeat() return self.request(**current_kwargs)
return self.request(
method=method,
try_count=try_count + 1,
accepted_response_codes=accepted_response_codes,
url=url,
timeout=timeout,
headers=headers,
sleep_after_404=sleep_after_404,
is_heartbeat=is_heartbeat,
name=name,
**kwargs
)
def get( def get(
self, self,

View File

@ -76,7 +76,7 @@ class YoutubeMusicConnection(Connection):
) )
def heartbeat(self): def heartbeat(self):
r = self.get("https://music.youtube.com/verify_session", is_heartbeat=True) r = self.get("https://music.youtube.com/verify_session")
if r is None: if r is None:
self.heartbeat_failed() self.heartbeat_failed()
return return
@ -516,7 +516,7 @@ class YoutubeMusic(SuperYouTube):
return self.download_connection.stream_into(source.audio_url, target, description=desc, headers={ return self.download_connection.stream_into(source.audio_url, target, description=desc, headers={
"Host": "rr1---sn-cxaf0x-nugl.googlevideo.com" "Host": "rr1---sn-cxaf0x-nugl.googlevideo.com"
}, raw_url=True) }, raw_url=True, disable_cache=True)
def __del__(self): def __del__(self):
self.ydl.__exit__() self.ydl.__exit__()

View File

@ -9,7 +9,7 @@ from ..attributes.attribute import Attribute, EmptyLine, Description
from ..attributes.special_attributes import ( from ..attributes.special_attributes import (
SelectAttribute, SelectAttribute,
PathAttribute, PathAttribute,
AudioFormatAttribute, AudioFormatAttribute
) )
config = Config(( config = Config((
@ -72,6 +72,11 @@ all the error messages are shown."""),
"Currently it just sets the User-Agent header.\n" "Currently it just sets the User-Agent header.\n"
"https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent" "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent"
), ),
Attribute(
name="tries_per_proxy",
default_value=2,
description="The retries it should do. These can be overridden by the program, at certain places, and they have to be.",
),
EmptyLine(), EmptyLine(),
@ -79,8 +84,7 @@ all the error messages are shown."""),
PathAttribute(name="temp_directory", default_value=LOCATIONS.TEMP_DIRECTORY.resolve(), description="All temporary stuff is gonna be dumped in this directory."), PathAttribute(name="temp_directory", default_value=LOCATIONS.TEMP_DIRECTORY.resolve(), description="All temporary stuff is gonna be dumped in this directory."),
PathAttribute(name="log_file", default_value=LOCATIONS.get_log_file("download_logs.log").resolve()), PathAttribute(name="log_file", default_value=LOCATIONS.get_log_file("download_logs.log").resolve()),
PathAttribute(name="ffmpeg_binary", default_value=LOCATIONS.FFMPEG_BIN.resolve(), description="Set the path to the ffmpeg binary."), PathAttribute(name="ffmpeg_binary", default_value=LOCATIONS.FFMPEG_BIN.resolve(), description="Set the path to the ffmpeg binary."),
PathAttribute(name="cache_directory", default_value=LOCATIONS.CACHE_DIRECTORY.resolve(), PathAttribute(name="cache_directory", default_value=LOCATIONS.CACHE_DIRECTORY.resolve(), description="Set the path of the cache directory."),
description="Set the path of the cache directory."),
Attribute( Attribute(
name="not_a_genre_regex", name="not_a_genre_regex",
description="These regular expressions tell music-kraken, which sub-folders of the music-directory\n" description="These regular expressions tell music-kraken, which sub-folders of the music-directory\n"
@ -132,6 +136,7 @@ class SettingsStructure(TypedDict):
# connection # connection
proxies: List[dict[str, str]] proxies: List[dict[str, str]]
tries_per_proxy: int
tor: bool tor: bool
tor_port: int tor_port: int
chunk_size: int chunk_size: int

View File

@ -94,5 +94,5 @@ class DownloadResult:
return head return head
_lines = [head] _lines = [head]
_lines.extend(BColors.FAIL + s + BColors.ENDC for s in self._error_message_list) _lines.extend(BColors.FAIL.value + s + BColors.ENDC.value for s in self._error_message_list)
return "\n".join(_lines) return "\n".join(_lines)