From 7f6db2781d9720da62142c86ffaf95e4e1d9c1c7 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 28 Feb 2024 09:31:00 +0100 Subject: [PATCH] feat: made tries per proxy configurable --- requirements.txt | 3 +- src/actual_donwload.py | 2 +- src/metal_archives.py | 3 +- src/music_kraken/cli/main_downloader.py | 12 ++--- src/music_kraken/connection/connection.py | 54 ++++++++----------- .../pages/youtube_music/youtube_music.py | 4 +- .../utils/config/attributes/__init__,py | 0 .../utils/config/config_files/main_config.py | 11 ++-- .../utils/support_classes/download_result.py | 2 +- 9 files changed, 44 insertions(+), 47 deletions(-) delete mode 100644 src/music_kraken/utils/config/attributes/__init__,py diff --git a/requirements.txt b/requirements.txt index 200fa36..da260dd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,4 +21,5 @@ guppy3~=3.1.3 toml~=0.10.2 typing_extensions~=4.7.1 -responses~=0.24.1 \ No newline at end of file +responses~=0.24.1 +youtube_dl \ No newline at end of file diff --git a/src/actual_donwload.py b/src/actual_donwload.py index 61345a2..28e071d 100644 --- a/src/actual_donwload.py +++ b/src/actual_donwload.py @@ -46,7 +46,7 @@ if __name__ == "__main__": bandcamp_test = [ "s: #a Only Smile", - "d: 18", + "d: 7", ] diff --git a/src/metal_archives.py b/src/metal_archives.py index e603849..fefebac 100644 --- a/src/metal_archives.py +++ b/src/metal_archives.py @@ -12,7 +12,8 @@ def fetch_artist(): artist = objects.Artist( source_list=[ objects.Source(objects.SourcePages.MUSIFY, "https://musify.club/artist/psychonaut-4-83193"), - objects.Source(objects.SourcePages.ENCYCLOPAEDIA_METALLUM, "https://www.metal-archives.com/bands/Ghost_Bath/3540372489") + objects.Source(objects.SourcePages.ENCYCLOPAEDIA_METALLUM, + "https://www.metal-archives.com/bands/Ghost_Bath/3540372489") ] ) diff --git a/src/music_kraken/cli/main_downloader.py b/src/music_kraken/cli/main_downloader.py index ce048ca..be53252 100644 --- a/src/music_kraken/cli/main_downloader.py +++ b/src/music_kraken/cli/main_downloader.py @@ -174,12 +174,12 @@ class Downloader: page_count = 0 for option in self.current_results.formated_generator(max_items_per_page=self.max_displayed_options): if isinstance(option, Option): - color = BColors.BOLD if self.pages.is_downloadable(option.music_object) else BColors.GREY - print(f"{color}{option.index:0{self.option_digits}} {option.music_object.option_string}{BColors.ENDC}") + color = BColors.BOLD.value if self.pages.is_downloadable(option.music_object) else BColors.GREY.value + print(f"{color}{option.index:0{self.option_digits}} {option.music_object.option_string}{BColors.ENDC.value}") else: prefix = ALPHABET[page_count % len(ALPHABET)] print( - f"{BColors.HEADER}({prefix}) ------------------------{option.__name__:{PAGE_NAME_FILL}<{MAX_PAGE_LEN}}------------{BColors.ENDC}") + f"{BColors.HEADER.value}({prefix}) ------------------------{option.__name__:{PAGE_NAME_FILL}<{MAX_PAGE_LEN}}------------{BColors.ENDC.value}") self.page_dict[prefix] = option self.page_dict[option.__name__] = option @@ -379,7 +379,7 @@ class Downloader: return False if processed_input != "help": - print(f"{BColors.WARNING}Invalid input.{BColors.ENDC}") + print(f"{BColors.WARNING.value}Invalid input.{BColors.ENDC.value}") help_message() return False @@ -402,9 +402,9 @@ def download( if code == 0: main_settings["hasnt_yet_started"] = False write_config() - print(f"{BColors.OKGREEN}Restart the programm to use it.{BColors.ENDC}") + print(f"{BColors.OKGREEN.value}Restart the programm to use it.{BColors.ENDC.value}") else: - print(f"{BColors.FAIL}Something went wrong configuring.{BColors.ENDC}") + print(f"{BColors.FAIL.value}Something went wrong configuring.{BColors.ENDC.value}") shell = Downloader(genre=genre, process_metadata_anyway=process_metadata_anyway) diff --git a/src/music_kraken/connection/connection.py b/src/music_kraken/connection/connection.py index 9a66a12..29ed3d3 100644 --- a/src/music_kraken/connection/connection.py +++ b/src/music_kraken/connection/connection.py @@ -3,6 +3,7 @@ import threading import time from typing import List, Dict, Optional, Set from urllib.parse import urlparse, urlunsplit, ParseResult +import copy import requests import responses @@ -20,7 +21,7 @@ class Connection: self, host: str, proxies: List[dict] = None, - tries: int = (len(main_settings["proxies"]) + 1) * 4, + tries: int = (len(main_settings["proxies"]) + 1) * main_settings["tries_per_proxy"], timeout: int = 7, logger: logging.Logger = logging.getLogger("connection"), header_values: Dict[str, str] = None, @@ -55,34 +56,29 @@ class Connection: self.session.headers = self.get_header(**self.HEADER_VALUES) self.session.proxies = self.rotating_proxy.current_proxy - self.session_is_occupied: bool = False - self.heartbeat_thread = None self.heartbeat_interval = heartbeat_interval + self.lock: bool = False + def start_heartbeat(self): if self.heartbeat_interval <= 0: self.LOGGER.warning(f"Can't start a heartbeat with {self.heartbeat_interval}s in between.") - self.heartbeat_thread = threading.Thread(target=self._heartbeat_loop, args=(self.heartbeat_interval,), - daemon=True) + self.heartbeat_thread = threading.Thread(target=self._heartbeat_loop, args=(self.heartbeat_interval,), daemon=True) self.heartbeat_thread.start() def heartbeat_failed(self): - self.LOGGER.warning(f"I just died... (The heartbeat failed)") + self.LOGGER.warning(f"The hearth couldn't beat.") def heartbeat(self): # Your code to send heartbeat requests goes here - print( - "the hearth is beating, but it needs to be implemented ;-;\nFuck youuuu for setting heartbeat in the constructor to true, but not implementing the method Connection.hearbeat()") + raise NotImplementedError("please implement the heartbeat function.") def _heartbeat_loop(self, interval: float): def heartbeat_wrapper(): - self.session_is_occupied = True - self.LOGGER.debug(f"I am living. (sending a heartbeat)") + self.LOGGER.debug(f"The hearth is beating.") self.heartbeat() - self.LOGGER.debug(f"finished the heartbeat") - self.session_is_occupied = False while True: heartbeat_wrapper() @@ -100,7 +96,6 @@ class Connection: "User-Agent": main_settings["user_agent"], "Connection": "keep-alive", "Host": self.HOST.netloc, - "authority": self.HOST.netloc, "Referer": self.base_url(), "Accept-Language": main_settings["language"], **header_values @@ -143,6 +138,8 @@ class Connection: name: str = "", **kwargs ) -> Optional[requests.Response]: + current_kwargs = copy.copy(locals) + parsed_url = urlparse(url) headers = self._update_headers( @@ -179,12 +176,12 @@ class Connection: r = None connection_failed = False try: - if self.session_is_occupied and not is_heartbeat: + if self.lock: self.LOGGER.info(f"Waiting for the heartbeat to finish.") - while self.session_is_occupied and not is_heartbeat: + while self.lock and not is_heartbeat: pass - - print(headers) + + self.lock = True r: requests.Response = requests.request(method=method, url=url, timeout=timeout, headers=headers, **kwargs) if r.status_code in accepted_response_codes: @@ -196,6 +193,7 @@ class Connection: self.LOGGER.warning(f"Couldn't find url (404): {request_url}") return None + # the server rejected the request, or the internet is lacking except requests.exceptions.Timeout: self.LOGGER.warning(f"Request timed out at \"{request_url}\": ({try_count}-{self.TRIES})") connection_failed = True @@ -203,6 +201,10 @@ class Connection: self.LOGGER.warning(f"Couldn't connect to \"{request_url}\": ({try_count}-{self.TRIES})") connection_failed = True + # this is important for thread safety + finally: + self.lock = False + if not connection_failed: self.LOGGER.warning(f"{self.HOST.netloc} responded wit {r.status_code} " f"at {url}. ({try_count}-{self.TRIES})") @@ -210,6 +212,7 @@ class Connection: self.LOGGER.debug("request headers:\n\t"+ "\n\t".join(f"{k}\t=\t{v}" for k, v in r.request.headers.items())) self.LOGGER.debug("response headers:\n\t"+ "\n\t".join(f"{k}\t=\t{v}" for k, v in r.headers.items())) self.LOGGER.debug(r.content) + if name != "": self.save(r, name, error=True, **kwargs) @@ -219,21 +222,8 @@ class Connection: self.rotate() - if self.heartbeat_interval > 0 and self.heartbeat_thread is None: - self.start_heartbeat() - - return self.request( - method=method, - try_count=try_count + 1, - accepted_response_codes=accepted_response_codes, - url=url, - timeout=timeout, - headers=headers, - sleep_after_404=sleep_after_404, - is_heartbeat=is_heartbeat, - name=name, - **kwargs - ) + current_kwargs["try_count"] = current_kwargs.get("try_count", 0) + 1 + return self.request(**current_kwargs) def get( self, diff --git a/src/music_kraken/pages/youtube_music/youtube_music.py b/src/music_kraken/pages/youtube_music/youtube_music.py index 7d18f63..fdac1f5 100644 --- a/src/music_kraken/pages/youtube_music/youtube_music.py +++ b/src/music_kraken/pages/youtube_music/youtube_music.py @@ -76,7 +76,7 @@ class YoutubeMusicConnection(Connection): ) def heartbeat(self): - r = self.get("https://music.youtube.com/verify_session", is_heartbeat=True) + r = self.get("https://music.youtube.com/verify_session") if r is None: self.heartbeat_failed() return @@ -516,7 +516,7 @@ class YoutubeMusic(SuperYouTube): return self.download_connection.stream_into(source.audio_url, target, description=desc, headers={ "Host": "rr1---sn-cxaf0x-nugl.googlevideo.com" - }, raw_url=True) + }, raw_url=True, disable_cache=True) def __del__(self): self.ydl.__exit__() diff --git a/src/music_kraken/utils/config/attributes/__init__,py b/src/music_kraken/utils/config/attributes/__init__,py deleted file mode 100644 index e69de29..0000000 diff --git a/src/music_kraken/utils/config/config_files/main_config.py b/src/music_kraken/utils/config/config_files/main_config.py index 4508571..395e271 100644 --- a/src/music_kraken/utils/config/config_files/main_config.py +++ b/src/music_kraken/utils/config/config_files/main_config.py @@ -9,7 +9,7 @@ from ..attributes.attribute import Attribute, EmptyLine, Description from ..attributes.special_attributes import ( SelectAttribute, PathAttribute, - AudioFormatAttribute, + AudioFormatAttribute ) config = Config(( @@ -72,6 +72,11 @@ all the error messages are shown."""), "Currently it just sets the User-Agent header.\n" "https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent" ), + Attribute( + name="tries_per_proxy", + default_value=2, + description="The retries it should do. These can be overridden by the program, at certain places, and they have to be.", + ), EmptyLine(), @@ -79,8 +84,7 @@ all the error messages are shown."""), PathAttribute(name="temp_directory", default_value=LOCATIONS.TEMP_DIRECTORY.resolve(), description="All temporary stuff is gonna be dumped in this directory."), PathAttribute(name="log_file", default_value=LOCATIONS.get_log_file("download_logs.log").resolve()), PathAttribute(name="ffmpeg_binary", default_value=LOCATIONS.FFMPEG_BIN.resolve(), description="Set the path to the ffmpeg binary."), - PathAttribute(name="cache_directory", default_value=LOCATIONS.CACHE_DIRECTORY.resolve(), - description="Set the path of the cache directory."), + PathAttribute(name="cache_directory", default_value=LOCATIONS.CACHE_DIRECTORY.resolve(), description="Set the path of the cache directory."), Attribute( name="not_a_genre_regex", description="These regular expressions tell music-kraken, which sub-folders of the music-directory\n" @@ -132,6 +136,7 @@ class SettingsStructure(TypedDict): # connection proxies: List[dict[str, str]] + tries_per_proxy: int tor: bool tor_port: int chunk_size: int diff --git a/src/music_kraken/utils/support_classes/download_result.py b/src/music_kraken/utils/support_classes/download_result.py index 7180b12..5458a34 100644 --- a/src/music_kraken/utils/support_classes/download_result.py +++ b/src/music_kraken/utils/support_classes/download_result.py @@ -94,5 +94,5 @@ class DownloadResult: return head _lines = [head] - _lines.extend(BColors.FAIL + s + BColors.ENDC for s in self._error_message_list) + _lines.extend(BColors.FAIL.value + s + BColors.ENDC.value for s in self._error_message_list) return "\n".join(_lines)