From e25f788b50c77ba702050c943de0b0bad73e5a85 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 12 Sep 2023 13:56:40 +0200 Subject: [PATCH 001/104] default config thingie --- src/music_kraken/utils/config/config.py | 23 ++++++++++++++++--- .../utils/config/config_files/main_config.py | 16 ------------- src/music_kraken/utils/shared.py | 6 ++--- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/music_kraken/utils/config/config.py b/src/music_kraken/utils/config/config.py index d140ec9..7f5c4b9 100644 --- a/src/music_kraken/utils/config/config.py +++ b/src/music_kraken/utils/config/config.py @@ -1,6 +1,7 @@ -from typing import Any, Tuple, Union +from typing import Any, Tuple, Union, List from pathlib import Path import logging +from datetime import datetime import toml @@ -31,10 +32,26 @@ class ConfigDict(dict): class Config: - def __init__(self, componet_list: Tuple[Union[Attribute, Description, EmptyLine]], config_file: Path) -> None: + def __init__(self, component_list: Tuple[Union[Attribute, Description, EmptyLine]], config_file: Path) -> None: self.config_file: Path = config_file - self.component_list: Tuple[Union[Attribute, Description, EmptyLine]] = componet_list + self.component_list: List[Union[Attribute, Description, EmptyLine]] = [ + Description(f"""IMPORTANT: If you modify this file, the changes for the actual setting, will be kept as is. +The changes you make to the comments, will be discarded, next time you run music-kraken. Have fun! 
+ +Latest reset: {datetime.now()} + + _____ + / ____| + | | __ __ _ _ _ + | | |_ | / _` || | | | + | |__| || (_| || |_| | + \_____| \__,_| \__, | + __/ | + |___/ +""")] + + self.component_list.extend(component_list) self.loaded_settings: ConfigDict = ConfigDict(self) self.attribute_map = {} diff --git a/src/music_kraken/utils/config/config_files/main_config.py b/src/music_kraken/utils/config/config_files/main_config.py index 164a08a..ba6ef91 100644 --- a/src/music_kraken/utils/config/config_files/main_config.py +++ b/src/music_kraken/utils/config/config_files/main_config.py @@ -1,5 +1,4 @@ from typing import TypedDict, List -from datetime import datetime from urllib.parse import ParseResult from logging import Logger from pathlib import Path @@ -14,21 +13,6 @@ from ..attributes.special_attributes import ( ) config = Config([ - Description(f"""IMPORTANT: If you modify this file, the changes for the actual setting, will be kept as is. -The changes you make to the comments, will be discarded, next time you run music-kraken. Have fun! - -Latest reset: {datetime.now()} - - _____ - / ____| - | | __ __ _ _ _ - | | |_ | / _` || | | | - | |__| || (_| || |_| | - \_____| \__,_| \__, | - __/ | - |___/ -"""), - Attribute(name="hasnt_yet_started", default_value=False, description="This will be set automatically, to look if it needs to run the scripts that run on start."), Attribute(name="bitrate", default_value=125, description="Streams the audio with given bitrate [kB/s]. Can't stream with a higher Bitrate, than the audio source provides."), AudioFormatAttribute(name="audio_format", default_value="mp3", description="""Music Kraken will stream the audio into this format. 
diff --git a/src/music_kraken/utils/shared.py b/src/music_kraken/utils/shared.py index 20726f1..749b975 100644 --- a/src/music_kraken/utils/shared.py +++ b/src/music_kraken/utils/shared.py @@ -16,8 +16,7 @@ def get_random_message() -> str: HIGHEST_ID = 2**main_settings['id_bits'] -HELP_MESSAGE = """ -to search: +HELP_MESSAGE = """to search: > s: {query or url} > s: https://musify.club/release/some-random-release-183028492 > s: #a {artist} #r {release} #t {track} @@ -28,5 +27,4 @@ to download: > d: 1 > d: https://musify.club/release/some-random-release-183028492 -have fun :3 -""".strip() +have fun :3""".strip() From 8846eacf0685bc25c3134c063f3dfbf7dd383c2d Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 12 Sep 2023 14:17:58 +0200 Subject: [PATCH 002/104] layed out page boilerplate for bandcamp --- .vscode/settings.json | 5 +- src/actual_donwload.py | 3 +- src/music_kraken/download/page_attributes.py | 22 +-- src/music_kraken/pages/__init__.py | 1 + src/music_kraken/pages/abstract.py | 2 +- src/music_kraken/pages/bandcamp.py | 65 ++++++++ .../config/config_files/logging_config.py | 8 +- .../utils/config/sections/__init__.py | 0 .../utils/config/sections/audio.py | 154 ----------------- .../utils/config/sections/connection.py | 157 ------------------ .../utils/config/sections/logging.py | 130 --------------- .../utils/config/sections/misc.py | 72 -------- .../utils/config/sections/paths.py | 59 ------- src/music_kraken/utils/shared.py | 4 +- 14 files changed, 93 insertions(+), 589 deletions(-) create mode 100644 src/music_kraken/pages/bandcamp.py delete mode 100644 src/music_kraken/utils/config/sections/__init__.py delete mode 100644 src/music_kraken/utils/config/sections/audio.py delete mode 100644 src/music_kraken/utils/config/sections/connection.py delete mode 100644 src/music_kraken/utils/config/sections/logging.py delete mode 100644 src/music_kraken/utils/config/sections/misc.py delete mode 100644 src/music_kraken/utils/config/sections/paths.py diff --git 
a/.vscode/settings.json b/.vscode/settings.json index 664d07e..3f0bf4f 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -14,5 +14,8 @@ "[python]": { "editor.defaultFormatter": "ms-python.autopep8" }, - "python.formatting.provider": "none" + "python.formatting.provider": "none", + "cSpell.words": [ + "Bandcamp" + ] } \ No newline at end of file diff --git a/src/actual_donwload.py b/src/actual_donwload.py index 78ecefd..7557324 100644 --- a/src/actual_donwload.py +++ b/src/actual_donwload.py @@ -29,8 +29,7 @@ if __name__ == "__main__": ] youtube_music_test = [ - "s: #a Favorite #r Anarcho", - "0" + "s: #a Favorite #r Anarcho" ] music_kraken.cli.download(genre="test", command_list=youtube_music_test, process_metadata_anyway=True) diff --git a/src/music_kraken/download/page_attributes.py b/src/music_kraken/download/page_attributes.py index 4ab45d0..17c8a91 100644 --- a/src/music_kraken/download/page_attributes.py +++ b/src/music_kraken/download/page_attributes.py @@ -9,20 +9,14 @@ from ..utils.support_classes import Query, DownloadResult from ..utils.exception.download import UrlNotFoundException from ..utils.shared import DEBUG_PAGES -from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, INDEPENDENT_DB_OBJECTS - -if DEBUG_PAGES: - DEBUGGING_PAGE = YoutubeMusic - print(f"Only downloading from page {DEBUGGING_PAGE}.") - - ALL_PAGES = {DEBUGGING_PAGE} - AUDIO_PAGES = ALL_PAGES.union(AUDIO_PAGES) +from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, INDEPENDENT_DB_OBJECTS ALL_PAGES: Set[Type[Page]] = { EncyclopaediaMetallum, Musify, - YoutubeMusic + YoutubeMusic, + Bandcamp } if youtube_settings["use_youtube_alongside_youtube_music"]: @@ -31,13 +25,21 @@ if youtube_settings["use_youtube_alongside_youtube_music"]: AUDIO_PAGES: Set[Type[Page]] = { Musify, YouTube, - YoutubeMusic + YoutubeMusic, + Bandcamp } SHADY_PAGES: Set[Type[Page]] = { Musify, } +if DEBUG_PAGES: + DEBUGGING_PAGE = Bandcamp + 
print(f"Only downloading from page {DEBUGGING_PAGE}.") + + ALL_PAGES = {DEBUGGING_PAGE} + AUDIO_PAGES = ALL_PAGES.union(AUDIO_PAGES) + class Pages: def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False) -> None: # initialize all page instances diff --git a/src/music_kraken/pages/__init__.py b/src/music_kraken/pages/__init__.py index a317147..5757a2c 100644 --- a/src/music_kraken/pages/__init__.py +++ b/src/music_kraken/pages/__init__.py @@ -2,5 +2,6 @@ from .encyclopaedia_metallum import EncyclopaediaMetallum from .musify import Musify from .youtube import YouTube from .youtube_music import YoutubeMusic +from .bandcamp import Bandcamp from .abstract import Page, INDEPENDENT_DB_OBJECTS diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index babd08c..25a589c 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -220,7 +220,7 @@ class Page: if type(music_object) in search_functions: r = search_functions[type(music_object)](music_object) - if len(r) > 0: + if r is not None and len(r) > 0: return r r = [] diff --git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py new file mode 100644 index 0000000..950e5fa --- /dev/null +++ b/src/music_kraken/pages/bandcamp.py @@ -0,0 +1,65 @@ +from typing import List, Optional, Type +from urllib.parse import urlparse +import logging + + +from ..objects import Source, DatabaseObject +from .abstract import Page +from ..objects import ( + Artist, + Source, + SourcePages, + Song, + Album, + Label, + Target +) +from ..connection import Connection +from ..utils.support_classes import DownloadResult +from ..utils.config import main_settings, logging_settings + +class Bandcamp(Page): + # CHANGE + SOURCE_TYPE = SourcePages.BANDCAMP + LOGGER = logging_settings["bandcamp_logger"] + + def __init__(self, *args, **kwargs): + self.connection: Connection = Connection( + host="https://bandcamp.com/", + logger=self.LOGGER + 
) + + super().__init__(*args, **kwargs) + + def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: + return super().get_source_type(source) + + def general_search(self, search_query: str) -> List[DatabaseObject]: + return [] + + def label_search(self, label: Label) -> List[Label]: + return [] + + def artist_search(self, artist: Artist) -> List[Artist]: + return [] + + def album_search(self, album: Album) -> List[Album]: + return [] + + def song_search(self, song: Song) -> List[Song]: + return [] + + def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: + return Song() + + def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: + return Album() + + def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: + return Artist() + + def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: + return Label() + + def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: + return DownloadResult() diff --git a/src/music_kraken/utils/config/config_files/logging_config.py b/src/music_kraken/utils/config/config_files/logging_config.py index 5ff0cd6..c068fe3 100644 --- a/src/music_kraken/utils/config/config_files/logging_config.py +++ b/src/music_kraken/utils/config/config_files/logging_config.py @@ -79,6 +79,11 @@ Reference for the logging formats: https://docs.python.org/3/library/logging.htm description="The logger for the genius scraper", default_value="genius" ), + LoggerAttribute( + name="bandcamp_logger", + description="The logger for the bandcamp scraper", + default_value="bandcamp" + ) ], LOCATIONS.get_config_file("logging")) @@ -96,4 +101,5 @@ class SettingsStructure(TypedDict): youtube_logger: Logger youtube_music_logger: Logger metal_archives_logger: Logger - genius_logger: Logger \ No newline at end of file + genius_logger: Logger + bandcamp_logger: Logger \ No newline at end of file diff --git 
a/src/music_kraken/utils/config/sections/__init__.py b/src/music_kraken/utils/config/sections/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/music_kraken/utils/config/sections/audio.py b/src/music_kraken/utils/config/sections/audio.py deleted file mode 100644 index def1348..0000000 --- a/src/music_kraken/utils/config/sections/audio.py +++ /dev/null @@ -1,154 +0,0 @@ -import logging - -from ..base_classes import ( - SingleAttribute, - FloatAttribute, - StringAttribute, - Section, - Description, - EmptyLine, - BoolAttribute, - ListAttribute -) -from ...enums.album import AlbumType -from ...exception.config import SettingValueError - -# Only the formats with id3 metadata can be used -# https://www.audioranger.com/audio-formats.php -# https://web.archive.org/web/20230322234434/https://www.audioranger.com/audio-formats.php -ID3_2_FILE_FORMATS = frozenset(( - "mp3", "mp2", "mp1", # MPEG-1 ID3.2 - "wav", "wave", "rmi", # RIFF (including WAV) ID3.2 - "aiff", "aif", "aifc", # AIFF ID3.2 - "aac", "aacp", # Raw AAC ID3.2 - "tta", # True Audio ID3.2 -)) -_sorted_id3_2_formats = sorted(ID3_2_FILE_FORMATS) - -ID3_1_FILE_FORMATS = frozenset(( - "ape", # Monkey's Audio ID3.1 - "mpc", "mpp", "mp+", # MusePack ID3.1 - "wv", # WavPack ID3.1 - "ofr", "ofs" # OptimFrog ID3.1 -)) -_sorted_id3_1_formats = sorted(ID3_1_FILE_FORMATS) - - -class AudioFormatAttribute(SingleAttribute): - def validate(self, value: str): - v = self.value.strip().lower() - if v not in ID3_1_FILE_FORMATS and v not in ID3_2_FILE_FORMATS: - raise SettingValueError( - setting_name=self.name, - setting_value=value, - rule="has to be a valid audio format, supporting id3 metadata" - ) - - @property - def object_from_value(self) -> str: - v = self.value.strip().lower() - if v in ID3_2_FILE_FORMATS: - return v - if v in ID3_1_FILE_FORMATS: - logging.debug(f"setting audio format to a format that only supports ID3.1: {v}") - return v - - raise ValueError(f"Invalid Audio Format: {v}") - - 
-class AlbumTypeListAttribute(ListAttribute): - def validate(self, value: str): - try: - AlbumType(value.strip()) - except ValueError: - raise SettingValueError( - setting_name=self.name, - setting_value=value, - rule="has to be an existing album type" - ) - - def single_object_from_element(self, value: str) -> AlbumType: - return AlbumType(value) - - -class AudioSection(Section): - def __init__(self): - self.BITRATE = FloatAttribute( - name="bitrate", - description="Streams the audio with given bitrate [kB/s]. " - "Can't stream with a higher Bitrate, than the audio source provides.", - value="125" - ) - - self.AUDIO_FORMAT = AudioFormatAttribute(name="audio_format", value="mp3", description=f""" -Music Kraken will stream the audio into this format. -You can use Audio formats which support ID3.2 and ID3.1, -but you will have cleaner Metadata using ID3.2. -ID3.2: {', '.join(_sorted_id3_2_formats)} -ID3.1: {', '.join(_sorted_id3_1_formats)} - """.strip()) - - self.SORT_BY_DATE = BoolAttribute( - name="sort_by_date", - description="If this is set to true, it will set the albumsort attribute such that,\n" - "the albums are sorted by date.", - value="true" - ) - - self.SORT_BY_ALBUM_TYPE = BoolAttribute( - name="sort_album_by_type", - description="If this is set to true, it will set the albumsort attribute such that,\n" - "the albums are put into categories before being sorted.\n" - "This means for example, the Studio Albums and EP's are always in front of Singles, " - "and Compilations are in the back.", - value="true" - ) - - self.DOWNLOAD_PATH = StringAttribute( - name="download_path", - value="{genre}/{artist}/{album}", - description="The folder music kraken should put the songs into." - ) - - self.DOWNLOAD_FILE = StringAttribute( - name="download_file", - value="{song}.{audio_format}", - description="The filename of the audio file." 
- ) - - - self.ALBUM_TYPE_BLACKLIST = AlbumTypeListAttribute( - name="album_type_blacklist", - description="Music Kraken ignores all albums of those types.\n" - "Following album types exist in the programm:\n" - f"{', '.join(album.value for album in AlbumType)}", - value=[ - AlbumType.COMPILATION_ALBUM.value, - AlbumType.LIVE_ALBUM.value, - AlbumType.MIXTAPE.value - ] - ) - - self.attribute_list = [ - self.BITRATE, - self.AUDIO_FORMAT, - EmptyLine(), - self.SORT_BY_DATE, - self.SORT_BY_ALBUM_TYPE, - Description(""" -There are multiple fields, you can use for the path and file name: -- genre -- label -- artist -- album -- song -- album_type - """.strip()), - self.DOWNLOAD_PATH, - self.DOWNLOAD_FILE, - self.ALBUM_TYPE_BLACKLIST, - ] - super().__init__() - - -AUDIO_SECTION = AudioSection() diff --git a/src/music_kraken/utils/config/sections/connection.py b/src/music_kraken/utils/config/sections/connection.py deleted file mode 100644 index b29ac05..0000000 --- a/src/music_kraken/utils/config/sections/connection.py +++ /dev/null @@ -1,157 +0,0 @@ -from urllib.parse import urlparse, ParseResult -import re - -from ..base_classes import Section, FloatAttribute, IntAttribute, BoolAttribute, ListAttribute, StringAttribute -from ...regex import URL_PATTERN -from ...exception.config import SettingValueError - - -class ProxAttribute(ListAttribute): - def single_object_from_element(self, value) -> dict: - return { - 'http': value, - 'https': value, - 'ftp': value - } - - -class UrlStringAttribute(StringAttribute): - def validate(self, value: str): - v = value.strip() - url = re.match(URL_PATTERN, v) - if url is None: - raise SettingValueError( - setting_name=self.name, - setting_value=v, - rule="has to be a valid url" - ) - - @property - def object_from_value(self) -> ParseResult: - return urlparse(self.value) - - -class UrlListAttribute(ListAttribute): - def validate(self, value: str): - v = value.strip() - url = re.match(URL_PATTERN, v) - if url is None: - raise 
SettingValueError( - setting_name=self.name, - setting_value=v, - rule="has to be a valid url" - ) - - def single_object_from_element(self, value: str): - return urlparse(value) - - - -class ConnectionSection(Section): - def __init__(self): - self.PROXIES = ProxAttribute( - name="proxies", - description="Set your proxies.\n" - "Must be valid for http, as well as https.", - value=[] - ) - - self.USE_TOR = BoolAttribute( - name="tor", - description="Route ALL traffic through Tor.\n" - "If you use Tor, make sure the Tor browser is installed, and running." - "I can't guarantee maximum security though!", - value="false" - ) - self.TOR_PORT = IntAttribute( - name="tor_port", - description="The port, tor is listening. If tor is already working, don't change it.", - value="9150" - ) - self.CHUNK_SIZE = IntAttribute( - name="chunk_size", - description="Size of the chunks that are streamed.", - value="1024" - ) - self.SHOW_DOWNLOAD_ERRORS_THRESHOLD = FloatAttribute( - name="show_download_errors_threshold", - description="If the percentage of failed downloads goes over this threshold,\n" - "all the error messages are shown.", - value="0.3" - ) - - # INVIDIOUS INSTANCES LIST - self.INVIDIOUS_INSTANCE = UrlStringAttribute( - name="invidious_instance", - description="This is an attribute, where you can define the invidious instances,\n" - "the youtube downloader should use.\n" - "Here is a list of active ones: https://docs.invidious.io/instances/\n" - "Instances that use cloudflare or have source code changes could cause issues.\n" - "Hidden instances (.onion) will only work, when setting 'tor=true'.", - value="https://yt.artemislena.eu/" - ) - - self.PIPED_INSTANCE = UrlStringAttribute( - name="piped_instance", - description="This is an attribute, where you can define the pioed instances,\n" - "the youtube downloader should use.\n" - "Here is a list of active ones: https://github.com/TeamPiped/Piped/wiki/Instances\n" - "Instances that use cloudflare or have source code changes 
could cause issues.\n" - "Hidden instances (.onion) will only work, when setting 'tor=true'.", - value="https://pipedapi.kavin.rocks" - ) - - self.SLEEP_AFTER_YOUTUBE_403 = FloatAttribute( - name="sleep_after_youtube_403", - description="The time to wait, after youtube returned 403 (in seconds)", - value="20" - ) - - self.YOUTUBE_MUSIC_API_KEY = StringAttribute( - name="youtube_music_api_key", - description="This is the API key used by YouTube-Music internally.\nDw. if it is empty, Rachel will fetch it automatically for you <333\n(she will also update outdated api keys/those that don't work)", - value="AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30" - ) - - self.YOUTUBE_MUSIC_CLEAN_DATA = BoolAttribute( - name="youtube_music_clean_data", - description="If set to true, it exclusively fetches artists/albums/songs, not things like user channels etc.", - value="true" - ) - - self.ALL_YOUTUBE_URLS = UrlListAttribute( - name="youtube_url", - description="This is used to detect, if an url is from youtube, or any alternativ frontend.\n" - "If any instance seems to be missing, run music kraken with the -f flag.", - value=[ - "https://www.youtube.com/", - "https://www.youtu.be/", - "https://redirect.invidious.io/", - "https://piped.kavin.rocks/" - ] - ) - - self.SPONSOR_BLOCK = BoolAttribute( - name="use_sponsor_block", - value="true", - description="Use sponsor block to remove adds or simmilar from the youtube videos." 
- ) - - self.attribute_list = [ - self.USE_TOR, - self.TOR_PORT, - self.CHUNK_SIZE, - self.SHOW_DOWNLOAD_ERRORS_THRESHOLD, - self.INVIDIOUS_INSTANCE, - self.PIPED_INSTANCE, - self.SLEEP_AFTER_YOUTUBE_403, - self.YOUTUBE_MUSIC_API_KEY, - self.YOUTUBE_MUSIC_CLEAN_DATA, - self.ALL_YOUTUBE_URLS, - self.SPONSOR_BLOCK - ] - - super().__init__() - - -CONNECTION_SECTION = ConnectionSection() diff --git a/src/music_kraken/utils/config/sections/logging.py b/src/music_kraken/utils/config/sections/logging.py deleted file mode 100644 index 17c0969..0000000 --- a/src/music_kraken/utils/config/sections/logging.py +++ /dev/null @@ -1,130 +0,0 @@ -import logging -from typing import Callable - -from ..base_classes import SingleAttribute, StringAttribute, Section, Description, EmptyLine - -LOG_LEVELS = { - "CRITICAL": 50, - "ERROR": 40, - "WARNING": 30, - "INFO": 20, - "DEBUG": 10, - "NOTSET": 0 -} - - -class LoggerAttribute(SingleAttribute): - @property - def object_from_value(self) -> logging.Logger: - return logging.getLogger(self.value) - - -class LogLevelAttribute(SingleAttribute): - @property - def object_from_value(self) -> int: - """ - gets the numeric value of a log level - :return: - """ - if self.value.isnumeric(): - return int(self.value) - - v = self.value.strip().upper() - - if v not in LOG_LEVELS: - raise ValueError( - f"{self.name} can only been either one of the following levels, or an integer:\n" - f"{';'.join(key for key in LOG_LEVELS)}" - ) - - return LOG_LEVELS[v] - - -class LoggingSection(Section): - def __init__(self): - self.FORMAT = StringAttribute( - name="logging_format", - description="Reference for the logging formats: " - "https://docs.python.org/3/library/logging.html#logrecord-attributes", - value=logging.BASIC_FORMAT - ) - self.LOG_LEVEL = LogLevelAttribute( - name="log_level", - description=f"can only been either one of the following levels, or an integer:\n" - f"{';'.join(key for key in LOG_LEVELS)}", - value=str(logging.INFO) - ) - - 
self.DOWNLOAD_LOGGER = LoggerAttribute( - name="download_logger", - description="The logger for downloading.", - value="download" - ) - self.TAGGING_LOGGER = LoggerAttribute( - name="tagging_logger", - description="The logger for tagging id3 containers.", - value="tagging" - ) - self.CODEX_LOGGER = LoggerAttribute( - name="codex_logger", - description="The logger for streaming the audio into an uniform codex.", - value="codex" - ) - self.OBJECT_LOGGER = LoggerAttribute( - name="object_logger", - description="The logger for creating Data-Objects.", - value="object" - ) - self.DATABASE_LOGGER = LoggerAttribute( - name="database_logger", - description="The logger for Database operations.", - value="database" - ) - self.MUSIFY_LOGGER = LoggerAttribute( - name="musify_logger", - description="The logger for the musify scraper.", - value="musify" - ) - self.YOUTUBE_LOGGER = LoggerAttribute( - name="youtube_logger", - description="The logger for the youtube scraper.", - value="youtube" - ) - self.YOUTUBE_MUSIC_LOGGER = LoggerAttribute( - name="youtube_music_logger", - description="The logger for the youtube music scraper.\n(The scraper is seperate to the youtube scraper)", - value="youtube_music" - ) - self.ENCYCLOPAEDIA_METALLUM_LOGGER = LoggerAttribute( - name="metal_archives_logger", - description="The logger for the metal archives scraper.", - value="metal_archives" - ) - self.GENIUS_LOGGER = LoggerAttribute( - name="genius_logger", - description="The logger for the genius scraper", - value="genius" - ) - - self.attribute_list = [ - Description("Logging settings for the actual logging:"), - self.FORMAT, - self.LOG_LEVEL, - EmptyLine(), - Description("Just the names for different logger, for different parts of the programm:"), - self.DOWNLOAD_LOGGER, - self.TAGGING_LOGGER, - self.CODEX_LOGGER, - self.OBJECT_LOGGER, - self.DATABASE_LOGGER, - self.MUSIFY_LOGGER, - self.YOUTUBE_LOGGER, - self.YOUTUBE_MUSIC_LOGGER, - self.ENCYCLOPAEDIA_METALLUM_LOGGER, - self.GENIUS_LOGGER 
- ] - - super().__init__() - - -LOGGING_SECTION = LoggingSection() diff --git a/src/music_kraken/utils/config/sections/misc.py b/src/music_kraken/utils/config/sections/misc.py deleted file mode 100644 index ad8641a..0000000 --- a/src/music_kraken/utils/config/sections/misc.py +++ /dev/null @@ -1,72 +0,0 @@ -from ..base_classes import Section, IntAttribute, ListAttribute, BoolAttribute - - -class MiscSection(Section): - def __init__(self): - self.HASNT_YET_STARTED = BoolAttribute( - name="hasnt_yet_started", - description="If you did already run, and configured everything, this is false.", - value="true" - ) - - self.ENABLE_RESULT_HISTORY = BoolAttribute( - name="result_history", - description="If enabled, you can go back to the previous results.\n" - "The consequence is a higher meory consumption, because every result is saved.", - value="false" - ) - - self.HISTORY_LENGTH = IntAttribute( - name="history_length", - description="You can choose how far back you can go in the result history.\n" - "The further you choose to be able to go back, the higher the memory usage.\n" - "'-1' removes the Limit entirely.", - value="8" - ) - - self.HAPPY_MESSAGES = ListAttribute( - name="happy_messages", - description="Just some nice and wholesome messages.\n" - "If your mindset has traits of a [file corruption], you might not agree.\n" - "But anyways... Freedom of thought, so go ahead and change the messages.", - value=[ - "Support the artist.", - "Star Me: https://github.com/HeIIow2/music-downloader", - "🏳️‍⚧️🏳️‍⚧️ Trans rights are human rights. 🏳️‍⚧️🏳️‍⚧️", - "🏳️‍⚧️🏳️‍⚧️ Trans women are women, trans men are men, and enbies are enbies. 🏳️‍⚧️🏳️‍⚧️", - "🏴‍☠️🏴‍☠️ Unite under one flag, fck borders. 🏴‍☠️🏴‍☠️", - "Join my Matrix Space: https://matrix.to/#/#music-kraken:matrix.org", - "Gotta love the BPJM ;-;", - "🏳️‍⚧️🏳️‍⚧️ Protect trans youth. 
🏳️‍⚧️🏳️‍⚧️", - ] - ) - - self.MODIFY_GC = BoolAttribute( - name="modify_gc", - description="If set to true, it will modify the gc for the sake of performance.\n" - "This should not drive up ram usage, but if it is, then turn it of.\n" - "Here a blog post about that matter:\n" - "https://mkennedy.codes/posts/python-gc-settings-change-this-and-make-your-app-go-20pc-faster/\n" - "https://web.archive.org/web/20221124122222/https://mkennedy.codes/posts/python-gc-settings-change-this-and-make-your-app-go-20pc-faster/", - value="true" - ) - - self.ID_BITS = IntAttribute( - name="id_bits", - description="I really dunno why I even made this a setting.. Modifying this is a REALLY dumb idea.", - value="64" - ) - - self.attribute_list = [ - self.HASNT_YET_STARTED, - self.ENABLE_RESULT_HISTORY, - self.HISTORY_LENGTH, - self.HAPPY_MESSAGES, - self.MODIFY_GC, - self.ID_BITS - ] - - super().__init__() - - -MISC_SECTION = MiscSection() diff --git a/src/music_kraken/utils/config/sections/paths.py b/src/music_kraken/utils/config/sections/paths.py deleted file mode 100644 index cd35a4b..0000000 --- a/src/music_kraken/utils/config/sections/paths.py +++ /dev/null @@ -1,59 +0,0 @@ -from pathlib import Path - -from ...path_manager import LOCATIONS -from ..base_classes import Section, StringAttribute, ListAttribute - - -class PathAttribute(StringAttribute): - @property - def object_from_value(self) -> Path: - return Path(self.value.strip()) - - -class PathsSection(Section): - def __init__(self): - self.MUSIC_DIRECTORY = PathAttribute( - name="music_directory", - description="The directory, all the music will be downloaded to.", - value=str(LOCATIONS.MUSIC_DIRECTORY) - ) - - self.TEMP_DIRECTORY = PathAttribute( - name="temp_directory", - description="All temporary stuff is gonna be dumped in this directory.", - value=str(LOCATIONS.TEMP_DIRECTORY) - ) - - self.LOG_PATH = PathAttribute( - name="log_file", - description="The path to the logging file", - 
value=str(LOCATIONS.get_log_file("download_logs.log")) - ) - - self.NOT_A_GENRE_REGEX = ListAttribute( - name="not_a_genre_regex", - description="These regular expressions tell music-kraken, which sub-folders of the music-directory\n" - "it should ignore, and not count to genres", - value=[ - r'^\.' # is hidden/starts with a "." - ] - ) - - self.FFMPEG_BINARY = PathAttribute( - name="ffmpeg_binary", - description="Set the path to the ffmpeg binary.", - value=str(LOCATIONS.FFMPEG_BIN) - ) - - self.attribute_list = [ - self.MUSIC_DIRECTORY, - self.TEMP_DIRECTORY, - self.LOG_PATH, - self.NOT_A_GENRE_REGEX, - self.FFMPEG_BINARY - ] - - super().__init__() - - -PATHS_SECTION = PathsSection() diff --git a/src/music_kraken/utils/shared.py b/src/music_kraken/utils/shared.py index 749b975..33702b6 100644 --- a/src/music_kraken/utils/shared.py +++ b/src/music_kraken/utils/shared.py @@ -2,9 +2,9 @@ import random from .config import main_settings -DEBUG = False +DEBUG = True DEBUG_YOUTUBE_INITIALIZING = DEBUG and False -DEBUG_PAGES = DEBUG and False +DEBUG_PAGES = DEBUG and True if DEBUG: print("DEBUG ACTIVE") From 1bb65be56e00c6ca11c46ca5d81cde4e06a23995 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 12 Sep 2023 15:12:36 +0200 Subject: [PATCH 003/104] layed out general search --- src/actual_donwload.py | 2 +- src/music_kraken/pages/bandcamp.py | 79 +++++++++++++++++++++++++++++- 2 files changed, 78 insertions(+), 3 deletions(-) diff --git a/src/actual_donwload.py b/src/actual_donwload.py index 7557324..a952094 100644 --- a/src/actual_donwload.py +++ b/src/actual_donwload.py @@ -29,7 +29,7 @@ if __name__ == "__main__": ] youtube_music_test = [ - "s: #a Favorite #r Anarcho" + "s: hello" ] music_kraken.cli.download(genre="test", command_list=youtube_music_test, process_metadata_anyway=True) diff --git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py index 950e5fa..332d658 100644 --- a/src/music_kraken/pages/bandcamp.py +++ 
b/src/music_kraken/pages/bandcamp.py @@ -1,6 +1,7 @@ from typing import List, Optional, Type from urllib.parse import urlparse import logging +from enum import Enum from ..objects import Source, DatabaseObject @@ -17,6 +18,16 @@ from ..objects import ( from ..connection import Connection from ..utils.support_classes import DownloadResult from ..utils.config import main_settings, logging_settings +from ..utils.shared import DEBUG +if DEBUG: + from ..utils.debug_utils import dump_to_file + + +class BandcampTypes(Enum): + ARTIST = "b" + ALBUM = "a" + SONG = "t" + class Bandcamp(Page): # CHANGE @@ -33,9 +44,73 @@ class Bandcamp(Page): def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: return super().get_source_type(source) + + def _parse_autocomplete_api_result(self, data: dict) -> DatabaseObject: + try: + object_type = BandcampTypes(data["type"]) + except ValueError: + print(data["type"]) + return + + url = data["item_url_root"] + if "item_url_path" in data: + url = data["item_url_path"] + + source_list = [Source(self.SOURCE_TYPE, url)] + name = data["name"] + + if data.get("is_label", False): + return Label( + name=name, + source_list=source_list + ) + + if object_type is BandcampTypes.ARTIST: + return Artist( + name=name, + source_list=source_list + ) + + if object_type is BandcampTypes.ALBUM: + return Album( + title=name, + source_list=source_list, + artist_list=[ + Artist( + name=data["band_name"], + source_list=[ + Source(self.SOURCE_TYPE, data["item_url_root"]) + ] + ) + ] + ) + + if object_type is BandcampTypes.SONG: + print("NEEDING TO IMPLEMENT SONG") - def general_search(self, search_query: str) -> List[DatabaseObject]: - return [] + def general_search(self, search_query: str, filter_string: str = "") -> List[DatabaseObject]: + results = [] + + r = self.connection.post("https://bandcamp.com/api/bcsearch_public_api/1/autocomplete_elastic", json={ + "fan_id": None, + "full_page": True, + "search_filter": filter_string, + 
"search_text": search_query, + }) + if r is None: + return results + + if DEBUG: + dump_to_file("bandcamp_response.json", r.text, is_json=True, exit_after_dump=False) + + data = r.json() + + for element in data.get("auto", {}).get("results", []): + r = self._parse_autocomplete_api_result(element) + if r is not None: + results.append(r) + + return results def label_search(self, label: Label) -> List[Label]: return [] From a45db5cfa914fe00b6653da0475803f5ca7bced3 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 12 Sep 2023 15:14:44 +0200 Subject: [PATCH 004/104] implemented specific searches --- src/music_kraken/pages/bandcamp.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py index 332d658..9644646 100644 --- a/src/music_kraken/pages/bandcamp.py +++ b/src/music_kraken/pages/bandcamp.py @@ -113,16 +113,16 @@ class Bandcamp(Page): return results def label_search(self, label: Label) -> List[Label]: - return [] + return self.general_search(artist.name, filter_string="b") def artist_search(self, artist: Artist) -> List[Artist]: - return [] + return self.general_search(artist.name, filter_string="b") def album_search(self, album: Album) -> List[Album]: - return [] + return self.general_search(artist.name, filter_string="a") def song_search(self, song: Song) -> List[Song]: - return [] + return self.general_search(artist.name, filter_string="t") def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: return Song() From fbc20d7f4dfa61575543039c7b51d170193f812f Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Tue, 12 Sep 2023 16:30:08 +0200 Subject: [PATCH 005/104] feat: implemented search for track --- src/actual_donwload.py | 2 +- src/music_kraken/pages/bandcamp.py | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/actual_donwload.py b/src/actual_donwload.py index a952094..bc587f6 
100644 --- a/src/actual_donwload.py +++ b/src/actual_donwload.py @@ -29,7 +29,7 @@ if __name__ == "__main__": ] youtube_music_test = [ - "s: hello" + "s: #t Self Loather" ] music_kraken.cli.download(genre="test", command_list=youtube_music_test, process_metadata_anyway=True) diff --git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py index 9644646..8c3459b 100644 --- a/src/music_kraken/pages/bandcamp.py +++ b/src/music_kraken/pages/bandcamp.py @@ -86,7 +86,18 @@ class Bandcamp(Page): ) if object_type is BandcampTypes.SONG: - print("NEEDING TO IMPLEMENT SONG") + return Song( + title=name, + source_list=source_list, + main_artist_list=[ + Artist( + name=data["band_name"], + source_list=[ + Source(self.SOURCE_TYPE, data["item_url_root"]) + ] + ) + ] + ) def general_search(self, search_query: str, filter_string: str = "") -> List[DatabaseObject]: results = [] @@ -113,16 +124,16 @@ class Bandcamp(Page): return results def label_search(self, label: Label) -> List[Label]: - return self.general_search(artist.name, filter_string="b") + return self.general_search(label.name, filter_string="b") def artist_search(self, artist: Artist) -> List[Artist]: return self.general_search(artist.name, filter_string="b") def album_search(self, album: Album) -> List[Album]: - return self.general_search(artist.name, filter_string="a") + return self.general_search(album.title, filter_string="a") def song_search(self, song: Song) -> List[Song]: - return self.general_search(artist.name, filter_string="t") + return self.general_search(song.title, filter_string="t") def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: return Song() From 1a5fbdc0c25c1072e38b08f8227d2952b47a3948 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Tue, 12 Sep 2023 17:59:04 +0200 Subject: [PATCH 006/104] feat: reversengeneered bandcamp artist html structure --- documentation/html/bandcamp/artist_page.html | 125 +++++++++++++++++++ 
src/actual_donwload.py | 3 +- src/music_kraken/pages/bandcamp.py | 31 ++++- 3 files changed, 155 insertions(+), 4 deletions(-) create mode 100644 documentation/html/bandcamp/artist_page.html diff --git a/documentation/html/bandcamp/artist_page.html b/documentation/html/bandcamp/artist_page.html new file mode 100644 index 0000000..06504a2 --- /dev/null +++ b/documentation/html/bandcamp/artist_page.html @@ -0,0 +1,125 @@ + + + + + + + + Music | Only Smile + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+ +
+ + + + +
+ + +
+

about

+ +

+ Only Smile + Russia +

+ + + +
+ +

+ + Contact Only Smile + +

+ + +

+ + Streaming and +
+ Download help +
+

+
+
+
+
+ + diff --git a/src/actual_donwload.py b/src/actual_donwload.py index bc587f6..59e7667 100644 --- a/src/actual_donwload.py +++ b/src/actual_donwload.py @@ -29,7 +29,8 @@ if __name__ == "__main__": ] youtube_music_test = [ - "s: #t Self Loather" + "s: #a Only Smile", + "0" ] music_kraken.cli.download(genre="test", command_list=youtube_music_test, process_metadata_anyway=True) diff --git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py index 8c3459b..983053f 100644 --- a/src/music_kraken/pages/bandcamp.py +++ b/src/music_kraken/pages/bandcamp.py @@ -43,6 +43,15 @@ class Bandcamp(Page): super().__init__(*args, **kwargs) def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: + parsed_url = urlparse(source.url) + + if parsed_url.path == "": + return Artist + if parsed_url.path.startswith("/album/"): + return Album + if parsed_url.path.startswith("/track/"): + return Song + return super().get_source_type(source) def _parse_autocomplete_api_result(self, data: dict) -> DatabaseObject: @@ -135,15 +144,31 @@ class Bandcamp(Page): def song_search(self, song: Song) -> List[Song]: return self.general_search(song.title, filter_string="t") + + def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: + artist = Artist() + + r = self.connection.get(source.url) + if r is None: + return artist + + soup = self.get_soup_from_response(r) + data_container = soup.find("div", {"id": "pagedata"}) + data = data_container["data-blob"] + + if DEBUG: + dump_to_file("artist_page.html", r.text, exit_after_dump=False) + dump_to_file("bandcamp_artis.json", data, is_json=True, exit_after_dump=False) + + return artist + def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: + print(source) return Song() def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: return Album() - def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: - return Artist() - def fetch_label(self, source: Source, 
stop_at_level: int = 1) -> Label: return Label() From 06f5411c472c1ea5021f86f4d481d4e2f2d99165 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Tue, 12 Sep 2023 18:21:24 +0200 Subject: [PATCH 007/104] feat: added contact collection in the artist --- src/music_kraken/objects/song.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index 3f46d4b..b7d021e 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -8,6 +8,7 @@ from ..utils.enums.album import AlbumType, AlbumStatus from .collection import Collection from .formatted_text import FormattedText from .lyrics import Lyrics +from .contact import Contact from .metadata import ( Mapping as id3Mapping, ID3Timestamp, @@ -481,6 +482,7 @@ class Artist(MainObject): source_list: List[Source] = None, feature_song_list: List[Song] = None, main_album_list: List[Album] = None, + contact_list: List[Contact] = None, notes: FormattedText = None, lyrical_themes: List[str] = None, general_genre: str = "", @@ -516,6 +518,8 @@ class Artist(MainObject): self.main_album_collection: Collection[Album] = Collection(data=main_album_list, element_type=Album) self.label_collection: Collection[Label] = Collection(data=label_list, element_type=Label) + self.contact_collection: Collection[Label] = Collection(data=contact_list, element_type=Contact) + def _add_other_db_objects(self, object_type: Type["DatabaseObject"], object_list: List["DatabaseObject"]): if object_type is Song: # this doesn't really make sense @@ -628,7 +632,8 @@ class Artist(MainObject): return [ ('id', self.id), ('name', self.unified_name), - *[('url', source.url) for source in self.source_collection] + *[('url', source.url) for source in self.source_collection], + *[('contact', contact.value) for contact in self.contact_collection] ] @property From 6e82c1e5cb41c2a91f13f96de2805ba8345901b3 Mon Sep 17 00:00:00 2001 
From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Tue, 12 Sep 2023 18:21:30 +0200 Subject: [PATCH 008/104] feat: added contact collection in the artist --- src/music_kraken/objects/contact.py | 37 +++++++++++++++++++++++++ src/music_kraken/utils/enums/contact.py | 7 +++++ 2 files changed, 44 insertions(+) create mode 100644 src/music_kraken/objects/contact.py create mode 100644 src/music_kraken/utils/enums/contact.py diff --git a/src/music_kraken/objects/contact.py b/src/music_kraken/objects/contact.py new file mode 100644 index 0000000..cdad4da --- /dev/null +++ b/src/music_kraken/objects/contact.py @@ -0,0 +1,37 @@ +from typing import Optional, List, Tuple + +from ..utils.enums.contact import ContactMethod +from .parents import DatabaseObject + + +class Contact(DatabaseObject): + COLLECTION_ATTRIBUTES = tuple() + SIMPLE_ATTRIBUTES = { + "contact_method": None, + "value": None, + } + + @property + def indexing_values(self) -> List[Tuple[str, object]]: + return [ + ('id', self.id), + ('value', self.value), + ] + + def __init__(self, contact_method: ContactMethod, value: str) -> None: + self.contact_method: ContactMethod = contact_method + self.value: str = value + + @classmethod + def match_url(cls, url: str) -> Optional["Contact"]: + url = url.strip() + + if url.startswith("mailto:"): + return cls(ContactMethod.EMAIL, url.replace("mailto:", "", 1)) + + if url.startswith("tel:"): + return cls(ContactMethod.PHONE, url.replace("tel:", "", 1)) + + if url.startswith("fax:"): + return cls(ContactMethod.FAX, url.replace("fax:", "", 1)) + diff --git a/src/music_kraken/utils/enums/contact.py b/src/music_kraken/utils/enums/contact.py new file mode 100644 index 0000000..938c801 --- /dev/null +++ b/src/music_kraken/utils/enums/contact.py @@ -0,0 +1,7 @@ +from enum import Enum + + +class ContactMethod(Enum): + EMAIL = "email" + PHONE = "phone" + FAX = "fax" From e9e9e61e7c38f9dcf1f5e16a3b1c86a85a58636c Mon Sep 17 00:00:00 2001 From: Hellow 
<74311245+HeIIow2@users.noreply.github.com> Date: Tue, 12 Sep 2023 18:50:32 +0200 Subject: [PATCH 009/104] feat: implemented the fetching of the artist --- src/music_kraken/objects/__init__.py | 1 + src/music_kraken/pages/bandcamp.py | 53 ++++++++++++++++++++++++---- 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/src/music_kraken/objects/__init__.py b/src/music_kraken/objects/__init__.py index 4a17b9b..bdca840 100644 --- a/src/music_kraken/objects/__init__.py +++ b/src/music_kraken/objects/__init__.py @@ -18,3 +18,4 @@ from .formatted_text import FormattedText from .collection import Collection from .country import Country +from .contact import Contact diff --git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py index 983053f..f32e9a2 100644 --- a/src/music_kraken/pages/bandcamp.py +++ b/src/music_kraken/pages/bandcamp.py @@ -2,7 +2,7 @@ from typing import List, Optional, Type from urllib.parse import urlparse import logging from enum import Enum - +from bs4 import BeautifulSoup from ..objects import Source, DatabaseObject from .abstract import Page @@ -13,7 +13,8 @@ from ..objects import ( Song, Album, Label, - Target + Target, + Contact ) from ..connection import Connection from ..utils.support_classes import DownloadResult @@ -33,6 +34,7 @@ class Bandcamp(Page): # CHANGE SOURCE_TYPE = SourcePages.BANDCAMP LOGGER = logging_settings["bandcamp_logger"] + HOST = "https://onlysmile.bandcamp.com" def __init__(self, *args, **kwargs): self.connection: Connection = Connection( @@ -143,7 +145,45 @@ class Bandcamp(Page): def song_search(self, song: Song) -> List[Song]: return self.general_search(song.title, filter_string="t") + + def _parse_artist_details(self, soup: BeautifulSoup) -> Artist: + name: str = None + source_list: List[Source] = [] + contact_list: List[Contact] = [] + + band_name_location: BeautifulSoup = soup.find("p", {"id": "band-name-location"}) + if band_name_location is not None: + title_span = 
band_name_location.find("span", {"class": "title"}) + if title_span is not None: + name = title_span.text.strip() + + link_container: BeautifulSoup = soup.find("ol", {"id": "band-links"}) + if link_container is not None: + li: BeautifulSoup + for li in link_container.find_all("a"): + if li is None and li['href'] is not None: + continue + + source_list.append(Source.match_url(li['href'], referer_page=self.SOURCE_TYPE)) + + return Artist( + name=name, + source_list=source_list + ) + def _parse_song_list(self, soup: BeautifulSoup) -> List[Album]: + title = None + source_list: List[Source] = [] + + a = soup.find("a") + if a is not None and a["href"] is not None: + source_list.append(Source(self.SOURCE_TYPE, self.HOST + a["href"])) + + title_p = soup.find("p", {"class": "title"}) + if title_p is not None: + title = title_p.text.strip() + + return Album(title=title, source_list=source_list) def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: artist = Artist() @@ -153,12 +193,13 @@ class Bandcamp(Page): return artist soup = self.get_soup_from_response(r) - data_container = soup.find("div", {"id": "pagedata"}) - data = data_container["data-blob"] - + if DEBUG: dump_to_file("artist_page.html", r.text, exit_after_dump=False) - dump_to_file("bandcamp_artis.json", data, is_json=True, exit_after_dump=False) + + artist = self._parse_artist_details(soup=soup.find("div", {"id": "bio-container"})) + for subsoup in soup.find("ol", {"id": "music-grid"}).find_all("li"): + artist.main_album_collection.append(self._parse_song_list(soup=subsoup)) return artist From 6a37351da1c0a778c23bf87abef44734f6c723da Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Tue, 12 Sep 2023 20:18:51 +0200 Subject: [PATCH 010/104] feat: implemented parsing of tracklist of album --- src/actual_donwload.py | 4 +-- src/music_kraken/objects/song.py | 6 +++- src/music_kraken/pages/bandcamp.py | 51 ++++++++++++++++++++++++++---- 3 files changed, 51 
insertions(+), 10 deletions(-) diff --git a/src/actual_donwload.py b/src/actual_donwload.py index 59e7667..bb8d1b1 100644 --- a/src/actual_donwload.py +++ b/src/actual_donwload.py @@ -29,8 +29,8 @@ if __name__ == "__main__": ] youtube_music_test = [ - "s: #a Only Smile", - "0" + "s: #a Only Smile #r Your best friend", + "8" ] music_kraken.cli.download(genre="test", command_list=youtube_music_test, process_metadata_anyway=True) diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index b7d021e..75fa351 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -467,7 +467,8 @@ class Artist(MainObject): "formed_in": ID3Timestamp(), "notes": FormattedText(), "lyrical_themes": [], - "general_genre": "" + "general_genre": "", + "unformated_location": None, } DOWNWARDS_COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection") @@ -489,6 +490,7 @@ class Artist(MainObject): country: CountryTyping = None, formed_in: ID3Timestamp = None, label_list: List['Label'] = None, + unformated_location: str = None, **kwargs ): MainObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs) @@ -520,6 +522,8 @@ class Artist(MainObject): self.contact_collection: Collection[Label] = Collection(data=contact_list, element_type=Contact) + self.unformated_location: Optional[str] = unformated_location + def _add_other_db_objects(self, object_type: Type["DatabaseObject"], object_list: List["DatabaseObject"]): if object_type is Song: # this doesn't really make sense diff --git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py index f32e9a2..925bf0b 100644 --- a/src/music_kraken/pages/bandcamp.py +++ b/src/music_kraken/pages/bandcamp.py @@ -1,6 +1,6 @@ from typing import List, Optional, Type from urllib.parse import urlparse -import logging +import json from enum import Enum from bs4 import BeautifulSoup @@ -145,6 +145,10 @@ class Bandcamp(Page): def song_search(self, song: Song) -> List[Song]: return 
self.general_search(song.title, filter_string="t") + + + def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: + return Label() def _parse_artist_details(self, soup: BeautifulSoup) -> Artist: name: str = None @@ -203,15 +207,48 @@ class Bandcamp(Page): return artist + def _parse_track_element(self, track: dict) -> Optional[Song]: + return Song( + title=track["item"]["name"], + source_list=[Source(self.SOURCE_TYPE, track["item"]["mainEntityOfPage"])], + tracksort=track["position"] + ) + + def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: + print(source) + album = Album() + + r = self.connection.get(source.url) + if r is None: + return album + + soup = self.get_soup_from_response(r) + + if DEBUG: + # dump_to_file("album_page.html", r.text, exit_after_dump=False) + pass + + data_container = soup.find("script", {"type": "application/ld+json"}) + + if DEBUG: + dump_to_file("album_data.json", data_container.text, is_json=True, exit_after_dump=False) + + data = json.loads(data_container.text) + + for i, track_json in enumerate(data.get("track", {}).get("itemListElement", [])): + if DEBUG: + dump_to_file(f"album_track_{i}.json", json.dumps(track_json), is_json=True, exit_after_dump=False) + + try: + album.song_collection.append(self._parse_track_element(track_json)) + except KeyError: + continue + + return album + def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: print(source) return Song() - def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: - return Album() - - def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: - return Label() - def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: return DownloadResult() From fd9a93f5553f670840a9aa02e4ac245b74312ebb Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Tue, 12 Sep 2023 22:16:08 +0200 Subject: [PATCH 011/104] feat: implemented artist 
details --- src/music_kraken/pages/bandcamp.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py index 925bf0b..039e4eb 100644 --- a/src/music_kraken/pages/bandcamp.py +++ b/src/music_kraken/pages/bandcamp.py @@ -14,7 +14,8 @@ from ..objects import ( Album, Label, Target, - Contact + Contact, + ID3Timestamp ) from ..connection import Connection from ..utils.support_classes import DownloadResult @@ -224,16 +225,23 @@ class Bandcamp(Page): soup = self.get_soup_from_response(r) - if DEBUG: - # dump_to_file("album_page.html", r.text, exit_after_dump=False) - pass - data_container = soup.find("script", {"type": "application/ld+json"}) if DEBUG: dump_to_file("album_data.json", data_container.text, is_json=True, exit_after_dump=False) data = json.loads(data_container.text) + artist_data = data["byArtist"] + + album = Album( + title=data["name"], + source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]))], + date=ID3Timestamp.strptime(data["datePublished"], "%d %b %Y %H:%M:%S %Z"), + artist_list=[Artist( + name=artist_data["name"], + source_list=[Source(self.SOURCE_TYPE, artist_data["@id"])] + )] + ) for i, track_json in enumerate(data.get("track", {}).get("itemListElement", [])): if DEBUG: From a3934a384e8e0b424064c70c654c12f9157369a2 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Tue, 12 Sep 2023 22:48:52 +0200 Subject: [PATCH 012/104] feat: fetching song details of bandcamp --- src/actual_donwload.py | 3 ++- src/music_kraken/pages/bandcamp.py | 32 +++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/actual_donwload.py b/src/actual_donwload.py index bb8d1b1..286299a 100644 --- a/src/actual_donwload.py +++ b/src/actual_donwload.py @@ -30,7 +30,8 @@ if __name__ == "__main__": youtube_music_test = [ "s: #a Only Smile #r Your best friend", - "8" + "8", + "2" ] 
music_kraken.cli.download(genre="test", command_list=youtube_music_test, process_metadata_anyway=True) diff --git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py index 039e4eb..276f640 100644 --- a/src/music_kraken/pages/bandcamp.py +++ b/src/music_kraken/pages/bandcamp.py @@ -256,7 +256,37 @@ class Bandcamp(Page): def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: print(source) - return Song() + + r = self.connection.get(source.url) + if r is None: + return Song() + + soup = self.get_soup_from_response(r) + + data_container = soup.find("script", {"type": "application/ld+json"}) + + if DEBUG: + dump_to_file("bandcamp_song_data.json", data_container.text, is_json=True, exit_after_dump=False) + + data = json.loads(data_container.text) + album_data = data["inAlbum"] + artist_data = data["byArtist"] + + song = Song( + title=data["name"], + source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]))], + album_list=[Album( + title=album_data["name"], + date=ID3Timestamp.strptime(data["datePublished"], "%d %b %Y %H:%M:%S %Z"), + source_list=[Source(self.SOURCE_TYPE, album_data["@id"])] + )], + main_artist_list=[Artist( + name=artist_data["name"], + source_list=[Source(self.SOURCE_TYPE, artist_data["@id"])] + )] + ) + + return song def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: return DownloadResult() From 8091a9ffb0e51d4c741365c2f99e390e7212ecb1 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Tue, 12 Sep 2023 23:39:22 +0200 Subject: [PATCH 013/104] feat: implemented downloading from bandcamp --- src/actual_donwload.py | 3 ++- src/music_kraken/pages/bandcamp.py | 40 ++++++++++++++++++++++++++---- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/src/actual_donwload.py b/src/actual_donwload.py index 286299a..60b8c8a 100644 --- a/src/actual_donwload.py +++ b/src/actual_donwload.py @@ -31,7 +31,8 @@ if 
__name__ == "__main__": youtube_music_test = [ "s: #a Only Smile #r Your best friend", "8", - "2" + "2", + "d: 2" ] music_kraken.cli.download(genre="test", command_list=youtube_music_test, process_metadata_anyway=True) diff --git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py index 276f640..00e33c3 100644 --- a/src/music_kraken/pages/bandcamp.py +++ b/src/music_kraken/pages/bandcamp.py @@ -3,6 +3,8 @@ from urllib.parse import urlparse import json from enum import Enum from bs4 import BeautifulSoup +import pycountry +import demjson3 from ..objects import Source, DatabaseObject from .abstract import Page @@ -15,7 +17,9 @@ from ..objects import ( Label, Target, Contact, - ID3Timestamp + ID3Timestamp, + Lyrics, + FormattedText ) from ..connection import Connection from ..utils.support_classes import DownloadResult @@ -254,6 +258,17 @@ class Bandcamp(Page): return album + def _fetch_lyrics(self, soup: BeautifulSoup) -> List[Lyrics]: + track_lyrics = soup.find("div", {"class": "lyricsText"}) + if track_lyrics: + self.LOGGER.debug(" Lyrics retrieved..") + return [Lyrics(FormattedText( + html=track_lyrics.prettify() + ), pycountry.languages.get(alpha_2="en"))] + + return [] + + def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: print(source) @@ -264,17 +279,28 @@ class Bandcamp(Page): soup = self.get_soup_from_response(r) data_container = soup.find("script", {"type": "application/ld+json"}) - + other_data = {} + + other_data_list = soup.select("script[data-tralbum]") + if len(other_data_list) > 0: + other_data = json.loads(other_data_list[0]["data-tralbum"]) + if DEBUG: dump_to_file("bandcamp_song_data.json", data_container.text, is_json=True, exit_after_dump=False) + dump_to_file("bandcamp_song_data_other.json", json.dumps(other_data), is_json=True, exit_after_dump=False) + dump_to_file("bandcamp_song_page.html", r.text, exit_after_dump=False) data = json.loads(data_container.text) album_data = data["inAlbum"] artist_data = 
data["byArtist"] + mp3_url = None + for key, value in other_data.get("trackinfo", [{}])[0].get("file", {"": None}).items(): + mp3_url = value + song = Song( title=data["name"], - source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]))], + source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), adio_url=mp3_url)], album_list=[Album( title=album_data["name"], date=ID3Timestamp.strptime(data["datePublished"], "%d %b %Y %H:%M:%S %Z"), @@ -283,10 +309,14 @@ class Bandcamp(Page): main_artist_list=[Artist( name=artist_data["name"], source_list=[Source(self.SOURCE_TYPE, artist_data["@id"])] - )] + )], + lyrics_list=self._fetch_lyrics(soup=soup) ) + return song def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: - return DownloadResult() + if source.audio_url is None: + return DownloadResult(error_message="Couldn't find download link.") + return self.connection.stream_into(url=source.audio_url, target=target, description=desc) From 13b9c0b35eb895b6c16c91e65a976c9c4f36c5dd Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 13 Sep 2023 16:01:01 +0200 Subject: [PATCH 014/104] fix: metal archives artist sources --- src/actual_donwload.py | 8 ++- src/music_kraken/download/page_attributes.py | 8 ++- src/music_kraken/download/results.py | 2 +- src/music_kraken/objects/collection.py | 2 + src/music_kraken/objects/parents.py | 2 +- src/music_kraken/objects/song.py | 9 +-- src/music_kraken/pages/abstract.py | 43 +++++++++----- src/music_kraken/pages/bandcamp.py | 59 +++++++++++++++---- .../pages/encyclopaedia_metallum.py | 35 ++++++++--- .../pages/youtube_music/youtube_music.py | 6 +- src/music_kraken/utils/shared.py | 2 +- 11 files changed, 129 insertions(+), 47 deletions(-) diff --git a/src/actual_donwload.py b/src/actual_donwload.py index 60b8c8a..4475034 100644 --- a/src/actual_donwload.py +++ b/src/actual_donwload.py @@ -35,4 +35,10 @@ if __name__ == "__main__": "d: 2" ] - 
music_kraken.cli.download(genre="test", command_list=youtube_music_test, process_metadata_anyway=True) + cross_download = [ + "s: #a Psychonaut 4", + "2", + "d: 0" + ] + + music_kraken.cli.download(genre="test", command_list=cross_download, process_metadata_anyway=True) diff --git a/src/music_kraken/download/page_attributes.py b/src/music_kraken/download/page_attributes.py index 17c8a91..014a984 100644 --- a/src/music_kraken/download/page_attributes.py +++ b/src/music_kraken/download/page_attributes.py @@ -15,7 +15,7 @@ from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, ALL_PAGES: Set[Type[Page]] = { EncyclopaediaMetallum, Musify, - YoutubeMusic, + # YoutubeMusic, Bandcamp } @@ -97,7 +97,11 @@ class Pages: if not isinstance(music_object, INDEPENDENT_DB_OBJECTS): return DownloadResult(error_message=f"{type(music_object).__name__} can't be downloaded.") - _page_types = set(self._source_to_page[src] for src in music_object.source_collection.source_pages) + _page_types = set() + for src in music_object.source_collection.source_pages: + if src in self._source_to_page: + _page_types.add(self._source_to_page[src]) + audio_pages = self._audio_pages_set.intersection(_page_types) for download_page in audio_pages: diff --git a/src/music_kraken/download/results.py b/src/music_kraken/download/results.py index 631ad48..46911b1 100644 --- a/src/music_kraken/download/results.py +++ b/src/music_kraken/download/results.py @@ -27,7 +27,7 @@ class Results: self._page_by_index = dict() def get_music_object_by_index(self, index: int) -> Tuple[Type[Page], DatabaseObject]: - # if this throws a key error, either the formated generator needs to be iterated, or the option doesn't exist. + # if this throws a key error, either the formatted generator needs to be iterated, or the option doesn't exist. 
return self._page_by_index[index], self._by_index[index] def delete_details(self, exclude_index: int): diff --git a/src/music_kraken/objects/collection.py b/src/music_kraken/objects/collection.py index 2b9ce72..5310211 100644 --- a/src/music_kraken/objects/collection.py +++ b/src/music_kraken/objects/collection.py @@ -81,6 +81,8 @@ class Collection(Generic[T]): :param merge_into_existing: :return did_not_exist: """ + if element is None: + return AppendResult(False, None, False) # if the element type has been defined in the initializer it checks if the type matches if self.element_type is not None and not isinstance(element, self.element_type): diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index a553700..7297b17 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -31,7 +31,7 @@ class DatabaseObject: """ _id = random.randint(0, HIGHEST_ID) self.automatic_id = True - LOGGER.debug(f"Id for {type(self).__name__} isn't set. Setting to {_id}") + # LOGGER.debug(f"Id for {type(self).__name__} isn't set. 
Setting to {_id}") # The id can only be None, if the object is dynamic (self.dynamic = True) self.id: Optional[int] = _id diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index 75fa351..36cd77a 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -649,11 +649,12 @@ class Artist(MainObject): return metadata - def __str__(self): + def __str__(self, include_notes: bool = False): string = self.name or "" - plaintext_notes = self.notes.get_plaintext() - if plaintext_notes is not None: - string += "\n" + plaintext_notes + if include_notes: + plaintext_notes = self.notes.get_plaintext() + if plaintext_notes is not None: + string += "\n" + plaintext_notes return string def __repr__(self): diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index 25a589c..f068482 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -153,11 +153,12 @@ def build_new_object(new_object: DatabaseObject) -> DatabaseObject: return new_object -def merge_together(old_object: DatabaseObject, new_object: DatabaseObject) -> DatabaseObject: +def merge_together(old_object: DatabaseObject, new_object: DatabaseObject, do_compile: bool = True) -> DatabaseObject: new_object = clean_object(new_object) old_object.merge(new_object) - old_object.compile(merge_into=False) + if do_compile: + old_object.compile(merge_into=False) return old_object @@ -246,7 +247,7 @@ class Page: return [] - def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1) -> DatabaseObject: + def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1, post_process: bool = True) -> DatabaseObject: """ when a music object with lacking data is passed in, it returns the SAME object **(no copy)** with more detailed data. 
@@ -270,22 +271,22 @@ class Page: if isinstance(music_object, INDEPENDENT_DB_OBJECTS): source: Source for source in music_object.source_collection.get_sources_from_page(self.SOURCE_TYPE): - new_music_object.merge( - self.fetch_object_from_source( - source=source, - enforce_type=type(music_object), - stop_at_level=stop_at_level, - post_process=False - ) - ) + new_music_object.merge(self.fetch_object_from_source( + source=source, + enforce_type=type(music_object), + stop_at_level=stop_at_level, + post_process=False + )) - return merge_together(music_object, new_music_object) + return merge_together(music_object, new_music_object, do_compile=post_process) def fetch_object_from_source(self, source: Source, stop_at_level: int = 2, enforce_type: Type[DatabaseObject] = None, post_process: bool = True) -> Optional[DatabaseObject]: - obj_type = self.get_source_type(source) - + obj_type = self.get_source_type( + source) + print("obj type", obj_type, self) if obj_type is None: return None + if enforce_type != obj_type and enforce_type is not None: self.LOGGER.warning(f"Object type isn't type to enforce: {enforce_type}, {obj_type}") return None @@ -298,13 +299,21 @@ class Page: Artist: self.fetch_artist, Label: self.fetch_label } - + if obj_type in fetch_map: music_object = fetch_map[obj_type](source, stop_at_level) else: self.LOGGER.warning(f"Can't fetch details of type: {obj_type}") return None + if stop_at_level > 1: + collection: Collection + for collection_str in music_object.DOWNWARDS_COLLECTION_ATTRIBUTES: + collection = music_object.__getattribute__(collection_str) + + for sub_element in collection: + sub_element.merge(self.fetch_details(sub_element, stop_at_level=stop_at_level-1, post_process=False)) + if post_process and music_object: return build_new_object(music_object) @@ -323,6 +332,10 @@ class Page: return Label() def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult: + # 
print("downloading") + + self.fetch_details(music_object, stop_at_level=2) + naming_dict: NamingDict = NamingDict({"genre": genre}) def fill_naming_objects(naming_music_object: DatabaseObject): diff --git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py index 00e33c3..ea3a09f 100644 --- a/src/music_kraken/pages/bandcamp.py +++ b/src/music_kraken/pages/bandcamp.py @@ -1,10 +1,9 @@ from typing import List, Optional, Type -from urllib.parse import urlparse +from urllib.parse import urlparse, urlunparse import json from enum import Enum from bs4 import BeautifulSoup import pycountry -import demjson3 from ..objects import Source, DatabaseObject from .abstract import Page @@ -51,12 +50,13 @@ class Bandcamp(Page): def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: parsed_url = urlparse(source.url) + path = parsed_url.path.replace("/", "") - if parsed_url.path == "": + if path == "": return Artist - if parsed_url.path.startswith("/album/"): + if path.startswith("album"): return Album - if parsed_url.path.startswith("/track/"): + if path.startswith("track"): return Song return super().get_source_type(source) @@ -65,7 +65,6 @@ class Bandcamp(Page): try: object_type = BandcampTypes(data["type"]) except ValueError: - print(data["type"]) return url = data["item_url_root"] @@ -180,7 +179,7 @@ class Bandcamp(Page): source_list=source_list ) - def _parse_song_list(self, soup: BeautifulSoup) -> List[Album]: + def _parse_album(self, soup: BeautifulSoup) -> List[Album]: title = None source_list: List[Source] = [] @@ -194,6 +193,25 @@ class Bandcamp(Page): return Album(title=title, source_list=source_list) + def _parse_artist_data_blob(self, data_blob: dict, artist_url: str): + if DEBUG: + dump_to_file("bandcamp_data_blob.json", json.dumps(data_blob), is_json=True, exit_after_dump=False) + + parsed_artist_url = urlparse(artist_url) + album_list: List[Album] = [] + + for album_json in data_blob.get("buyfulldisco", {}).get("tralbums", 
[]): + album_list.append(Album( + title=album_json["title"], + source_list=[Source( + self.SOURCE_TYPE, + urlunparse((parsed_artist_url.scheme, parsed_artist_url.netloc, album_json["page_url"], "", "", "")) + )] + )) + + return album_list + + def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: artist = Artist() @@ -207,9 +225,22 @@ class Bandcamp(Page): dump_to_file("artist_page.html", r.text, exit_after_dump=False) artist = self._parse_artist_details(soup=soup.find("div", {"id": "bio-container"})) - for subsoup in soup.find("ol", {"id": "music-grid"}).find_all("li"): - artist.main_album_collection.append(self._parse_song_list(soup=subsoup)) + html_music_grid = soup.find("ol", {"id": "music-grid"}) + if html_music_grid is not None: + for subsoup in html_music_grid.find_all("li"): + artist.main_album_collection.append(self._parse_album(soup=subsoup)) + + data_blob_soup = soup.find("div", {"id": "pagedata"}) + if data_blob_soup is not None: + data_blob = data_blob_soup["data-blob"] + if data_blob is not None: + artist.main_album_collection.extend( + + self._parse_artist_data_blob(json.loads(data_blob), source.url) + ) + + artist.source_collection.append(source) return artist def _parse_track_element(self, track: dict) -> Optional[Song]: @@ -220,7 +251,6 @@ class Bandcamp(Page): ) def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: - print(source) album = Album() r = self.connection.get(source.url) @@ -237,13 +267,16 @@ class Bandcamp(Page): data = json.loads(data_container.text) artist_data = data["byArtist"] + artist_source_list = [] + if "@id" in artist_data: + artist_source_list=[Source(self.SOURCE_TYPE, artist_data["@id"])] album = Album( title=data["name"], source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]))], date=ID3Timestamp.strptime(data["datePublished"], "%d %b %Y %H:%M:%S %Z"), artist_list=[Artist( name=artist_data["name"], - source_list=[Source(self.SOURCE_TYPE, artist_data["@id"])] + 
source_list=artist_source_list )] ) @@ -256,6 +289,7 @@ class Bandcamp(Page): except KeyError: continue + album.source_collection.append(source) return album def _fetch_lyrics(self, soup: BeautifulSoup) -> List[Lyrics]: @@ -270,8 +304,6 @@ class Bandcamp(Page): def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: - print(source) - r = self.connection.get(source.url) if r is None: return Song() @@ -313,6 +345,7 @@ class Bandcamp(Page): lyrics_list=self._fetch_lyrics(soup=soup) ) + song.source_collection.append(source) return song diff --git a/src/music_kraken/pages/encyclopaedia_metallum.py b/src/music_kraken/pages/encyclopaedia_metallum.py index 87c0f69..2025af6 100644 --- a/src/music_kraken/pages/encyclopaedia_metallum.py +++ b/src/music_kraken/pages/encyclopaedia_metallum.py @@ -22,6 +22,10 @@ from ..objects import ( Options, DatabaseObject ) +from ..utils.shared import DEBUG +if DEBUG: + from ..utils.debug_utils import dump_to_file + ALBUM_TYPE_MAP: Dict[str, AlbumType] = defaultdict(lambda: AlbumType.OTHER, { @@ -264,20 +268,33 @@ class EncyclopaediaMetallum(Page): soup = self.get_soup_from_response(r) + if DEBUG: + dump_to_file(f"ma_artist_sources_{ma_artist_id}.html", soup.prettify(), exit_after_dump=False) + if soup.find("span", {"id": "noLinks"}) is not None: return [] - artist_source = soup.find("div", {"id": "band_links_Official"}) - """ - TODO - add a Label object to add the label sources from - TODO - maybe do merchandice stuff - """ + source_list = [] + + link_table: BeautifulSoup = soup.find("table", {"id": "linksTablemain"}) + if link_table is not None: + for tr in link_table.find_all("tr"): + anchor: BeautifulSoup = tr.find("a") + if anchor is None: + continue + + href = anchor["href"] + if href is not None: + source_list.append(Source.match_url(href, referer_page=self.SOURCE_TYPE)) + + # The following code is only legacy code, which I just kep because it doesn't harm. + # The way ma returns sources changed. 
+ artist_source = soup.find("div", {"id": "band_links"}) + merchandice_source = soup.find("div", {"id": "band_links_Official_merchandise"}) label_source = soup.find("div", {"id": "band_links_Labels"}) - source_list = [] + if artist_source is not None: for tr in artist_source.find_all("td"): @@ -288,6 +305,8 @@ class EncyclopaediaMetallum(Page): source_list.append(Source.match_url(url, referer_page=self.SOURCE_TYPE)) + print(source_list) + return source_list def _parse_artist_attributes(self, artist_soup: BeautifulSoup) -> Artist: diff --git a/src/music_kraken/pages/youtube_music/youtube_music.py b/src/music_kraken/pages/youtube_music/youtube_music.py index ef6f14d..851cfc3 100644 --- a/src/music_kraken/pages/youtube_music/youtube_music.py +++ b/src/music_kraken/pages/youtube_music/youtube_music.py @@ -71,8 +71,9 @@ class YoutubeMusicConnection(Connection): r = self.get("https://music.youtube.com/verify_session", is_heartbeat=True) if r is None: self.heartbeat_failed() + return - string = r.content.decode("utf-8") + string = r.text data = json.loads(string[string.index("{"):]) success: bool = data["success"] @@ -248,6 +249,9 @@ class YoutubeMusic(SuperYouTube): } ) + if r is None: + return [] + renderer_list = r.json().get("contents", {}).get("tabbedSearchResultsRenderer", {}).get("tabs", [{}])[0].get("tabRenderer").get("content", {}).get("sectionListRenderer", {}).get("contents", []) if DEBUG: diff --git a/src/music_kraken/utils/shared.py b/src/music_kraken/utils/shared.py index 33702b6..80efee8 100644 --- a/src/music_kraken/utils/shared.py +++ b/src/music_kraken/utils/shared.py @@ -4,7 +4,7 @@ from .config import main_settings DEBUG = True DEBUG_YOUTUBE_INITIALIZING = DEBUG and False -DEBUG_PAGES = DEBUG and True +DEBUG_PAGES = DEBUG and False if DEBUG: print("DEBUG ACTIVE") From 1794c0535ec128a0f2ad962d19463b0c825cc151 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Wed, 13 Sep 2023 18:55:04 +0200 Subject: [PATCH 015/104] 
fix: sanatizing file names correctly --- src/actual_donwload.py | 8 ++- src/music_kraken/__init__.py | 4 +- src/music_kraken/objects/target.py | 5 +- src/music_kraken/pages/abstract.py | 7 +-- src/music_kraken/pages/bandcamp.py | 62 ++++++++++++--------- src/music_kraken/utils/shared.py | 1 + src/music_kraken/utils/string_processing.py | 29 ++++++---- 7 files changed, 70 insertions(+), 46 deletions(-) diff --git a/src/actual_donwload.py b/src/actual_donwload.py index 4475034..3b575cc 100644 --- a/src/actual_donwload.py +++ b/src/actual_donwload.py @@ -41,4 +41,10 @@ if __name__ == "__main__": "d: 0" ] - music_kraken.cli.download(genre="test", command_list=cross_download, process_metadata_anyway=True) + bandcamp_test = [ + "s: #a Ghost Bath", + "d: 0" + ] + + + music_kraken.cli.download(genre="test", command_list=cross_download, process_metadata_anyway=True) \ No newline at end of file diff --git a/src/music_kraken/__init__.py b/src/music_kraken/__init__.py index acf551f..748a628 100644 --- a/src/music_kraken/__init__.py +++ b/src/music_kraken/__init__.py @@ -3,7 +3,7 @@ import logging import gc import musicbrainzngs -from .utils.shared import DEBUG +from .utils.shared import DEBUG, DEBUG_LOGGIN from .utils.config import logging_settings, main_settings, read_config read_config() from . import cli @@ -11,7 +11,7 @@ from . 
import cli # configure logger default logging.basicConfig( - level=logging_settings['log_level'] if not DEBUG else logging.DEBUG, + level=logging_settings['log_level'] if not DEBUG_LOGGIN else logging.DEBUG, format=logging_settings['logging_format'], handlers=[ logging.FileHandler(main_settings['log_file']), diff --git a/src/music_kraken/objects/target.py b/src/music_kraken/objects/target.py index 94cb2d8..fa06177 100644 --- a/src/music_kraken/objects/target.py +++ b/src/music_kraken/objects/target.py @@ -7,6 +7,7 @@ from tqdm import tqdm from .parents import DatabaseObject from ..utils.config import main_settings, logging_settings +from ..utils.string_processing import fit_to_file_system LOGGER = logging.getLogger("target") @@ -35,8 +36,8 @@ class Target(DatabaseObject): relative_to_music_dir: bool = False ) -> None: super().__init__(dynamic=dynamic) - self._file: Path = Path(file) - self._path: Path = Path(main_settings["music_directory"], path) if relative_to_music_dir else Path(path) + self._file: Path = Path(fit_to_file_system(file)) + self._path: Path = fit_to_file_system(Path(main_settings["music_directory"], path) if relative_to_music_dir else Path(path)) self.is_relative_to_music_dir: bool = relative_to_music_dir diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index f068482..99d56e9 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -281,9 +281,8 @@ class Page: return merge_together(music_object, new_music_object, do_compile=post_process) def fetch_object_from_source(self, source: Source, stop_at_level: int = 2, enforce_type: Type[DatabaseObject] = None, post_process: bool = True) -> Optional[DatabaseObject]: - obj_type = self.get_source_type( - source) - print("obj type", obj_type, self) + obj_type = self.get_source_type(source) + if obj_type is None: return None @@ -334,7 +333,7 @@ class Page: def download(self, music_object: DatabaseObject, genre: str, download_all: bool = 
False, process_metadata_anyway: bool = False) -> DownloadResult: # print("downloading") - self.fetch_details(music_object, stop_at_level=2) + # self.fetch_details(music_object, stop_at_level=1) naming_dict: NamingDict = NamingDict({"genre": genre}) diff --git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py index ea3a09f..a179489 100644 --- a/src/music_kraken/pages/bandcamp.py +++ b/src/music_kraken/pages/bandcamp.py @@ -28,6 +28,17 @@ if DEBUG: from ..utils.debug_utils import dump_to_file +def _parse_artist_url(url: str) -> str: + parsed = urlparse(url) + return urlunparse((parsed.scheme, parsed.netloc, "/music/", "", "", "")) + + +def _get_host(source: Source) -> str: + parsed = urlparse(source.url) + return urlunparse((parsed.scheme, parsed.netloc, "", "", "", "")) + + + class BandcampTypes(Enum): ARTIST = "b" ALBUM = "a" @@ -38,7 +49,6 @@ class Bandcamp(Page): # CHANGE SOURCE_TYPE = SourcePages.BANDCAMP LOGGER = logging_settings["bandcamp_logger"] - HOST = "https://onlysmile.bandcamp.com" def __init__(self, *args, **kwargs): self.connection: Connection = Connection( @@ -52,7 +62,7 @@ class Bandcamp(Page): parsed_url = urlparse(source.url) path = parsed_url.path.replace("/", "") - if path == "": + if path == "" or path.startswith("music"): return Artist if path.startswith("album"): return Album @@ -81,6 +91,7 @@ class Bandcamp(Page): ) if object_type is BandcampTypes.ARTIST: + source_list = [Source(self.SOURCE_TYPE, _parse_artist_url(url))] return Artist( name=name, source_list=source_list @@ -92,7 +103,7 @@ class Bandcamp(Page): source_list=source_list, artist_list=[ Artist( - name=data["band_name"], + name=data["band_name"].strip(), source_list=[ Source(self.SOURCE_TYPE, data["item_url_root"]) ] @@ -102,7 +113,7 @@ class Bandcamp(Page): if object_type is BandcampTypes.SONG: return Song( - title=name, + title=name.strip(), source_list=source_list, main_artist_list=[ Artist( @@ -127,7 +138,7 @@ class Bandcamp(Page): return results if 
DEBUG: - dump_to_file("bandcamp_response.json", r.text, is_json=True, exit_after_dump=False) + dump_to_file("bandcamp_search_response.json", r.text, is_json=True, exit_after_dump=False) data = r.json() @@ -172,20 +183,20 @@ class Bandcamp(Page): if li is None and li['href'] is not None: continue - source_list.append(Source.match_url(li['href'], referer_page=self.SOURCE_TYPE)) + source_list.append(Source.match_url(_parse_artist_url(li['href']), referer_page=self.SOURCE_TYPE)) return Artist( name=name, source_list=source_list ) - def _parse_album(self, soup: BeautifulSoup) -> List[Album]: + def _parse_album(self, soup: BeautifulSoup, initial_source: Source) -> List[Album]: title = None source_list: List[Source] = [] a = soup.find("a") if a is not None and a["href"] is not None: - source_list.append(Source(self.SOURCE_TYPE, self.HOST + a["href"])) + source_list.append(Source(self.SOURCE_TYPE, _get_host(initial_source) + a["href"])) title_p = soup.find("p", {"class": "title"}) if title_p is not None: @@ -194,15 +205,12 @@ class Bandcamp(Page): return Album(title=title, source_list=source_list) def _parse_artist_data_blob(self, data_blob: dict, artist_url: str): - if DEBUG: - dump_to_file("bandcamp_data_blob.json", json.dumps(data_blob), is_json=True, exit_after_dump=False) - parsed_artist_url = urlparse(artist_url) album_list: List[Album] = [] for album_json in data_blob.get("buyfulldisco", {}).get("tralbums", []): album_list.append(Album( - title=album_json["title"], + title=album_json["title"].strip(), source_list=[Source( self.SOURCE_TYPE, urlunparse((parsed_artist_url.scheme, parsed_artist_url.netloc, album_json["page_url"], "", "", "")) @@ -229,15 +237,17 @@ class Bandcamp(Page): html_music_grid = soup.find("ol", {"id": "music-grid"}) if html_music_grid is not None: for subsoup in html_music_grid.find_all("li"): - artist.main_album_collection.append(self._parse_album(soup=subsoup)) + artist.main_album_collection.append(self._parse_album(soup=subsoup, 
initial_source=source)) - data_blob_soup = soup.find("div", {"id": "pagedata"}) - if data_blob_soup is not None: + for i, data_blob_soup in enumerate(soup.find_all("div", {"id": ["pagedata", "collectors-data"]})): data_blob = data_blob_soup["data-blob"] + + if DEBUG: + dump_to_file(f"bandcamp_artist_data_blob_{i}.json", data_blob, is_json=True, exit_after_dump=False) + if data_blob is not None: artist.main_album_collection.extend( - - self._parse_artist_data_blob(json.loads(data_blob), source.url) + self._parse_artist_data_blob(json.loads(data_blob), source.url) ) artist.source_collection.append(source) @@ -245,9 +255,9 @@ class Bandcamp(Page): def _parse_track_element(self, track: dict) -> Optional[Song]: return Song( - title=track["item"]["name"], + title=track["item"]["name"].strip(), source_list=[Source(self.SOURCE_TYPE, track["item"]["mainEntityOfPage"])], - tracksort=track["position"] + tracksort=int(track["position"]) ) def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: @@ -269,13 +279,13 @@ class Bandcamp(Page): artist_source_list = [] if "@id" in artist_data: - artist_source_list=[Source(self.SOURCE_TYPE, artist_data["@id"])] + artist_source_list=[Source(self.SOURCE_TYPE, _parse_artist_url(artist_data["@id"]))] album = Album( - title=data["name"], + title=data["name"].strip(), source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]))], date=ID3Timestamp.strptime(data["datePublished"], "%d %b %Y %H:%M:%S %Z"), artist_list=[Artist( - name=artist_data["name"], + name=artist_data["name"].strip(), source_list=artist_source_list )] ) @@ -331,16 +341,16 @@ class Bandcamp(Page): mp3_url = value song = Song( - title=data["name"], + title=data["name"].strip(), source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), adio_url=mp3_url)], album_list=[Album( - title=album_data["name"], + title=album_data["name"].strip(), date=ID3Timestamp.strptime(data["datePublished"], "%d %b %Y %H:%M:%S %Z"), 
source_list=[Source(self.SOURCE_TYPE, album_data["@id"])] )], main_artist_list=[Artist( - name=artist_data["name"], - source_list=[Source(self.SOURCE_TYPE, artist_data["@id"])] + name=artist_data["name"].strip(), + source_list=[Source(self.SOURCE_TYPE, _parse_artist_url(artist_data["@id"]))] )], lyrics_list=self._fetch_lyrics(soup=soup) ) diff --git a/src/music_kraken/utils/shared.py b/src/music_kraken/utils/shared.py index 80efee8..2c2013c 100644 --- a/src/music_kraken/utils/shared.py +++ b/src/music_kraken/utils/shared.py @@ -3,6 +3,7 @@ import random from .config import main_settings DEBUG = True +DEBUG_LOGGIN = DEBUG and False DEBUG_YOUTUBE_INITIALIZING = DEBUG and False DEBUG_PAGES = DEBUG and False diff --git a/src/music_kraken/utils/string_processing.py b/src/music_kraken/utils/string_processing.py index e5dbb93..3c1cedc 100644 --- a/src/music_kraken/utils/string_processing.py +++ b/src/music_kraken/utils/string_processing.py @@ -1,4 +1,5 @@ -from typing import Tuple +from typing import Tuple, Union +from pathlib import Path from transliterate.exceptions import LanguageDetectionError from transliterate import translit @@ -25,20 +26,26 @@ def unify(string: str) -> str: return string.lower() -def fit_to_file_system(string: str) -> str: - string = string.strip() +def fit_to_file_system(string: Union[str, Path]) -> Union[str, Path]: + def fit_string(string: str) -> str: + if string == "/": + return "/" + string = string.strip() - while string[0] == ".": - if len(string) == 0: - return string + while string[0] == ".": + if len(string) == 0: + return string - string = string[1:] + string = string[1:] - string = string.replace("/", "_").replace("\\", "_") + string = string.replace("/", "_").replace("\\", "_") + string = sanitize_filename(string) + return string - string = sanitize_filename(string) - - return string + if isinstance(string, Path): + return Path(*(fit_string(part) for part in string.parts)) + else: + return fit_string(string) def 
clean_song_title(raw_song_title: str, artist_name: str) -> str: From a039643ba2704e845c65433f8fa6eaab74c2611d Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Wed, 13 Sep 2023 20:02:36 +0200 Subject: [PATCH 016/104] fix: calling the proper artist page --- src/actual_donwload.py | 3 ++- src/music_kraken/pages/abstract.py | 12 ++---------- src/music_kraken/pages/bandcamp.py | 2 +- src/music_kraken/pages/encyclopaedia_metallum.py | 6 ++---- src/music_kraken/utils/shared.py | 2 +- 5 files changed, 8 insertions(+), 17 deletions(-) diff --git a/src/actual_donwload.py b/src/actual_donwload.py index 3b575cc..5070293 100644 --- a/src/actual_donwload.py +++ b/src/actual_donwload.py @@ -43,8 +43,9 @@ if __name__ == "__main__": bandcamp_test = [ "s: #a Ghost Bath", + "3", "d: 0" ] - music_kraken.cli.download(genre="test", command_list=cross_download, process_metadata_anyway=True) \ No newline at end of file + music_kraken.cli.download(genre="test", command_list=bandcamp_test, process_metadata_anyway=True) \ No newline at end of file diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index 99d56e9..c8c3973 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -24,6 +24,7 @@ from ..utils.enums.album import AlbumType from ..audio import write_metadata_to_target, correct_codec from ..utils.config import main_settings from ..utils.support_classes import Query, DownloadResult +from ..utils.string_processing import fit_to_file_system INDEPENDENT_DB_OBJECTS = Union[Label, Album, Artist, Song] @@ -53,17 +54,12 @@ class NamingDict(dict): return type(self)(super().copy(), self.object_mappings.copy()) def __getitem__(self, key: str) -> str: - return super().__getitem__(key) + return fit_to_file_system(super().__getitem__(key)) def default_value_for_name(self, name: str) -> str: return f'Various {name.replace("_", " ").title()}' def __missing__(self, key: str) -> str: - """ - TODO 
- add proper logging - """ - if "." not in key: if key not in self.CUSTOM_KEYS: return self.default_value_for_name(key) @@ -331,10 +327,6 @@ class Page: return Label() def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult: - # print("downloading") - - # self.fetch_details(music_object, stop_at_level=1) - naming_dict: NamingDict = NamingDict({"genre": genre}) def fill_naming_objects(naming_music_object: DatabaseObject): diff --git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py index a179489..24eb37b 100644 --- a/src/music_kraken/pages/bandcamp.py +++ b/src/music_kraken/pages/bandcamp.py @@ -223,7 +223,7 @@ class Bandcamp(Page): def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: artist = Artist() - r = self.connection.get(source.url) + r = self.connection.get(_parse_artist_url(source.url)) if r is None: return artist diff --git a/src/music_kraken/pages/encyclopaedia_metallum.py b/src/music_kraken/pages/encyclopaedia_metallum.py index 2025af6..41fbb22 100644 --- a/src/music_kraken/pages/encyclopaedia_metallum.py +++ b/src/music_kraken/pages/encyclopaedia_metallum.py @@ -92,7 +92,7 @@ def _album_from_json(album_html=None, release_type=None, artist_html=None) -> Al # Self Loather' soup = BeautifulSoup(album_html, 'html.parser') anchor = soup.find('a') - album_name = anchor.text + album_name = anchor.text.strip() album_url = anchor.get('href') album_id = album_url.split("/")[-1] @@ -304,9 +304,7 @@ class EncyclopaediaMetallum(Page): continue source_list.append(Source.match_url(url, referer_page=self.SOURCE_TYPE)) - - print(source_list) - + return source_list def _parse_artist_attributes(self, artist_soup: BeautifulSoup) -> Artist: diff --git a/src/music_kraken/utils/shared.py b/src/music_kraken/utils/shared.py index 2c2013c..bfe483e 100644 --- a/src/music_kraken/utils/shared.py +++ b/src/music_kraken/utils/shared.py @@ -2,7 +2,7 @@ 
import random from .config import main_settings -DEBUG = True +DEBUG = False DEBUG_LOGGIN = DEBUG and False DEBUG_YOUTUBE_INITIALIZING = DEBUG and False DEBUG_PAGES = DEBUG and False From 82ffdda6424822413baa1ff85a5ec8cd142869b3 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Thu, 14 Sep 2023 10:09:51 +0200 Subject: [PATCH 017/104] removed debug --- src/music_kraken/download/page_attributes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/music_kraken/download/page_attributes.py b/src/music_kraken/download/page_attributes.py index 014a984..f7bc619 100644 --- a/src/music_kraken/download/page_attributes.py +++ b/src/music_kraken/download/page_attributes.py @@ -15,7 +15,7 @@ from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, ALL_PAGES: Set[Type[Page]] = { EncyclopaediaMetallum, Musify, - # YoutubeMusic, + YoutubeMusic, Bandcamp } From 33f97662d7195d397cb662218432fcf8451300ae Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Thu, 14 Sep 2023 18:04:43 +0200 Subject: [PATCH 018/104] dsaf --- src/music_kraken/pages/encyclopaedia_metallum.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/music_kraken/pages/encyclopaedia_metallum.py b/src/music_kraken/pages/encyclopaedia_metallum.py index 41fbb22..289434c 100644 --- a/src/music_kraken/pages/encyclopaedia_metallum.py +++ b/src/music_kraken/pages/encyclopaedia_metallum.py @@ -462,10 +462,6 @@ class EncyclopaediaMetallum(Page): artist.notes = band_notes discography: List[Album] = self._fetch_artist_discography(artist_id) - if stop_at_level > 1: - for album in discography: - for source in album.source_collection.get_sources_from_page(self.SOURCE_TYPE): - album.merge(self._fetch_album_from_source(source, stop_at_level=stop_at_level-1)) artist.main_album_collection.extend(discography) return artist @@ -586,13 +582,7 @@ class EncyclopaediaMetallum(Page): soup = self.get_soup_from_response(r) - album = self._parse_album_attributes(soup, 
stop_at_level=stop_at_level) - - if stop_at_level > 1: - for song in album.song_collection: - for source in song.source_collection.get_sources_from_page(self.SOURCE_TYPE): - song.merge(self._fetch_song_from_source(source=source, stop_at_level=stop_at_level-1)) - + album = self._parse_album_attributes(soup, stop_at_level=stop_at_level) return album def _fetch_lyrics(self, song_id: str) -> Optional[Lyrics]: From ad4328dd11e65c99fc04d0f0ba5b4e2eeb000366 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Thu, 14 Sep 2023 23:35:37 +0200 Subject: [PATCH 019/104] feat: new attribute system --- documentation/objects.md | 2 +- src/music_kraken/objects/contact.py | 4 +- src/music_kraken/objects/lyrics.py | 4 +- src/music_kraken/objects/parents.py | 80 ++++++++++++++++++++++--- src/music_kraken/objects/song.py | 90 ++++++++++++++++++++++++----- src/music_kraken/objects/source.py | 4 +- src/music_kraken/objects/target.py | 4 +- src/music_kraken/pages/abstract.py | 6 +- 8 files changed, 158 insertions(+), 36 deletions(-) diff --git a/documentation/objects.md b/documentation/objects.md index 3891898..2b54dab 100644 --- a/documentation/objects.md +++ b/documentation/objects.md @@ -19,7 +19,7 @@ Additionally it provides an **Interface** to: ### DatabaseObject.merge() -To merge the data of two instances of the same type, the attributes defined in `DatabaseObject.COLLECTION_ATTRIBUTES` and `SIMPLE_ATTRIBUTES` are used. +To merge the data of two instances of the same type, the attributes defined in `DatabaseObject.COLLECTION_STRING_ATTRIBUTES` and `SIMPLE_STRING_ATTRIBUTES` are used. The simple attributes just get carried from the other instance, to the self instance. 
diff --git a/src/music_kraken/objects/contact.py b/src/music_kraken/objects/contact.py index cdad4da..2041297 100644 --- a/src/music_kraken/objects/contact.py +++ b/src/music_kraken/objects/contact.py @@ -5,8 +5,8 @@ from .parents import DatabaseObject class Contact(DatabaseObject): - COLLECTION_ATTRIBUTES = tuple() - SIMPLE_ATTRIBUTES = { + COLLECTION_STRING_ATTRIBUTES = tuple() + SIMPLE_STRING_ATTRIBUTES = { "contact_method": None, "value": None, } diff --git a/src/music_kraken/objects/lyrics.py b/src/music_kraken/objects/lyrics.py index 465b96d..bcd1c1e 100644 --- a/src/music_kraken/objects/lyrics.py +++ b/src/music_kraken/objects/lyrics.py @@ -8,8 +8,8 @@ from .formatted_text import FormattedText class Lyrics(DatabaseObject): - COLLECTION_ATTRIBUTES = ("source_collection",) - SIMPLE_ATTRIBUTES = { + COLLECTION_STRING_ATTRIBUTES = ("source_collection",) + SIMPLE_STRING_ATTRIBUTES = { "text": FormattedText(), "language": None } diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index 7297b17..e0f629d 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -1,6 +1,7 @@ import random from collections import defaultdict -from typing import Optional, Dict, Tuple, List, Type +from typing import Optional, Dict, Tuple, List, Type, Generic, TypeVar, Any +from dataclasses import dataclass from .metadata import Metadata from .option import Options @@ -10,15 +11,46 @@ from ..utils.config import main_settings, logging_settings LOGGER = logging_settings["object_logger"] +T = TypeVar('T') + +@dataclass +class StaticAttribute(Generic[T]): + name: str + + default_value: Any = None + weight: float = 0 + + is_simple: bool = True + + is_collection: bool = False + is_downwards_collection: bool = False + is_upwards_collection: bool = False + + + +class Attribute(Generic[T]): + def __init__(self, database_object: "DatabaseObject", static_attribute: StaticAttribute) -> None: + self.database_object: DatabaseObject 
= database_object + self.static_attribute: StaticAttribute = static_attribute + + def get(self) -> T: + return self.database_object.__getattribute__(self.name) + + def set(self, value: T): + self.database_object.__setattr__(self.name, value) + + class DatabaseObject: - COLLECTION_ATTRIBUTES: tuple = tuple() - SIMPLE_ATTRIBUTES: dict = dict() + COLLECTION_STRING_ATTRIBUTES: tuple = tuple() + SIMPLE_STRING_ATTRIBUTES: dict = dict() # contains all collection attributes, which describe something "smaller" # e.g. album has songs, but not artist. - DOWNWARDS_COLLECTION_ATTRIBUTES: tuple = tuple() - UPWARDS_COLLECTION_ATTRIBUTES: tuple = tuple() + DOWNWARDS_COLLECTION_STRING_ATTRIBUTES: tuple = tuple() + UPWARDS_COLLECTION_STRING_ATTRIBUTES: tuple = tuple() + + STATIC_ATTRIBUTES: List[StaticAttribute] = list() def __init__(self, _id: int = None, dynamic: bool = False, **kwargs) -> None: self.automatic_id: bool = False @@ -33,13 +65,43 @@ class DatabaseObject: self.automatic_id = True # LOGGER.debug(f"Id for {type(self).__name__} isn't set. 
Setting to {_id}") + self._attributes: List[Attribute] = [] + self._simple_attribute_list: List[Attribute] = [] + self._collection_attributes: List[Attribute] = [] + self._downwards_collection_attributes: List[Attribute] = [] + self._upwards_collection_attributes: List[Attribute] = [] + + for static_attribute in self.STATIC_ATTRIBUTES: + attribute: Attribute = Attribute(self, static_attribute) + self._attributes.append(attribute) + + if static_attribute.is_simple: + self._simple_attribute_list.append(attribute) + else: + if static_attribute.is_collection: + self._collection_attributes.append(attribute) + if static_attribute.is_upwards_collection: + self._upwards_collection_attributes.append(attribute) + if static_attribute.is_downwards_collection: + self._downwards_collection_attributes.append(attribute) + + # The id can only be None, if the object is dynamic (self.dynamic = True) self.id: Optional[int] = _id self.dynamic = dynamic - self.build_version = -1 + @property + def upwards_collection(self) -> "Collection": + for attribute in self._upwards_collection_attributes: + yield attribute.get() + + @property + def downwards_collection(self) -> "Collection": + for attribute in self._downwards_collection_attributes: + yield attribute.get() + def __hash__(self): if self.dynamic: raise TypeError("Dynamic DatabaseObjects are unhashable.") @@ -89,10 +151,10 @@ class DatabaseObject: LOGGER.warning(f"can't merge \"{type(other)}\" into \"{type(self)}\"") return - for collection in type(self).COLLECTION_ATTRIBUTES: + for collection in type(self).COLLECTION_STRING_ATTRIBUTES: getattr(self, collection).extend(getattr(other, collection)) - for simple_attribute, default_value in type(self).SIMPLE_ATTRIBUTES.items(): + for simple_attribute, default_value in type(self).SIMPLE_STRING_ATTRIBUTES.items(): if getattr(other, simple_attribute) == default_value: continue @@ -100,7 +162,7 @@ class DatabaseObject: setattr(self, simple_attribute, getattr(other, simple_attribute)) def 
strip_details(self): - for collection in type(self).DOWNWARDS_COLLECTION_ATTRIBUTES: + for collection in type(self).DOWNWARDS_COLLECTION_STRING_ATTRIBUTES: getattr(self, collection).clear() @property diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index 36cd77a..dae43ea 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -15,7 +15,7 @@ from .metadata import ( Metadata ) from .option import Options -from .parents import MainObject, DatabaseObject +from .parents import MainObject, DatabaseObject, StaticAttribute from .source import Source, SourceCollection from .target import Target from ..utils.string_processing import unify @@ -36,10 +36,10 @@ class Song(MainObject): tracksort, genre, source_list, target, lyrics_list, album, main_artist_list, and feature_artist_list. """ - COLLECTION_ATTRIBUTES = ( + COLLECTION_STRING_ATTRIBUTES = ( "lyrics_collection", "album_collection", "main_artist_collection", "feature_artist_collection", "source_collection") - SIMPLE_ATTRIBUTES = { + SIMPLE_STRING_ATTRIBUTES = { "title": None, "unified_title": None, "isrc": None, @@ -49,7 +49,23 @@ class Song(MainObject): "notes": FormattedText() } - UPWARDS_COLLECTION_ATTRIBUTES = ("album_collection", "main_artist_collection", "feature_artist_collection") + UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("album_collection", "main_artist_collection", "feature_artist_collection") + + STATIC_ATTRIBUTES = [ + StaticAttribute(name="title", weight=.5), + StaticAttribute(name="unified_title", weight=.3), + StaticAttribute(name="isrc", weight=1), + StaticAttribute(name="length"), + StaticAttribute(name="tracksort", default_value=0), + StaticAttribute(name="genre"), + StaticAttribute(name="notes", default_value=FormattedText()), + + StaticAttribute(name="source_collection", is_collection=True), + StaticAttribute(name="lyrics_collection", is_collection=True), + StaticAttribute(name="album_collection", is_collection=True, 
is_upwards_collection=True), + StaticAttribute(name="main_artist_collection", is_collection=True, is_upwards_collection=True), + StaticAttribute(name="feature_artist_collection", is_collection=True, is_upwards_collection=True) + ] def __init__( self, @@ -212,8 +228,8 @@ All objects dependent on Album class Album(MainObject): - COLLECTION_ATTRIBUTES = ("label_collection", "artist_collection", "song_collection") - SIMPLE_ATTRIBUTES = { + COLLECTION_STRING_ATTRIBUTES = ("label_collection", "artist_collection", "song_collection") + SIMPLE_STRING_ATTRIBUTES = { "title": None, "unified_title": None, "album_status": None, @@ -225,8 +241,25 @@ class Album(MainObject): "notes": FormattedText() } - DOWNWARDS_COLLECTION_ATTRIBUTES = ("song_collection", ) - UPWARDS_COLLECTION_ATTRIBUTES = ("artist_collection", "label_collection") + DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("song_collection", ) + UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("artist_collection", "label_collection") + + STATIC_ATTRIBUTES = [ + StaticAttribute(name="title", weight=.5), + StaticAttribute(name="unified_title", weight=.3), + StaticAttribute(name="language"), + StaticAttribute(name="barcode", weight=1), + StaticAttribute(name="albumsort"), + StaticAttribute(name="album_status"), + StaticAttribute(name="album_type", default_value=AlbumType.OTHER), + StaticAttribute(name="date", default_value=ID3Timestamp()), + StaticAttribute(name="notes", default_value=FormattedText()), + + StaticAttribute(name="source_collection", is_collection=True), + StaticAttribute(name="song_collection", is_collection=True, is_downwards_collection=True), + StaticAttribute(name="artist_collection", is_collection=True, is_upwards_collection=True), + StaticAttribute(name="label_collection", is_collection=True, is_upwards_collection=True), + ] def __init__( self, @@ -454,13 +487,13 @@ All objects dependent on Artist class Artist(MainObject): - COLLECTION_ATTRIBUTES = ( + COLLECTION_STRING_ATTRIBUTES = ( "feature_song_collection", 
"main_album_collection", "label_collection", "source_collection" ) - SIMPLE_ATTRIBUTES = { + SIMPLE_STRING_ATTRIBUTES = { "name": None, "unified_name": None, "country": None, @@ -471,8 +504,26 @@ class Artist(MainObject): "unformated_location": None, } - DOWNWARDS_COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection") - UPWARDS_COLLECTION_ATTRIBUTES = ("label_collection", ) + DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("feature_song_collection", "main_album_collection") + UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection", ) + + + STATIC_ATTRIBUTES = [ + StaticAttribute(name="name", weight=.5), + StaticAttribute(name="unified_name", weight=.3), + StaticAttribute(name="country"), + StaticAttribute(name="formed_in", default_value=ID3Timestamp()), + StaticAttribute(name="lyrical_themes", default_value=[]), + StaticAttribute(name="general_genre", default_value=""), + StaticAttribute(name="notes", default_value=FormattedText()), + StaticAttribute(name="unformated_location"), + + StaticAttribute(name="source_collection", is_collection=True), + StaticAttribute(name="contact_collection", is_collection=True), + StaticAttribute(name="feature_song_collection", is_collection=True, is_downwards_collection=True), + StaticAttribute(name="main_album_collection", is_collection=True, is_downwards_collection=True), + StaticAttribute(name="label_collection", is_collection=True, is_upwards_collection=True), + ] def __init__( self, @@ -713,14 +764,23 @@ Label class Label(MainObject): - COLLECTION_ATTRIBUTES = ("album_collection", "current_artist_collection") - SIMPLE_ATTRIBUTES = { + COLLECTION_STRING_ATTRIBUTES = ("album_collection", "current_artist_collection") + SIMPLE_STRING_ATTRIBUTES = { "name": None, "unified_name": None, "notes": FormattedText() } - DOWNWARDS_COLLECTION_ATTRIBUTES = COLLECTION_ATTRIBUTES + DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = COLLECTION_STRING_ATTRIBUTES + + STATIC_ATTRIBUTES = [ + StaticAttribute(name="name", weight=.5), + 
StaticAttribute(name="unified_name", weight=.3), + StaticAttribute(name="notes", default_value=FormattedText()), + + StaticAttribute(name="album_collection", is_collection=True, is_downwards_collection=True), + StaticAttribute(name="current_artist_collection", is_collection=True, is_downwards_collection=True), + ] def __init__( self, diff --git a/src/music_kraken/objects/source.py b/src/music_kraken/objects/source.py index 38fd062..fd0dd0f 100644 --- a/src/music_kraken/objects/source.py +++ b/src/music_kraken/objects/source.py @@ -19,8 +19,8 @@ class Source(DatabaseObject): Source(src="youtube", url="https://youtu.be/dfnsdajlhkjhsd") ``` """ - COLLECTION_ATTRIBUTES = tuple() - SIMPLE_ATTRIBUTES = { + COLLECTION_STRING_ATTRIBUTES = tuple() + SIMPLE_STRING_ATTRIBUTES = { "page_enum": None, "url": None, "referer_page": None, diff --git a/src/music_kraken/objects/target.py b/src/music_kraken/objects/target.py index fa06177..4faee32 100644 --- a/src/music_kraken/objects/target.py +++ b/src/music_kraken/objects/target.py @@ -22,11 +22,11 @@ class Target(DatabaseObject): ``` """ - SIMPLE_ATTRIBUTES = { + SIMPLE_STRING_ATTRIBUTES = { "_file": None, "_path": None } - COLLECTION_ATTRIBUTES = tuple() + COLLECTION_STRING_ATTRIBUTES = tuple() def __init__( self, diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index c8c3973..4995b99 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -303,7 +303,7 @@ class Page: if stop_at_level > 1: collection: Collection - for collection_str in music_object.DOWNWARDS_COLLECTION_ATTRIBUTES: + for collection_str in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES: collection = music_object.__getattribute__(collection_str) for sub_element in collection: @@ -332,7 +332,7 @@ class Page: def fill_naming_objects(naming_music_object: DatabaseObject): nonlocal naming_dict - for collection_name in naming_music_object.UPWARDS_COLLECTION_ATTRIBUTES: + for collection_name in 
naming_music_object.UPWARDS_COLLECTION_STRING_ATTRIBUTES: collection: Collection = getattr(naming_music_object, collection_name) if collection.empty: @@ -368,7 +368,7 @@ class Page: download_result: DownloadResult = DownloadResult() - for collection_name in music_object.DOWNWARDS_COLLECTION_ATTRIBUTES: + for collection_name in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES: collection: Collection = getattr(music_object, collection_name) sub_ordered_music_object: DatabaseObject From 0bf0754f6efd5d704b839f408c60fca743127ace Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Thu, 12 Oct 2023 19:24:35 +0200 Subject: [PATCH 020/104] try: ljdsfal --- src/create_custom_objects.py | 149 +++++++++++++++-------- src/music_kraken/__init__.py | 7 +- src/music_kraken/objects/collection.py | 61 +++++++++- src/music_kraken/objects/parents.py | 36 ++++-- src/music_kraken/objects/song.py | 52 ++++++-- src/music_kraken/pages/abstract.py | 12 +- src/music_kraken/utils/enums/__init__.py | 1 + src/music_kraken/utils/hooks.py | 29 +++++ 8 files changed, 258 insertions(+), 89 deletions(-) create mode 100644 src/music_kraken/utils/hooks.py diff --git a/src/create_custom_objects.py b/src/create_custom_objects.py index 47017be..b3551b6 100644 --- a/src/create_custom_objects.py +++ b/src/create_custom_objects.py @@ -1,63 +1,104 @@ -from music_kraken import objects, recurse - -import pycountry +from music_kraken.objects import ( + Song, + Album, + Artist, + Label, + Source, + DatabaseObject +) +from music_kraken.utils.enums import SourcePages -song = objects.Song( - genre="HS Core", - title="Vein Deep in the Solution", - length=666, - isrc="US-S1Z-99-00001", - tracksort=2, - target=[ - objects.Target(file="song.mp3", path="example") - ], - lyrics_list=[ - objects.Lyrics(text="these are some depressive lyrics", language="en"), - objects.Lyrics(text="Dies sind depressive Lyrics", language="de") - ], - source_list=[ - 
objects.Source(objects.SourcePages.YOUTUBE, "https://youtu.be/dfnsdajlhkjhsd"), - objects.Source(objects.SourcePages.MUSIFY, "https://ln.topdf.de/Music-Kraken/") - ], - album_list=[ - objects.Album( - title="One Final Action", - date=objects.ID3Timestamp(year=1986, month=3, day=1), - language=pycountry.languages.get(alpha_2="en"), - label_list=[ - objects.Label(name="an album label") +only_smile = Artist( + name="Only Smile", + source_list=[Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/")], + main_album_list=[ + Album( + title="Few words...", + source_list=[Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/album/few-words")], + song_list=[ + Song(title="Everything will be fine"), + Song(title="Only Smile"), + Song(title="Dear Diary"), + Song(title="Sad Story") ], - source_list=[ - objects.Source(objects.SourcePages.ENCYCLOPAEDIA_METALLUM, "https://www.metal-archives.com/albums/I%27m_in_a_Coffin/One_Final_Action/207614") - ] - ), - ], - main_artist_list=[ - objects.Artist( - name="I'm in a coffin", - source_list=[ - objects.Source( - objects.SourcePages.ENCYCLOPAEDIA_METALLUM, - "https://www.metal-archives.com/bands/I%27m_in_a_Coffin/127727" - ) - ], - label_list=[ - objects.Label(name="Depressive records") + artist_list=[ + Artist( + name="Only Smile", + source_list=[Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/")], + main_album_list=[ + Album( + title="Few words...", + source_list=[Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/album/few-words")], + song_list=[ + Song(title="Everything will be fine"), + Song(title="Only Smile"), + Song(title="Dear Diary"), + Song(title="Sad Story") + ], + artist_list=[ + Artist( + name="Only Smile", + source_list=[Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/")] + ) + ] + ), + Album( + title="Your best friend", + source_list=[Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/album/your-best-friend")] + ) + ] + ), + Artist( + name="Only Smile", + 
source_list=[Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/")], + main_album_list=[ + Album( + title="Few words...", + source_list=[Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/album/few-words")], + song_list=[ + Song(title="Everything will be fine"), + Song(title="Only Smile"), + Song(title="Dear Diary"), + Song(title="Sad Story") + ], + artist_list=[ + Artist( + name="Only Smile", + source_list=[Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/")] + ) + ] + ), + Album( + title="Your best friend", + source_list=[Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/album/your-best-friend")] + ) + ] + ) ] ), - objects.Artist(name="some_split_artist") - ], - feature_artist_list=[ - objects.Artist( - name="Ruffiction", - label_list=[ - objects.Label(name="Ruffiction Productions") - ] + Album( + title="Your best friend", + source_list=[Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/album/your-best-friend")] ) - ], + ] ) -song.compile() -print(song.options) +objects_by_id = {} + +def add_to_objects_dump(db_obj: DatabaseObject): + objects_by_id[db_obj.id] = db_obj + + for collection in db_obj.all_collections: + for new_db_obj in collection: + if new_db_obj.id not in objects_by_id: + add_to_objects_dump(new_db_obj) + + +add_to_objects_dump(only_smile) + +for _id, _object in objects_by_id.items(): + print(_id, _object, sep=": ") + +print(only_smile) diff --git a/src/music_kraken/__init__.py b/src/music_kraken/__init__.py index 748a628..555b154 100644 --- a/src/music_kraken/__init__.py +++ b/src/music_kraken/__init__.py @@ -1,7 +1,6 @@ import logging - import gc -import musicbrainzngs +import sys from .utils.shared import DEBUG, DEBUG_LOGGIN from .utils.config import logging_settings, main_settings, read_config @@ -9,6 +8,10 @@ read_config() from . 
import cli +# I am SO sorry +print(sys.setrecursionlimit(500)) + + # configure logger default logging.basicConfig( level=logging_settings['log_level'] if not DEBUG_LOGGIN else logging.DEBUG, diff --git a/src/music_kraken/objects/collection.py b/src/music_kraken/objects/collection.py index 5310211..4f50ff1 100644 --- a/src/music_kraken/objects/collection.py +++ b/src/music_kraken/objects/collection.py @@ -3,6 +3,11 @@ from collections import defaultdict from dataclasses import dataclass from .parents import DatabaseObject +from ..utils.hooks import HookEventTypes, Hooks, Event + + +class CollectionHooks(HookEventTypes): + APPEND_NEW = "append_new" T = TypeVar('T', bound=DatabaseObject) @@ -46,6 +51,8 @@ class Collection(Generic[T]): self._attribute_to_object_map: Dict[str, Dict[object, T]] = defaultdict(dict) self._used_ids: set = set() + self.hooks: Hooks = Hooks(self) + if data is not None: self.extend(data, merge_on_conflict=True) @@ -74,7 +81,7 @@ class Collection(Generic[T]): pass def append(self, element: T, merge_on_conflict: bool = True, - merge_into_existing: bool = True) -> AppendResult: + merge_into_existing: bool = True, no_hook: bool = False) -> AppendResult: """ :param element: :param merge_on_conflict: @@ -84,6 +91,10 @@ class Collection(Generic[T]): if element is None: return AppendResult(False, None, False) + for existing_element in self._data: + if element is existing_element: + return AppendResult(False, None, False) + # if the element type has been defined in the initializer it checks if the type matches if self.element_type is not None and not isinstance(element, self.element_type): raise TypeError(f"{type(element)} is not the set type {self.element_type}") @@ -117,18 +128,60 @@ class Collection(Generic[T]): self.map_element(element) return AppendResult(True, existing_object, False) + if not no_hook: + self.hooks.trigger_event(CollectionHooks.APPEND_NEW, new_object=element) self._data.append(element) self.map_element(element) return 
AppendResult(False, element, False) def extend(self, element_list: Iterable[T], merge_on_conflict: bool = True, - merge_into_existing: bool = True): + merge_into_existing: bool = True, no_hook: bool = False): + if element_list is None: + return + if len(element_list) <= 0: + return + if element_list is self: + return for element in element_list: - self.append(element, merge_on_conflict=merge_on_conflict, merge_into_existing=merge_into_existing) + self.append(element, merge_on_conflict=merge_on_conflict, merge_into_existing=merge_into_existing, no_hook=no_hook) + + def sync_collection(self, collection_attribute: str): + def on_append(event: Event, new_object: T, *args, **kwargs): + new_collection = new_object.__getattribute__(collection_attribute) + if self is new_collection: + return + + self.extend(new_object.__getattribute__(collection_attribute), no_hook=True) + new_object.__setattr__(collection_attribute, self) + + self.hooks.add_event_listener(CollectionHooks.APPEND_NEW, on_append) + + def sync_main_collection(self, main_collection: "Collection", collection_attribute: str): + def on_append(event: Event, new_object: T, *args, **kwargs): + new_collection = new_object.__getattribute__(collection_attribute) + if main_collection is new_collection: + return + + main_collection.extend(new_object.__getattribute__(collection_attribute), no_hook=True) + new_object.__setattr__(collection_attribute, main_collection) + + self.hooks.add_event_listener(CollectionHooks.APPEND_NEW, on_append) + + """ + def on_append(event: Event, new_object: T, *args, **kwargs): + new_collection: Collection = new_object.__getattribute__(collection_attribute) + if self is new_collection: + return + + self.extend(new_collection.shallow_list, no_hook=False) + new_object.__setattr__(collection_attribute, self) + + self.hooks.add_event_listener(CollectionHooks.APPEND_NEW, on_append) + """ def __iter__(self) -> Iterator[T]: - for element in self.shallow_list: + for element in self._data: yield 
element def __str__(self) -> str: diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index e0f629d..24642bf 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -11,32 +11,36 @@ from ..utils.config import main_settings, logging_settings LOGGER = logging_settings["object_logger"] -T = TypeVar('T') +P = TypeVar('P') @dataclass -class StaticAttribute(Generic[T]): +class StaticAttribute(Generic[P]): name: str default_value: Any = None weight: float = 0 - is_simple: bool = True - is_collection: bool = False is_downwards_collection: bool = False is_upwards_collection: bool = False -class Attribute(Generic[T]): + + +class Attribute(Generic[P]): def __init__(self, database_object: "DatabaseObject", static_attribute: StaticAttribute) -> None: self.database_object: DatabaseObject = database_object self.static_attribute: StaticAttribute = static_attribute - def get(self) -> T: + @property + def name(self) -> str: + return self.static_attribute.name + + def get(self) -> P: return self.database_object.__getattribute__(self.name) - def set(self, value: T): + def set(self, value: P): self.database_object.__setattr__(self.name, value) @@ -75,16 +79,15 @@ class DatabaseObject: attribute: Attribute = Attribute(self, static_attribute) self._attributes.append(attribute) - if static_attribute.is_simple: - self._simple_attribute_list.append(attribute) - else: + if static_attribute.is_collection: if static_attribute.is_collection: self._collection_attributes.append(attribute) if static_attribute.is_upwards_collection: self._upwards_collection_attributes.append(attribute) if static_attribute.is_downwards_collection: self._downwards_collection_attributes.append(attribute) - + else: + self._simple_attribute_list.append(attribute) # The id can only be None, if the object is dynamic (self.dynamic = True) self.id: Optional[int] = _id @@ -102,6 +105,11 @@ class DatabaseObject: for attribute in 
self._downwards_collection_attributes: yield attribute.get() + @property + def all_collections(self) -> "Collection": + for attribute in self._collection_attributes: + yield attribute.get() + def __hash__(self): if self.dynamic: raise TypeError("Dynamic DatabaseObjects are unhashable.") @@ -151,8 +159,10 @@ class DatabaseObject: LOGGER.warning(f"can't merge \"{type(other)}\" into \"{type(self)}\"") return - for collection in type(self).COLLECTION_STRING_ATTRIBUTES: - getattr(self, collection).extend(getattr(other, collection)) + for collection in self._collection_attributes: + if hasattr(self, collection.name) and hasattr(other, collection.name): + if collection.get() is not getattr(other, collection.name): + collection.get().extend(getattr(other, collection.name)) for simple_attribute, default_value in type(self).SIMPLE_STRING_ATTRIBUTES.items(): if getattr(other, simple_attribute) == default_value: diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index dae43ea..ddc883f 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -5,7 +5,7 @@ from typing import List, Optional, Dict, Tuple, Type import pycountry from ..utils.enums.album import AlbumType, AlbumStatus -from .collection import Collection +from .collection import Collection, CollectionHooks from .formatted_text import FormattedText from .lyrics import Lyrics from .contact import Contact @@ -86,7 +86,7 @@ class Song(MainObject): notes: FormattedText = None, **kwargs ) -> None: - MainObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs) + super().__init__(_id=_id, dynamic=dynamic, **kwargs) # attributes self.title: str = title self.unified_title: str = unified_title @@ -102,9 +102,23 @@ class Song(MainObject): self.source_collection: SourceCollection = SourceCollection(source_list) self.target_collection: Collection[Target] = Collection(data=target_list, element_type=Target) self.lyrics_collection: Collection[Lyrics] = 
Collection(data=lyrics_list, element_type=Lyrics) - self.album_collection: Collection[Album] = Collection(data=album_list, element_type=Album) + + # main_artist_collection = album.artist collection self.main_artist_collection: Collection[Artist] = Collection(data=main_artist_list, element_type=Artist) - self.feature_artist_collection: Collection[Artist] = Collection(data=feature_artist_list, element_type=Artist) + + # this album_collection equals no collection + self.album_collection: Collection[Album] = Collection(data=[], element_type=Album) + self.album_collection.sync_main_collection(self.main_artist_collection, "artist_collection") + self.album_collection.extend(album_list) + # self.album_collection.sync_collection("song_collection") + # self.album_collection.hooks.add_event_listener(CollectionHooks.APPEND_NEW, on_album_append) + + # on feature_artist_collection append, append self to artist self + self.feature_artist_collection: Collection[Artist] = Collection(data=[], element_type=Artist) + def on_feature_artist_append(event, new_object: Artist, *args, **kwargs): + new_object.feature_song_collection.append(self, no_hook=True) + self.feature_artist_collection.hooks.add_event_listener(CollectionHooks.APPEND_NEW, on_feature_artist_append) + self.feature_artist_collection.extend(feature_artist_list) def _build_recursive_structures(self, build_version: int, merge: bool): if build_version == self.build_version: @@ -307,8 +321,13 @@ class Album(MainObject): self.notes = notes or FormattedText() self.source_collection: SourceCollection = SourceCollection(source_list) - self.song_collection: Collection[Song] = Collection(data=song_list, element_type=Song) + self.artist_collection: Collection[Artist] = Collection(data=artist_list, element_type=Artist) + + self.song_collection: Collection[Song] = Collection(data=[], element_type=Song) + self.song_collection.sync_main_collection(self.artist_collection, "main_artist_collection") + self.song_collection.extend(song_list) + 
self.label_collection: Collection[Label] = Collection(data=label_list, element_type=Label) def _build_recursive_structures(self, build_version: int, merge: bool): @@ -565,15 +584,28 @@ class Artist(MainObject): self.lyrical_themes: List[str] = lyrical_themes or [] self.general_genre = general_genre + self.unformated_location: Optional[str] = unformated_location self.source_collection: SourceCollection = SourceCollection(source_list) - self.feature_song_collection: Collection[Song] = Collection(data=feature_song_list, element_type=Song) - self.main_album_collection: Collection[Album] = Collection(data=main_album_list, element_type=Album) - self.label_collection: Collection[Label] = Collection(data=label_list, element_type=Label) - self.contact_collection: Collection[Label] = Collection(data=contact_list, element_type=Contact) - self.unformated_location: Optional[str] = unformated_location + self.feature_song_collection: Collection[Song] = Collection(data=[], element_type=Song) + def on_feature_song_append(event, new_object: Song, *args, **kwargs): + new_object.feature_artist_collection.append(self, no_hook=True) + self.feature_song_collection.hooks.add_event_listener(CollectionHooks.APPEND_NEW, on_feature_song_append) + self.feature_song_collection.extend(feature_song_list) + + self.main_album_collection: Collection[Album] = Collection(data=[], element_type=Album) + def on_album_append(event, new_object: Album, *args, **kwargs): + new_object.artist_collection.append(self, no_hook=True) + self.main_album_collection.hooks.add_event_listener(CollectionHooks.APPEND_NEW, on_album_append) + self.main_album_collection.extend(main_album_list) + + self.label_collection: Collection[Label] = Collection(data=label_list, element_type=Label) + def on_label_append(event, new_object: Label, *args, **kwargs): + new_object.current_artist_collection.append(self, no_hook=True) + self.label_collection.hooks.add_event_listener(CollectionHooks.APPEND_NEW, on_label_append) + def 
_add_other_db_objects(self, object_type: Type["DatabaseObject"], object_list: List["DatabaseObject"]): if object_type is Song: diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index 4995b99..16d127c 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -140,6 +140,9 @@ def clean_object(dirty_object: DatabaseObject) -> DatabaseObject: Song: Collection(element_type=Song) } + if isinstance(dirty_object, Song): + return dirty_object + _clean_music_object(dirty_object, collections) return dirty_object @@ -153,7 +156,7 @@ def merge_together(old_object: DatabaseObject, new_object: DatabaseObject, do_co new_object = clean_object(new_object) old_object.merge(new_object) - if do_compile: + if do_compile and False: old_object.compile(merge_into=False) return old_object @@ -274,7 +277,7 @@ class Page: post_process=False )) - return merge_together(music_object, new_music_object, do_compile=post_process) + return music_object.merge(new_music_object) def fetch_object_from_source(self, source: Source, stop_at_level: int = 2, enforce_type: Type[DatabaseObject] = None, post_process: bool = True) -> Optional[DatabaseObject]: obj_type = self.get_source_type(source) @@ -308,10 +311,7 @@ class Page: for sub_element in collection: sub_element.merge(self.fetch_details(sub_element, stop_at_level=stop_at_level-1, post_process=False)) - - if post_process and music_object: - return build_new_object(music_object) - + return music_object def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: diff --git a/src/music_kraken/utils/enums/__init__.py b/src/music_kraken/utils/enums/__init__.py index e69de29..b90f9aa 100644 --- a/src/music_kraken/utils/enums/__init__.py +++ b/src/music_kraken/utils/enums/__init__.py @@ -0,0 +1 @@ +from .source import SourcePages \ No newline at end of file diff --git a/src/music_kraken/utils/hooks.py b/src/music_kraken/utils/hooks.py new file mode 100644 index 0000000..e3cd954 --- 
/dev/null +++ b/src/music_kraken/utils/hooks.py @@ -0,0 +1,29 @@ +from typing import List, Iterable, Dict, TypeVar, Generic, Iterator, Any, Type +from enum import Enum +from dataclasses import dataclass +from collections import defaultdict + + +class HookEventTypes(Enum): + pass + + +@dataclass +class Event: + target: Any + + +class Hooks: + def __init__(self, target) -> None: + self.target = target + + self._callbacks: Dict[HookEventTypes, List[callable]] = defaultdict(list) + + def add_event_listener(self, event_type: HookEventTypes, callback: callable): + self._callbacks[event_type].append(callback) + + def trigger_event(self, event_type: HookEventTypes, *args, **kwargs): + event: Event = Event(target=self.target) + + for callback in self._callbacks[event_type]: + callback(event, *args, **kwargs) From 59d73ed07eb87e4d40b358821a4d8044b6639df3 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Thu, 12 Oct 2023 22:00:58 +0200 Subject: [PATCH 021/104] feat: layed out promising collection syncing --- requirements.txt | 1 + src/create_custom_objects.py | 47 ++++++++++- src/music_kraken/objects/new_collection.py | 93 ++++++++++++++++++++++ 3 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 src/music_kraken/objects/new_collection.py diff --git a/requirements.txt b/requirements.txt index 48443cf..0d644fb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,3 +17,4 @@ regex~=2022.9.13 pyffmpeg~=2.4.2.18 ffmpeg-progress-yield~=0.7.8 pathvalidate~=2.5.2 +guppy3~=3.1.3 diff --git a/src/create_custom_objects.py b/src/create_custom_objects.py index b3551b6..306aa97 100644 --- a/src/create_custom_objects.py +++ b/src/create_custom_objects.py @@ -6,9 +6,10 @@ from music_kraken.objects import ( Source, DatabaseObject ) +from music_kraken.objects.new_collection import Collection from music_kraken.utils.enums import SourcePages - +""" only_smile = Artist( name="Only Smile", source_list=[Source(SourcePages.BANDCAMP, 
T = TypeVar('T', bound="DatabaseObject")


class Collection(Generic[T]):
    """
    A flat list of objects plus a set of permanently contained sub-collections.

    `data`, `shallow_list` and `__len__` cover own items AND everything inside
    contained collections (recursively via `shallow_list`); `__iter__` covers
    only this collection's own items.
    """
    _data: List[T]

    # read-only view over own items + all contained collections
    shallow_list = property(fget=lambda self: self.data)

    def __init__(self, data: Optional[Iterable[T]]) -> None:
        self._data = []
        self.contained_collections: List[Collection[T]] = []

        self.extend(data)

    def append(self, __object: Optional[T]):
        """Add a single object; None is silently ignored."""
        # fixed: extend() already tolerated None but append() did not,
        # so Collection([...]).append(None) would store a None element
        if __object is None:
            return

        self._data.append(__object)

    def extend(self, __iterable: Optional[Iterable[T]]):
        """Append every object of the iterable; a None iterable is a no-op."""
        if __iterable is None:
            return

        for __object in __iterable:
            self.append(__object)

    def sync_with_other_collection(self, equal_collection: "Collection"):
        """
        If two collections always need to have the same values, this can be used.

        Internally:
        1. import the data from other to self
           - _data
           - contained_collections
        2. replace all refs from the other object, with refs from this object
        """
        if equal_collection is self:
            return

        # don't add the elements from the subelements from the other collection.
        # this will be done in the next step.
        self.extend(equal_collection._data)
        # add all submodules
        for equal_sub_collection in equal_collection.contained_collections:
            self.contain_collection_inside(equal_sub_collection)

        # now the ugly part:
        # replace all refs of the other element with this one
        # (interpreter-level hack defined at module scope, guppy-based)
        _replace_all_refs(self, equal_collection)

    def contain_collection_inside(self, sub_collection: "Collection"):
        """This collection will ALWAYS contain everything from the passed in collection."""
        # identity-based duplicate guard: containing the same collection twice is a no-op
        if sub_collection in self.contained_collections:
            return

        self.contained_collections.append(sub_collection)

    @property
    def data(self) -> List[T]:
        # own items first, then everything from contained collections (recursive)
        return [*self._data, *(__object for collection in self.contained_collections for __object in collection.shallow_list)]

    def __len__(self) -> int:
        return len(self._data) + sum(len(collection) for collection in self.contained_collections)

    def __iter__(self) -> Iterator[T]:
        # NOTE: iterates only own items, NOT contained collections
        # (len(self) may therefore exceed the number of yielded elements)
        for element in self._data:
            yield element
T = TypeVar('T', bound="DatabaseObject")


class Collection(Generic[T]):
    """
    List-like container that indexes its elements by their `indexing_values`
    (pairs of attribute name and value) so duplicates can be detected, and that
    can permanently contain sub-collections.
    """
    _data: List[T]

    # per attribute name ("title", "url", ...): set of values currently present
    _indexed_values: Dict[str, set]
    # value -> list of objects carrying that value
    # (fixed: was annotated Dict[any, list] — the builtin any() is not a type,
    # typing.Any is the correct annotation)
    _indexed_to_objects: Dict[Any, List[T]]

    shallow_list = property(fget=lambda self: self.data)

    def __init__(self, data: Optional[Iterable[T]]) -> None:
        self._data = []
        self.contained_collections: List[Collection[T]] = []

        self._indexed_values = defaultdict(set)
        self._indexed_to_objects = defaultdict(list)

        self.extend(data)

    def _map_element(self, __object: T):
        """Register every non-None indexing value of __object in both indexes."""
        for name, value in __object.indexing_values:
            if value is None:
                continue

            self._indexed_values[name].add(value)
            self._indexed_to_objects[value].append(__object)

    def _unmap_element(self, __object: T):
        """Remove __object from the indexes; unknown values are skipped silently."""
        for name, value in __object.indexing_values:
            if value is None:
                continue
            if value not in self._indexed_values[name]:
                continue

            try:
                self._indexed_to_objects[value].remove(__object)
            except ValueError:
                continue

            # drop the value from the name-index once no object carries it anymore
            if not len(self._indexed_to_objects[value]):
                self._indexed_values[name].remove(value)

    def _contained_in_self(self, __object: T) -> bool:
        """True if any indexing value of __object is already indexed here."""
        for name, value in __object.indexing_values:
            if value is None:
                continue
            if value in self._indexed_values[name]:
                return True
        return False

    def _contained_in(self, __object: T) -> Optional["Collection"]:
        """Return self or the direct sub-collection that holds __object, else None."""
        if self._contained_in_self(__object):
            return self

        for collection in self.contained_collections:
            if collection._contained_in_self(__object):
                return collection

        return None

    def contains(self, __object: T) -> bool:
        return self._contained_in(__object) is not None

    def _append(self, __object: T):
        self._map_element(__object)
        self._data.append(__object)

    def append(self, __object: Optional[T]):
        """Add a single object; None is silently ignored."""
        if __object is None:
            return

        self._append(__object)

    def extend(self, __iterable: Optional[Iterable[T]]):
        if __iterable is None:
            return

        for __object in __iterable:
            self.append(__object)

    def sync_with_other_collection(self, equal_collection: "Collection"):
        """
        If two collections always need to have the same values, this can be used.

        Internally:
        1. import the data from other to self (_data, contained_collections)
        2. replace all refs from the other object with refs to this object
        """
        if equal_collection is self:
            return

        # don't add the elements from the subelements from the other collection;
        # this is done via containment in the next step.
        self.extend(equal_collection._data)
        for equal_sub_collection in equal_collection.contained_collections:
            self.contain_collection_inside(equal_sub_collection)

        # interpreter-level hack imported at module scope:
        # every reference to equal_collection now points at self
        replace_all_refs(self, equal_collection)

    def contain_collection_inside(self, sub_collection: "Collection"):
        """This collection will ALWAYS contain everything from the passed in collection."""
        if sub_collection in self.contained_collections:
            return

        self.contained_collections.append(sub_collection)

    @property
    def data(self) -> List[T]:
        return [*self._data, *(__object for collection in self.contained_collections for __object in collection.shallow_list)]

    def __len__(self) -> int:
        return len(self._data) + sum(len(collection) for collection in self.contained_collections)

    def __iter__(self) -> Iterator[T]:
        # own items only; contained collections are not iterated
        for element in self._data:
            yield element
import guppy
from guppy.heapy import Path


hp = guppy.hpy()

def replace_all_refs(replace_with, replace):
    """
    NO.
    I have a very good reason to use this here.
    DONT use this anywhere else...

    This replaces **ALL** references to ``replace`` with a reference to
    ``replace_with``, by walking every referrer path guppy/heapy can find.

    https://benkurtovic.com/2015/01/28/python-object-replacement.html
    """
    for referrer_path in hp.iso(replace).pathsin:
        relation = referrer_path.path[1]
        # only rewrite index/value slots (list items, dict values, ...)
        if isinstance(relation, Path.R_INDEXVAL):
            referrer_path.src.theone[relation.r] = replace_with
+ + https://benkurtovic.com/2015/01/28/python-object-replacement.html + """ + for path in hp.iso(replace).pathsin: + relation = path.path[1] + if isinstance(relation, Path.R_INDEXVAL): + path.src.theone[relation.r] = replace_with def clear_console(): From 208d6943b4b0883dbaa43acfb3fe92db9df0cce7 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Tue, 17 Oct 2023 22:46:06 +0200 Subject: [PATCH 024/104] feat: realtime duplicate check in instance --- src/music_kraken/objects/new_collection.py | 39 ++++++++++++++++++++-- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/src/music_kraken/objects/new_collection.py b/src/music_kraken/objects/new_collection.py index 58f672a..5569855 100644 --- a/src/music_kraken/objects/new_collection.py +++ b/src/music_kraken/objects/new_collection.py @@ -25,13 +25,14 @@ class Collection(Generic[T]): self.extend(data) - def _map_element(self, __object: T): + def _map_element(self, __object: T, no_append: bool = True): for name, value in __object.indexing_values: if value is None: continue self._indexed_values[name].add(value) - self._indexed_to_objects[value].append(__object) + if not no_append: + self._indexed_to_objects[value].append(__object) def _unmap_element(self, __object: T): for name, value in __object.indexing_values: @@ -66,6 +67,33 @@ class Collection(Generic[T]): return None + + def _merge_in_self(self, __object: T): + """ + 1. find existing objects + 2. merge into existing object + 3. remap existing object + """ + existing_object: DatabaseObject = None + + for name, value in __object.indexing_values: + if value is None: + continue + if value in self._indexed_values[name]: + existing_object = self._indexed_to_objects[value] + break + + if existing_object is None: + return None + + existing_object.merge(__object, replace_all_refs=True) + + if existing_object is not __object: + raise ValueError("This should NEVER happen. 
Merging doesn't work.") + + self._map_element(existing_object) + + def contains(self, __object: T) -> bool: return self._contained_in(__object) is not None @@ -77,8 +105,13 @@ class Collection(Generic[T]): def append(self, __object: Optional[T]): if __object is None: return + + exists_in_collection = self._contained_in(__object) - self._append(__object) + if exists_in_collection is None: + self._append(__object) + else: + exists_in_collection._merge_in_self(__object) def extend(self, __iterable: Optional[Iterable[T]]): if __iterable is None: From cae2ecffcbcfc5bee31f2deb0bb50af3ab30c0ad Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 23 Oct 2023 16:21:44 +0200 Subject: [PATCH 025/104] fix: merging and replacing instances --- src/music_kraken/cli/main_downloader.py | 3 ++- src/music_kraken/connection/connection.py | 2 +- src/music_kraken/download/page_attributes.py | 3 ++- src/music_kraken/objects/new_collection.py | 6 +++--- src/music_kraken/objects/parents.py | 6 +++--- src/music_kraken/pages/abstract.py | 3 ++- src/music_kraken/pages/bandcamp.py | 2 +- .../pages/encyclopaedia_metallum.py | 2 +- src/music_kraken/pages/musify.py | 3 ++- src/music_kraken/pages/preset.py | 3 ++- src/music_kraken/pages/youtube.py | 2 +- .../pages/youtube_music/super_youtube.py | 2 +- .../pages/youtube_music/youtube_music.py | 2 +- src/music_kraken/utils/functions.py | 20 ------------------- .../utils/support_classes/__init__.py | 2 -- 15 files changed, 22 insertions(+), 39 deletions(-) diff --git a/src/music_kraken/cli/main_downloader.py b/src/music_kraken/cli/main_downloader.py index 4c3a106..4c5ced2 100644 --- a/src/music_kraken/cli/main_downloader.py +++ b/src/music_kraken/cli/main_downloader.py @@ -8,7 +8,8 @@ from .options.first_config import initial_config from ..utils.config import write_config, main_settings from ..utils.regex import URL_PATTERN from ..utils.string_processing import fit_to_file_system -from ..utils.support_classes import Query, DownloadResult +from 
..utils.support_classes.query import Query +from ..utils.support_classes.download_result import DownloadResult from ..utils.exception.download import UrlNotFoundException from ..download.results import Results, Option, PageResults from ..download.page_attributes import Pages diff --git a/src/music_kraken/connection/connection.py b/src/music_kraken/connection/connection.py index 44d54b7..3f294e9 100644 --- a/src/music_kraken/connection/connection.py +++ b/src/music_kraken/connection/connection.py @@ -9,7 +9,7 @@ from tqdm import tqdm from .rotating import RotatingProxy from ..utils.config import main_settings -from ..utils.support_classes import DownloadResult +from ..utils.support_classes.download_result import DownloadResult from ..objects import Target diff --git a/src/music_kraken/download/page_attributes.py b/src/music_kraken/download/page_attributes.py index f7bc619..8409660 100644 --- a/src/music_kraken/download/page_attributes.py +++ b/src/music_kraken/download/page_attributes.py @@ -5,7 +5,8 @@ from ..objects import DatabaseObject, Source from ..utils.config import youtube_settings from ..utils.enums.source import SourcePages -from ..utils.support_classes import Query, DownloadResult +from ..utils.support_classes.download_result import DownloadResult +from ..utils.support_classes.query import Query from ..utils.exception.download import UrlNotFoundException from ..utils.shared import DEBUG_PAGES diff --git a/src/music_kraken/objects/new_collection.py b/src/music_kraken/objects/new_collection.py index 58f672a..785d9d3 100644 --- a/src/music_kraken/objects/new_collection.py +++ b/src/music_kraken/objects/new_collection.py @@ -2,13 +2,13 @@ from typing import List, Iterable, Iterator, Optional, TypeVar, Generic, Dict, T from collections import defaultdict from .parents import DatabaseObject -from ..utils.functions import replace_all_refs +from ..utils.support_classes.hacking import MetaClass T = TypeVar('T', bound=DatabaseObject) -class Collection(Generic[T]): 
+class Collection(Generic[T], metaclass=MetaClass): _data: List[T] _indexed_values: Dict[str, set] @@ -109,7 +109,7 @@ class Collection(Generic[T]): # now the ugly part # replace all refs of the other element with this one - replace_all_refs(self, equal_collection) + self.merge(equal_collection) def contain_collection_inside(self, sub_collection: "Collection"): diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index 808cff1..f3cd41c 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -7,7 +7,7 @@ from .metadata import Metadata from .option import Options from ..utils.shared import HIGHEST_ID from ..utils.config import main_settings, logging_settings -from ..utils.functions import replace_all_refs +from ..utils.support_classes.hacking import MetaClass LOGGER = logging_settings["object_logger"] @@ -43,7 +43,7 @@ class Attribute(Generic[P]): -class DatabaseObject: +class DatabaseObject(metaclass=MetaClass): COLLECTION_STRING_ATTRIBUTES: tuple = tuple() SIMPLE_STRING_ATTRIBUTES: dict = dict() @@ -170,7 +170,7 @@ class DatabaseObject: setattr(self, simple_attribute, getattr(other, simple_attribute)) if replace_all_refs: - replace_all_refs(self, other) + self.merge(other) def strip_details(self): for collection in type(self).DOWNWARDS_COLLECTION_STRING_ATTRIBUTES: diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index 16d127c..bffc15c 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -23,7 +23,8 @@ from ..utils.enums.source import SourcePages from ..utils.enums.album import AlbumType from ..audio import write_metadata_to_target, correct_codec from ..utils.config import main_settings -from ..utils.support_classes import Query, DownloadResult +from ..utils.support_classes.query import Query +from ..utils.support_classes.download_result import DownloadResult from ..utils.string_processing import fit_to_file_system diff 
--git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py index 24eb37b..4cfb706 100644 --- a/src/music_kraken/pages/bandcamp.py +++ b/src/music_kraken/pages/bandcamp.py @@ -21,7 +21,7 @@ from ..objects import ( FormattedText ) from ..connection import Connection -from ..utils.support_classes import DownloadResult +from ..utils.support_classes.download_result import DownloadResult from ..utils.config import main_settings, logging_settings from ..utils.shared import DEBUG if DEBUG: diff --git a/src/music_kraken/pages/encyclopaedia_metallum.py b/src/music_kraken/pages/encyclopaedia_metallum.py index 289434c..d37b013 100644 --- a/src/music_kraken/pages/encyclopaedia_metallum.py +++ b/src/music_kraken/pages/encyclopaedia_metallum.py @@ -9,7 +9,7 @@ from ..utils.config import logging_settings from .abstract import Page from ..utils.enums.source import SourcePages from ..utils.enums.album import AlbumType -from ..utils.support_classes import Query +from ..utils.support_classes.query import Query from ..objects import ( Lyrics, Artist, diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index ddb1a7f..44b61d9 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -25,7 +25,8 @@ from ..objects import ( ) from ..utils.config import logging_settings from ..utils import string_processing, shared -from ..utils.support_classes import DownloadResult, Query +from ..utils.support_classes.query import Query +from ..utils.support_classes.download_result import DownloadResult """ https://musify.club/artist/ghost-bath-280348?_pjax=#bodyContent diff --git a/src/music_kraken/pages/preset.py b/src/music_kraken/pages/preset.py index 5e940ba..0755089 100644 --- a/src/music_kraken/pages/preset.py +++ b/src/music_kraken/pages/preset.py @@ -15,7 +15,8 @@ from ..objects import ( Target ) from ..connection import Connection -from ..utils.support_classes import DownloadResult +from ..utils.support_classes.query 
import Query +from ..utils.support_classes.download_result import DownloadResult class Preset(Page): # CHANGE diff --git a/src/music_kraken/pages/youtube.py b/src/music_kraken/pages/youtube.py index 4c420f0..4ce6633 100644 --- a/src/music_kraken/pages/youtube.py +++ b/src/music_kraken/pages/youtube.py @@ -20,7 +20,7 @@ from ..objects import ( ) from ..connection import Connection from ..utils.string_processing import clean_song_title -from ..utils.support_classes import DownloadResult +from ..utils.support_classes.download_result import DownloadResult from ..utils.config import youtube_settings, main_settings, logging_settings from .youtube_music.super_youtube import SuperYouTube, YouTubeUrl, get_invidious_url, YouTubeUrlType diff --git a/src/music_kraken/pages/youtube_music/super_youtube.py b/src/music_kraken/pages/youtube_music/super_youtube.py index a60d416..676a06e 100644 --- a/src/music_kraken/pages/youtube_music/super_youtube.py +++ b/src/music_kraken/pages/youtube_music/super_youtube.py @@ -19,7 +19,7 @@ from ...objects import ( ID3Timestamp ) from ...connection import Connection -from ...utils.support_classes import DownloadResult +from ...utils.support_classes.download_result import DownloadResult from ...utils.config import youtube_settings, logging_settings, main_settings diff --git a/src/music_kraken/pages/youtube_music/youtube_music.py b/src/music_kraken/pages/youtube_music/youtube_music.py index 851cfc3..7eb139d 100644 --- a/src/music_kraken/pages/youtube_music/youtube_music.py +++ b/src/music_kraken/pages/youtube_music/youtube_music.py @@ -25,7 +25,7 @@ from ...objects import ( Target ) from ...connection import Connection -from ...utils.support_classes import DownloadResult +from ...utils.support_classes.download_result import DownloadResult from ._list_render import parse_renderer from .super_youtube import SuperYouTube diff --git a/src/music_kraken/utils/functions.py b/src/music_kraken/utils/functions.py index 7e1362e..f773213 100644 --- 
from types import FunctionType
from functools import wraps

from typing import Dict

class Lake:
    """Identity registry letting one object transparently stand in for another."""

    def __init__(self):
        # old object id -> id of the object that replaced it (chains allowed)
        self.redirects: Dict[int, int] = {}
        # canonical id -> live object
        self.id_to_object: Dict[int, object] = {}

    def get_real_object(self, db_object: object) -> object:
        """Follow redirect chains and return the canonical object for db_object."""
        def _get_real_id(_id: int) -> int:
            if _id in self.redirects:
                return _get_real_id(self.redirects[_id])
            return _id

        _id = _get_real_id(id(db_object))
        if _id not in self.id_to_object:
            self.add(db_object)

        return self.id_to_object[_id]

    def add(self, db_object: object):
        self.id_to_object[id(db_object)] = db_object

    def override(self, to_override: object, new_db_object: object):
        """From now on resolve to_override to new_db_object."""
        self.redirects[id(to_override)] = id(new_db_object)
        # fixed: the original `del self.id_to_object[...]` raised KeyError when
        # to_override had never been registered; pop with default is safe
        self.id_to_object.pop(id(to_override), None)


lake = Lake()


def wrapper(method):
    """Resolve the bound instance (args[0]) through the lake before every call."""
    @wraps(method)
    def wrapped(*args, **kwargs):
        # fixed: the original condition was `len(args) >= 0`, which is always
        # true and would index args[0] even on zero-argument calls
        if len(args) > 0 and method.__name__ != "__init__":
            _self = lake.get_real_object(args[0])
            args = (_self, *args[1:])

        return method(*args, **kwargs)
    return wrapped



class BaseClass:
    def merge(self, to_replace):
        """Make every future lake lookup of to_replace resolve to self."""
        lake.override(to_replace, self)


class MetaClass(type):
    """Metaclass that wraps all methods so `self` is resolved through the lake."""

    def __new__(meta, classname, bases, classDict):
        bases = (*bases, BaseClass)
        newClassDict = {}

        for attributeName, attribute in classDict.items():
            if isinstance(attribute, FunctionType) and attributeName not in ("__new__", "__init__"):
                attribute = wrapper(attribute)
            newClassDict[attributeName] = attribute

        # also wrap the default object dunders (__eq__, __setattr__, ...) so they
        # resolve through the lake as well.
        # fixed: the original tested `value not in newClassDict`, comparing the
        # FUNCTION against the dict's string keys (always absent), which clobbered
        # dunders the class defined itself; the key must be tested instead.
        for key, value in object.__dict__.items( ):
            if hasattr( value, '__call__' ) and key not in newClassDict and key not in ("__new__", "__repr__", "__init__"):
                newClassDict[key] = wrapper(value)

        new_instance = type.__new__(meta, classname, bases, newClassDict)

        lake.add(new_instance)

        return new_instance
T = TypeVar('T', bound="DatabaseObject")


class Collection(Generic[T], metaclass=MetaClass):
    """
    Value-indexed list of database objects with permanently contained
    sub-collections and declarative append-time syncing rules.
    """
    _data: List[T]

    # per attribute name: set of indexing values currently present
    _indexed_values: Dict[str, set]
    # value -> objects carrying that value
    # (fixed: was Dict[any, list] — builtin any() is not a type)
    _indexed_to_objects: Dict[Any, List[T]]

    shallow_list = property(fget=lambda self: self.data)

    def __init__(
        self, data: Optional[Iterable[T]],
        sync_on_append: Optional[Dict[str, "Collection"]] = None,
        contain_given_in_attribute: Optional[Dict[str, "Collection"]] = None,
        contain_attribute_in_given: Optional[Dict[str, "Collection"]] = None,
        append_object_to_attribute: Optional[Dict[str, "DatabaseObject"]] = None
    ) -> None:
        # NOTE: the annotations above must be quoted — `Collection` does not exist
        # yet while this `def` is evaluated, so unquoted annotations raised
        # NameError at import time in the original.
        self._data = []
        self.upper_collections: List[Collection[T]] = []
        self.contained_collections: List[Collection[T]] = []

        # List of collection attributes that should be modified on append
        # Key: collection attribute (str) of appended element
        # Value: main collection to sync to
        self.sync_on_append: Dict[str, Collection] = sync_on_append or {}
        self.contain_given_in_attribute: Dict[str, Collection] = contain_given_in_attribute or {}
        # fixed: copy-paste bug — the original assigned contain_given_in_attribute
        # here as well, silently discarding the contain_attribute_in_given argument
        self.contain_attribute_in_given: Dict[str, Collection] = contain_attribute_in_given or {}
        self.append_object_to_attribute: Dict[str, DatabaseObject] = append_object_to_attribute or {}

        self.contain_self_on_append: List[str] = []

        self._indexed_values = defaultdict(set)
        self._indexed_to_objects = defaultdict(list)

        self.extend(data)

    def _map_element(self, __object: T):
        """Register every non-None indexing value of __object in both indexes."""
        for name, value in __object.indexing_values:
            if value is None:
                continue

            self._indexed_values[name].add(value)
            self._indexed_to_objects[value].append(__object)

    def _unmap_element(self, __object: T):
        """Remove __object from the indexes; unknown values are skipped silently."""
        for name, value in __object.indexing_values:
            if value is None:
                continue
            if value not in self._indexed_values[name]:
                continue

            try:
                self._indexed_to_objects[value].remove(__object)
            except ValueError:
                continue

            if not len(self._indexed_to_objects[value]):
                self._indexed_values[name].remove(value)

    def _contained_in_self(self, __object: T) -> bool:
        for name, value in __object.indexing_values:
            if value is None:
                continue
            if value in self._indexed_values[name]:
                return True
        return False

    def _contained_in(self, __object: T) -> Optional["Collection"]:
        """Return self or the direct sub-collection holding __object, else None."""
        if self._contained_in_self(__object):
            return self

        for collection in self.contained_collections:
            if collection._contained_in_self(__object):
                return collection

        return None

    def contains(self, __object: T) -> bool:
        return self._contained_in(__object) is not None

    def _append(self, __object: T):
        self._map_element(__object)
        self._data.append(__object)

    def append(self, __object: Optional[T]):
        """Add a single object; None is silently ignored."""
        if __object is None:
            return

        self._append(__object)

    def extend(self, __iterable: Optional[Iterable[T]]):
        if __iterable is None:
            return

        for __object in __iterable:
            self.append(__object)

    def sync_with_other_collection(self, equal_collection: "Collection"):
        """
        If two collections always need to have the same values, this can be used.

        Internally:
        1. import the data from other to self (_data, contained_collections)
        2. replace all refs from the other object with refs to this object
        """
        if equal_collection is self:
            return

        # don't add the elements from the subelements of the other collection;
        # containment in the next step covers them.
        self.extend(equal_collection._data)
        for equal_sub_collection in equal_collection.contained_collections:
            self.contain_collection_inside(equal_sub_collection)

        # lake-based identity merge (MetaClass machinery): every reference to
        # equal_collection now resolves to self
        self.merge(equal_collection)


    def contain_collection_inside(self, sub_collection: "Collection"):
        """This collection will ALWAYS contain everything from the passed in collection."""
        if sub_collection in self.contained_collections:
            return

        self.contained_collections.append(sub_collection)
        sub_collection.upper_collections.append(self)

    @property
    def data(self) -> List[T]:
        return [*self._data, *(__object for collection in self.contained_collections for __object in collection.shallow_list)]

    def __len__(self) -> int:
        return len(self._data) + sum(len(collection) for collection in self.contained_collections)

    def __iter__(self) -> Iterator[T]:
        # own items only; contained collections are not iterated
        for element in self._data:
            yield element
yield element \ No newline at end of file diff --git a/src/music_kraken/objects/new_collection.py b/src/music_kraken/objects/new_collection.py deleted file mode 100644 index 785d9d3..0000000 --- a/src/music_kraken/objects/new_collection.py +++ /dev/null @@ -1,133 +0,0 @@ -from typing import List, Iterable, Iterator, Optional, TypeVar, Generic, Dict, Type -from collections import defaultdict - -from .parents import DatabaseObject -from ..utils.support_classes.hacking import MetaClass - - -T = TypeVar('T', bound=DatabaseObject) - - -class Collection(Generic[T], metaclass=MetaClass): - _data: List[T] - - _indexed_values: Dict[str, set] - _indexed_to_objects: Dict[any, list] - - shallow_list = property(fget=lambda self: self.data) - - def __init__(self, data: Optional[Iterable[T]]) -> None: - self._data = [] - self.contained_collections: List[Collection[T]] = [] - - self._indexed_values = defaultdict(set) - self._indexed_to_objects = defaultdict(list) - - self.extend(data) - - def _map_element(self, __object: T): - for name, value in __object.indexing_values: - if value is None: - continue - - self._indexed_values[name].add(value) - self._indexed_to_objects[value].append(__object) - - def _unmap_element(self, __object: T): - for name, value in __object.indexing_values: - if value is None: - continue - if value not in self._indexed_values[name]: - continue - - try: - self._indexed_to_objects[value].remove(__object) - except ValueError: - continue - - if not len(self._indexed_to_objects[value]): - self._indexed_values[name].remove(value) - - def _contained_in_self(self, __object: T) -> bool: - for name, value in __object.indexing_values: - if value is None: - continue - if value in self._indexed_values[name]: - return True - return False - - def _contained_in(self, __object: T) -> Optional["Collection"]: - if self._contained_in_self(__object): - return self - - for collection in self.contained_collections: - if collection._contained_in_self(__object): - return collection 
- - return None - - def contains(self, __object: T) -> bool: - return self._contained_in(__object) is not None - - - def _append(self, __object: T): - self._map_element(__object) - self._data.append(__object) - - def append(self, __object: Optional[T]): - if __object is None: - return - - self._append(__object) - - def extend(self, __iterable: Optional[Iterable[T]]): - if __iterable is None: - return - - for __object in __iterable: - self.append(__object) - - def sync_with_other_collection(self, equal_collection: "Collection"): - """ - If two collections always need to have the same values, this can be used. - - Internally: - 1. import the data from other to self - - _data - - contained_collections - 2. replace all refs from the other object, with refs from this object - """ - if equal_collection is self: - return - - # don't add the elements from the subelements from the other collection. - # this will be done in the next step. - self.extend(equal_collection._data) - # add all submodules - for equal_sub_collection in equal_collection.contained_collections: - self.contain_collection_inside(equal_sub_collection) - - # now the ugly part - # replace all refs of the other element with this one - self.merge(equal_collection) - - - def contain_collection_inside(self, sub_collection: "Collection"): - """ - This collection will ALWAYS contain everything from the passed in collection - """ - if sub_collection in self.contained_collections: - return - - self.contained_collections.append(sub_collection) - - @property - def data(self) -> List[T]: - return [*self._data, *(__object for collection in self.contained_collections for __object in collection.shallow_list)] - - def __len__(self) -> int: - return len(self._data) + sum(len(collection) for collection in self.contained_collections) - - def __iter__(self) -> Iterator[T]: - for element in self._data: - yield element \ No newline at end of file diff --git a/src/music_kraken/objects/parents.py 
b/src/music_kraken/objects/parents.py index f3cd41c..c65d2dd 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -147,6 +147,8 @@ class DatabaseObject(metaclass=MetaClass): return list() def merge(self, other, override: bool = False, replace_all_refs: bool = False): + print("merge") + if other is None: return diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index ddc883f..85464df 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -5,7 +5,7 @@ from typing import List, Optional, Dict, Tuple, Type import pycountry from ..utils.enums.album import AlbumType, AlbumStatus -from .collection import Collection, CollectionHooks +from .collection import Collection from .formatted_text import FormattedText from .lyrics import Lyrics from .contact import Contact @@ -100,25 +100,29 @@ class Song(MainObject): self.notes: FormattedText = notes or FormattedText() self.source_collection: SourceCollection = SourceCollection(source_list) - self.target_collection: Collection[Target] = Collection(data=target_list, element_type=Target) - self.lyrics_collection: Collection[Lyrics] = Collection(data=lyrics_list, element_type=Lyrics) + self.target_collection: Collection[Target] = Collection(data=target_list) + self.lyrics_collection: Collection[Lyrics] = Collection(data=lyrics_list) # main_artist_collection = album.artist collection - self.main_artist_collection: Collection[Artist] = Collection(data=main_artist_list, element_type=Artist) + self.main_artist_collection: Collection[Artist] = Collection(data=[]) # this album_collection equals no collection - self.album_collection: Collection[Album] = Collection(data=[], element_type=Album) - self.album_collection.sync_main_collection(self.main_artist_collection, "artist_collection") - self.album_collection.extend(album_list) - # self.album_collection.sync_collection("song_collection") - # 
self.album_collection.hooks.add_event_listener(CollectionHooks.APPEND_NEW, on_album_append) + self.album_collection: Collection[Album] = Collection(data=album_list, + contain_given_in_attribute={ + "artist_collection": self.main_artist_collection + }, append_object_to_attribute={ + "song_collection": self + }) - # on feature_artist_collection append, append self to artist self - self.feature_artist_collection: Collection[Artist] = Collection(data=[], element_type=Artist) - def on_feature_artist_append(event, new_object: Artist, *args, **kwargs): - new_object.feature_song_collection.append(self, no_hook=True) - self.feature_artist_collection.hooks.add_event_listener(CollectionHooks.APPEND_NEW, on_feature_artist_append) - self.feature_artist_collection.extend(feature_artist_list) + self.main_artist_collection.contain_given_in_attribute = {"main_album_collection": self.album_collection} + self.main_artist_collection.extend(main_artist_list) + + self.feature_artist_collection: Collection[Artist] = Collection( + data=feature_artist_list, + append_object_to_attribute={ + "feature_song_collection": self + } + ) def _build_recursive_structures(self, build_version: int, merge: bool): if build_version == self.build_version: @@ -322,13 +326,16 @@ class Album(MainObject): self.source_collection: SourceCollection = SourceCollection(source_list) - self.artist_collection: Collection[Artist] = Collection(data=artist_list, element_type=Artist) + self.artist_collection: Collection[Artist] = Collection(data=artist_list) - self.song_collection: Collection[Song] = Collection(data=[], element_type=Song) - self.song_collection.sync_main_collection(self.artist_collection, "main_artist_collection") - self.song_collection.extend(song_list) + self.song_collection: Collection[Song] = Collection( + data=song_list, + contain_attribute_in_given={ + "main_artist_collection": self.artist_collection + } + ) - self.label_collection: Collection[Label] = Collection(data=label_list, element_type=Label) 
+ self.label_collection: Collection[Label] = Collection(data=label_list) def _build_recursive_structures(self, build_version: int, merge: bool): if build_version == self.build_version: @@ -589,22 +596,26 @@ class Artist(MainObject): self.source_collection: SourceCollection = SourceCollection(source_list) self.contact_collection: Collection[Label] = Collection(data=contact_list, element_type=Contact) - self.feature_song_collection: Collection[Song] = Collection(data=[], element_type=Song) - def on_feature_song_append(event, new_object: Song, *args, **kwargs): - new_object.feature_artist_collection.append(self, no_hook=True) - self.feature_song_collection.hooks.add_event_listener(CollectionHooks.APPEND_NEW, on_feature_song_append) - self.feature_song_collection.extend(feature_song_list) + self.feature_song_collection: Collection[Song] = Collection( + data=feature_song_list, + append_object_to_attribute={ + "feature_artist_collection": self + } + ) - self.main_album_collection: Collection[Album] = Collection(data=[], element_type=Album) - def on_album_append(event, new_object: Album, *args, **kwargs): - new_object.artist_collection.append(self, no_hook=True) - self.main_album_collection.hooks.add_event_listener(CollectionHooks.APPEND_NEW, on_album_append) - self.main_album_collection.extend(main_album_list) + self.main_album_collection: Collection[Album] = Collection( + data=main_album_list, + append_object_to_attribute={ + "artist_collection": self + } + ) - self.label_collection: Collection[Label] = Collection(data=label_list, element_type=Label) - def on_label_append(event, new_object: Label, *args, **kwargs): - new_object.current_artist_collection.append(self, no_hook=True) - self.label_collection.hooks.add_event_listener(CollectionHooks.APPEND_NEW, on_label_append) + self.label_collection: Collection[Label] = Collection( + data=label_list, + append_object_to_attribute={ + "current_artist_collection": self + } + ) def _add_other_db_objects(self, object_type: 
Type["DatabaseObject"], object_list: List["DatabaseObject"]): diff --git a/src/music_kraken/utils/support_classes/hacking.py b/src/music_kraken/utils/support_classes/hacking.py index 1cb1d78..a5e7851 100644 --- a/src/music_kraken/utils/support_classes/hacking.py +++ b/src/music_kraken/utils/support_classes/hacking.py @@ -10,9 +10,7 @@ class Lake: def get_real_object(self, db_object: object) -> object: def _get_real_id(_id: int) -> int: - if _id in self.redirects: - return _get_real_id(self.redirects[_id]) - return _id + return self.redirects.get(_id, _id) _id = _get_real_id(id(db_object)) if _id not in self.id_to_object: From af8a4d29a8b55501586414db68c047abfc9699d1 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 24 Oct 2023 13:31:43 +0200 Subject: [PATCH 028/104] feat: layed out callvacks --- src/music_kraken/objects/old_collection.py | 221 +++++++++++++++++++++ 1 file changed, 221 insertions(+) create mode 100644 src/music_kraken/objects/old_collection.py diff --git a/src/music_kraken/objects/old_collection.py b/src/music_kraken/objects/old_collection.py new file mode 100644 index 0000000..4f50ff1 --- /dev/null +++ b/src/music_kraken/objects/old_collection.py @@ -0,0 +1,221 @@ +from typing import List, Iterable, Dict, TypeVar, Generic, Iterator +from collections import defaultdict +from dataclasses import dataclass + +from .parents import DatabaseObject +from ..utils.hooks import HookEventTypes, Hooks, Event + + +class CollectionHooks(HookEventTypes): + APPEND_NEW = "append_new" + + +T = TypeVar('T', bound=DatabaseObject) + + +@dataclass +class AppendResult: + was_in_collection: bool + current_element: DatabaseObject + was_the_same: bool + + +class Collection(Generic[T]): + """ + This a class for the iterables + like tracklist or discography + """ + _data: List[T] + + _by_url: dict + _by_attribute: dict + + def __init__(self, data: List[T] = None, element_type=None, *args, **kwargs) -> None: + # Attribute needs to point to + self.element_type = element_type 
+ + self._data: List[T] = list() + + """ + example of attribute_to_object_map + the song objects are references pointing to objects + in _data + + ```python + { + 'id': {323: song_1, 563: song_2, 666: song_3}, + 'url': {'www.song_2.com': song_2} + } + ``` + """ + self._attribute_to_object_map: Dict[str, Dict[object, T]] = defaultdict(dict) + self._used_ids: set = set() + + self.hooks: Hooks = Hooks(self) + + if data is not None: + self.extend(data, merge_on_conflict=True) + + def sort(self, reverse: bool = False, **kwargs): + self._data.sort(reverse=reverse, **kwargs) + + def map_element(self, element: T): + for name, value in element.indexing_values: + if value is None: + continue + + self._attribute_to_object_map[name][value] = element + + self._used_ids.add(element.id) + + def unmap_element(self, element: T): + for name, value in element.indexing_values: + if value is None: + continue + + if value in self._attribute_to_object_map[name]: + if element is self._attribute_to_object_map[name][value]: + try: + self._attribute_to_object_map[name].pop(value) + except KeyError: + pass + + def append(self, element: T, merge_on_conflict: bool = True, + merge_into_existing: bool = True, no_hook: bool = False) -> AppendResult: + """ + :param element: + :param merge_on_conflict: + :param merge_into_existing: + :return did_not_exist: + """ + if element is None: + return AppendResult(False, None, False) + + for existing_element in self._data: + if element is existing_element: + return AppendResult(False, None, False) + + # if the element type has been defined in the initializer it checks if the type matches + if self.element_type is not None and not isinstance(element, self.element_type): + raise TypeError(f"{type(element)} is not the set type {self.element_type}") + + # return if the same instance of the object is in the list + for existing in self._data: + if element is existing: + return AppendResult(True, element, True) + + for name, value in element.indexing_values: + if 
value in self._attribute_to_object_map[name]: + existing_object = self._attribute_to_object_map[name][value] + + if not merge_on_conflict: + return AppendResult(True, existing_object, False) + + # if the object does already exist + # thus merging and don't add it afterward + if merge_into_existing: + existing_object.merge(element) + # in case any relevant data has been added (e.g. it remaps the old object) + self.map_element(existing_object) + return AppendResult(True, existing_object, False) + + element.merge(existing_object) + + exists_at = self._data.index(existing_object) + self._data[exists_at] = element + + self.unmap_element(existing_object) + self.map_element(element) + return AppendResult(True, existing_object, False) + + if not no_hook: + self.hooks.trigger_event(CollectionHooks.APPEND_NEW, new_object=element) + self._data.append(element) + self.map_element(element) + + return AppendResult(False, element, False) + + def extend(self, element_list: Iterable[T], merge_on_conflict: bool = True, + merge_into_existing: bool = True, no_hook: bool = False): + if element_list is None: + return + if len(element_list) <= 0: + return + if element_list is self: + return + for element in element_list: + self.append(element, merge_on_conflict=merge_on_conflict, merge_into_existing=merge_into_existing, no_hook=no_hook) + + def sync_collection(self, collection_attribute: str): + def on_append(event: Event, new_object: T, *args, **kwargs): + new_collection = new_object.__getattribute__(collection_attribute) + if self is new_collection: + return + + self.extend(new_object.__getattribute__(collection_attribute), no_hook=True) + new_object.__setattr__(collection_attribute, self) + + self.hooks.add_event_listener(CollectionHooks.APPEND_NEW, on_append) + + def sync_main_collection(self, main_collection: "Collection", collection_attribute: str): + def on_append(event: Event, new_object: T, *args, **kwargs): + new_collection = new_object.__getattribute__(collection_attribute) + 
if main_collection is new_collection: + return + + main_collection.extend(new_object.__getattribute__(collection_attribute), no_hook=True) + new_object.__setattr__(collection_attribute, main_collection) + + self.hooks.add_event_listener(CollectionHooks.APPEND_NEW, on_append) + + """ + def on_append(event: Event, new_object: T, *args, **kwargs): + new_collection: Collection = new_object.__getattribute__(collection_attribute) + if self is new_collection: + return + + self.extend(new_collection.shallow_list, no_hook=False) + new_object.__setattr__(collection_attribute, self) + + self.hooks.add_event_listener(CollectionHooks.APPEND_NEW, on_append) + """ + + def __iter__(self) -> Iterator[T]: + for element in self._data: + yield element + + def __str__(self) -> str: + return "\n".join([f"{str(j).zfill(2)}: {i.__repr__()}" for j, i in enumerate(self._data)]) + + def __len__(self) -> int: + return len(self._data) + + def __getitem__(self, key) -> T: + if type(key) != int: + return ValueError("key needs to be an integer") + + return self._data[key] + + def __setitem__(self, key, value: T): + if type(key) != int: + return ValueError("key needs to be an integer") + + old_item = self._data[key] + self.unmap_element(old_item) + self.map_element(value) + + self._data[key] = value + + @property + def shallow_list(self) -> List[T]: + """ + returns a shallow copy of the data list + """ + return self._data.copy() + + @property + def empty(self) -> bool: + return len(self._data) == 0 + + def clear(self): + self.__init__(element_type=self.element_type) From 92258e4dce625707a73b7202e02d9f037fdc5eb8 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Tue, 24 Oct 2023 13:32:26 +0200 Subject: [PATCH 029/104] feat: implement proper merging for collections --- src/create_custom_objects.py | 7 ++++++- src/music_kraken/__init__.py | 4 ---- src/music_kraken/objects/new_collection.py | 12 +++++++----- src/music_kraken/objects/parents.py | 6 +----- 4 files 
changed, 14 insertions(+), 15 deletions(-) diff --git a/src/create_custom_objects.py b/src/create_custom_objects.py index e4cb12f..5eddb2d 100644 --- a/src/create_custom_objects.py +++ b/src/create_custom_objects.py @@ -107,7 +107,7 @@ print(only_smile) c = Collection([Song(title="hi"), Song(title="hi2"), Song(title="hi3")]) c1 = Collection([Song(title="he"), Song(title="hi5")]) -c11 = Collection([Song(title="wow how ultra subby")]) +c11 = Collection([Song(title="wow how ultra subby", isrc="hiii")]) c2 = Collection([Song(title="heeee")]) b = Collection([Song(title="some b"), Song(title="other b")]) @@ -130,9 +130,14 @@ print(c1.data) c11.append(Song(title="after creation")) +other_song = Song(title="has same isrc", isrc="hiii", genre="hssss") +print(c.contains(other_song)) +c.append(other_song) + print() print(c.data, len(c)) print(c1.data) +print([obj.genre for obj in c.data]) print() print("c: ", c) diff --git a/src/music_kraken/__init__.py b/src/music_kraken/__init__.py index 555b154..a803b48 100644 --- a/src/music_kraken/__init__.py +++ b/src/music_kraken/__init__.py @@ -8,10 +8,6 @@ read_config() from . 
import cli -# I am SO sorry -print(sys.setrecursionlimit(500)) - - # configure logger default logging.basicConfig( level=logging_settings['log_level'] if not DEBUG_LOGGIN else logging.DEBUG, diff --git a/src/music_kraken/objects/new_collection.py b/src/music_kraken/objects/new_collection.py index 5569855..3b2565a 100644 --- a/src/music_kraken/objects/new_collection.py +++ b/src/music_kraken/objects/new_collection.py @@ -25,7 +25,7 @@ class Collection(Generic[T]): self.extend(data) - def _map_element(self, __object: T, no_append: bool = True): + def _map_element(self, __object: T, no_append: bool = False): for name, value in __object.indexing_values: if value is None: continue @@ -62,8 +62,7 @@ class Collection(Generic[T]): return self for collection in self.contained_collections: - if collection._contained_in_self(__object): - return collection + return collection._contained_in(__object) return None @@ -80,13 +79,16 @@ class Collection(Generic[T]): if value is None: continue if value in self._indexed_values[name]: - existing_object = self._indexed_to_objects[value] + existing_object = self._indexed_to_objects[value][0] break if existing_object is None: return None - existing_object.merge(__object, replace_all_refs=True) + existing_object.merge(__object) + replace_all_refs(existing_object, __object) + + print(existing_object, __object) if existing_object is not __object: raise ValueError("This should NEVER happen. 
Merging doesn't work.") diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index 808cff1..c3d8b1e 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -7,7 +7,6 @@ from .metadata import Metadata from .option import Options from ..utils.shared import HIGHEST_ID from ..utils.config import main_settings, logging_settings -from ..utils.functions import replace_all_refs LOGGER = logging_settings["object_logger"] @@ -146,7 +145,7 @@ class DatabaseObject: return list() - def merge(self, other, override: bool = False, replace_all_refs: bool = False): + def merge(self, other, override: bool = False): if other is None: return @@ -169,9 +168,6 @@ class DatabaseObject: if override or getattr(self, simple_attribute) == default_value: setattr(self, simple_attribute, getattr(other, simple_attribute)) - if replace_all_refs: - replace_all_refs(self, other) - def strip_details(self): for collection in type(self).DOWNWARDS_COLLECTION_STRING_ATTRIBUTES: getattr(self, collection).clear() From 11c2917dadeedfb7bbf627481fc9ce03e868887f Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Tue, 24 Oct 2023 13:46:52 +0200 Subject: [PATCH 030/104] fix: merging works again --- src/create_custom_objects.py | 8 +- src/music_kraken/objects/collection.py | 41 ++++- src/music_kraken/objects/new_collection.py | 168 --------------------- src/music_kraken/objects/parents.py | 2 +- src/music_kraken/objects/source.py | 2 +- 5 files changed, 43 insertions(+), 178 deletions(-) diff --git a/src/create_custom_objects.py b/src/create_custom_objects.py index 5eddb2d..a29e5a8 100644 --- a/src/create_custom_objects.py +++ b/src/create_custom_objects.py @@ -6,7 +6,7 @@ from music_kraken.objects import ( Source, DatabaseObject ) -from music_kraken.objects.new_collection import Collection +from music_kraken.objects.collection import Collection from music_kraken.utils.enums import SourcePages """ @@ 
-128,11 +128,13 @@ c1.contain_collection_inside(c11) print(c.data) print(c1.data) -c11.append(Song(title="after creation")) +c.append(Song(title="after creation")) other_song = Song(title="has same isrc", isrc="hiii", genre="hssss") print(c.contains(other_song)) -c.append(other_song) +c11.append(other_song) +print(other_song) + print() print(c.data, len(c)) diff --git a/src/music_kraken/objects/collection.py b/src/music_kraken/objects/collection.py index 48c671e..c73ecdb 100644 --- a/src/music_kraken/objects/collection.py +++ b/src/music_kraken/objects/collection.py @@ -18,9 +18,9 @@ class Collection(Generic[T], metaclass=MetaClass): def __init__( self, data: Optional[Iterable[T]], - sync_on_append: Dict[str, Collection] = None, - contain_given_in_attribute: Dict[str, Collection] = None, - contain_attribute_in_given: Dict[str, Collection] = None, + sync_on_append: Dict[str, "Collection"] = None, + contain_given_in_attribute: Dict[str, "Collection"] = None, + contain_attribute_in_given: Dict[str, "Collection"] = None, append_object_to_attribute: Dict[str, DatabaseObject] = None ) -> None: self._data = [] @@ -32,7 +32,7 @@ class Collection(Generic[T], metaclass=MetaClass): # Value: main collection to sync to self.sync_on_append: Dict[str, Collection] = sync_on_append or {} self.contain_given_in_attribute: Dict[str, Collection] = contain_given_in_attribute or {} - self.contain_attribute_in_given: Dict[str, Collection] = contain_given_in_attribute or {} + self.contain_attribute_in_given: Dict[str, Collection] = contain_attribute_in_given or {} self.append_object_to_attribute: Dict[str, DatabaseObject] = append_object_to_attribute or {} self.contain_self_on_append: List[str] = [] @@ -83,6 +83,32 @@ class Collection(Generic[T], metaclass=MetaClass): return None + def _merge_in_self(self, __object: T): + """ + 1. find existing objects + 2. merge into existing object + 3. 
remap existing object + """ + existing_object: DatabaseObject = None + + for name, value in __object.indexing_values: + if value is None: + continue + if value in self._indexed_values[name]: + existing_object = self._indexed_to_objects[value][0] + break + + if existing_object is None: + return None + + existing_object.merge(__object, replace_all_refs=True) + + # just a check if it really worked + if existing_object.id != __object.id: + raise ValueError("This should NEVER happen. Merging doesn't work.") + + self._map_element(existing_object) + def contains(self, __object: T) -> bool: return self._contained_in(__object) is not None @@ -94,8 +120,13 @@ class Collection(Generic[T], metaclass=MetaClass): def append(self, __object: Optional[T]): if __object is None: return + + exists_in_collection = self._contained_in(__object) - self._append(__object) + if exists_in_collection is None: + self._append(__object) + else: + exists_in_collection._merge_in_self(__object) def extend(self, __iterable: Optional[Iterable[T]]): if __iterable is None: diff --git a/src/music_kraken/objects/new_collection.py b/src/music_kraken/objects/new_collection.py index 3b2565a..e69de29 100644 --- a/src/music_kraken/objects/new_collection.py +++ b/src/music_kraken/objects/new_collection.py @@ -1,168 +0,0 @@ -from typing import List, Iterable, Iterator, Optional, TypeVar, Generic, Dict, Type -from collections import defaultdict - -from .parents import DatabaseObject -from ..utils.functions import replace_all_refs - - -T = TypeVar('T', bound=DatabaseObject) - - -class Collection(Generic[T]): - _data: List[T] - - _indexed_values: Dict[str, set] - _indexed_to_objects: Dict[any, list] - - shallow_list = property(fget=lambda self: self.data) - - def __init__(self, data: Optional[Iterable[T]]) -> None: - self._data = [] - self.contained_collections: List[Collection[T]] = [] - - self._indexed_values = defaultdict(set) - self._indexed_to_objects = defaultdict(list) - - self.extend(data) - - def 
_map_element(self, __object: T, no_append: bool = False): - for name, value in __object.indexing_values: - if value is None: - continue - - self._indexed_values[name].add(value) - if not no_append: - self._indexed_to_objects[value].append(__object) - - def _unmap_element(self, __object: T): - for name, value in __object.indexing_values: - if value is None: - continue - if value not in self._indexed_values[name]: - continue - - try: - self._indexed_to_objects[value].remove(__object) - except ValueError: - continue - - if not len(self._indexed_to_objects[value]): - self._indexed_values[name].remove(value) - - def _contained_in_self(self, __object: T) -> bool: - for name, value in __object.indexing_values: - if value is None: - continue - if value in self._indexed_values[name]: - return True - return False - - def _contained_in(self, __object: T) -> Optional["Collection"]: - if self._contained_in_self(__object): - return self - - for collection in self.contained_collections: - return collection._contained_in(__object) - - return None - - - def _merge_in_self(self, __object: T): - """ - 1. find existing objects - 2. merge into existing object - 3. remap existing object - """ - existing_object: DatabaseObject = None - - for name, value in __object.indexing_values: - if value is None: - continue - if value in self._indexed_values[name]: - existing_object = self._indexed_to_objects[value][0] - break - - if existing_object is None: - return None - - existing_object.merge(__object) - replace_all_refs(existing_object, __object) - - print(existing_object, __object) - - if existing_object is not __object: - raise ValueError("This should NEVER happen. 
Merging doesn't work.") - - self._map_element(existing_object) - - - def contains(self, __object: T) -> bool: - return self._contained_in(__object) is not None - - - def _append(self, __object: T): - self._map_element(__object) - self._data.append(__object) - - def append(self, __object: Optional[T]): - if __object is None: - return - - exists_in_collection = self._contained_in(__object) - - if exists_in_collection is None: - self._append(__object) - else: - exists_in_collection._merge_in_self(__object) - - def extend(self, __iterable: Optional[Iterable[T]]): - if __iterable is None: - return - - for __object in __iterable: - self.append(__object) - - def sync_with_other_collection(self, equal_collection: "Collection"): - """ - If two collections always need to have the same values, this can be used. - - Internally: - 1. import the data from other to self - - _data - - contained_collections - 2. replace all refs from the other object, with refs from this object - """ - if equal_collection is self: - return - - # don't add the elements from the subelements from the other collection. - # this will be done in the next step. 
- self.extend(equal_collection._data) - # add all submodules - for equal_sub_collection in equal_collection.contained_collections: - self.contain_collection_inside(equal_sub_collection) - - # now the ugly part - # replace all refs of the other element with this one - replace_all_refs(self, equal_collection) - - - def contain_collection_inside(self, sub_collection: "Collection"): - """ - This collection will ALWAYS contain everything from the passed in collection - """ - if sub_collection in self.contained_collections: - return - - self.contained_collections.append(sub_collection) - - @property - def data(self) -> List[T]: - return [*self._data, *(__object for collection in self.contained_collections for __object in collection.shallow_list)] - - def __len__(self) -> int: - return len(self._data) + sum(len(collection) for collection in self.contained_collections) - - def __iter__(self) -> Iterator[T]: - for element in self._data: - yield element \ No newline at end of file diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index c65d2dd..36bb3a7 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -172,7 +172,7 @@ class DatabaseObject(metaclass=MetaClass): setattr(self, simple_attribute, getattr(other, simple_attribute)) if replace_all_refs: - self.merge(other) + super().merge(other) def strip_details(self): for collection in type(self).DOWNWARDS_COLLECTION_STRING_ATTRIBUTES: diff --git a/src/music_kraken/objects/source.py b/src/music_kraken/objects/source.py index fd0dd0f..a4e489a 100644 --- a/src/music_kraken/objects/source.py +++ b/src/music_kraken/objects/source.py @@ -124,7 +124,7 @@ class SourceCollection(Collection): def __init__(self, source_list: List[Source]): self._page_to_source_list: Dict[SourcePages, List[Source]] = defaultdict(list) - super().__init__(data=source_list, element_type=Source) + super().__init__(data=source_list) def map_element(self, source: Source): 
super().map_element(source) From b34e9be52ae4b1ed99569d22ba94dc2304821352 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Tue, 24 Oct 2023 14:53:29 +0200 Subject: [PATCH 031/104] feat: huge conceptual improvements to merging --- src/create_custom_objects.py | 6 +-- src/music_kraken/objects/collection.py | 67 +++++++++++++++++++++----- src/music_kraken/objects/parents.py | 7 +-- src/music_kraken/objects/song.py | 2 +- 4 files changed, 64 insertions(+), 18 deletions(-) diff --git a/src/create_custom_objects.py b/src/create_custom_objects.py index a29e5a8..3d954c2 100644 --- a/src/create_custom_objects.py +++ b/src/create_custom_objects.py @@ -9,7 +9,7 @@ from music_kraken.objects import ( from music_kraken.objects.collection import Collection from music_kraken.utils.enums import SourcePages -""" + only_smile = Artist( name="Only Smile", source_list=[Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/")], @@ -103,7 +103,7 @@ for _id, _object in objects_by_id.items(): print(_id, _object, sep=": ") print(only_smile) -""" + c = Collection([Song(title="hi"), Song(title="hi2"), Song(title="hi3")]) c1 = Collection([Song(title="he"), Song(title="hi5")]) @@ -139,7 +139,7 @@ print(other_song) print() print(c.data, len(c)) print(c1.data) -print([obj.genre for obj in c.data]) +print([(obj.genre or "various") + ":" + obj.title for obj in c.data]) print() print("c: ", c) diff --git a/src/music_kraken/objects/collection.py b/src/music_kraken/objects/collection.py index c73ecdb..372de80 100644 --- a/src/music_kraken/objects/collection.py +++ b/src/music_kraken/objects/collection.py @@ -72,16 +72,52 @@ class Collection(Generic[T], metaclass=MetaClass): if value in self._indexed_values[name]: return True return False + + def _get_root_collections(self) -> List["Collection"]: + if not len(self.upper_collections): + return [self] + + root_collections = [] + for upper_collection in self.upper_collections: + 
root_collections.extend(upper_collection._get_root_collections()) + return root_collections + + @property + def _is_root(self) -> bool: + return len(self.upper_collections) <= 0 + + def _contained_in_sub(self, __object: T, break_at_first: bool = True) -> List["Collection"]: + results = [] - def _contained_in(self, __object: T) -> Optional["Collection"]: if self._contained_in_self(__object): - return self + return [self] for collection in self.contained_collections: - if collection._contained_in_self(__object): - return collection + results.extend(collection._contained_in_sub(__object, break_at_first=break_at_first)) + if break_at_first: + return results + + return results + + def _get_parents_of_multiple_contained_children(self, __object: T): + results = [] + if len(self.contained_collections) < 2 or self._contained_in_self(__object): + return results + + count = 0 + + for collection in self.contained_collections: + sub_results = collection._get_parents_of_multiple_contained_children(__object) - return None + if len(sub_results) > 0: + count += 1 + results.extend(sub_results) + + if count >= 2: + results.append(self) + + return results + def _merge_in_self(self, __object: T): """ @@ -110,23 +146,31 @@ class Collection(Generic[T], metaclass=MetaClass): self._map_element(existing_object) def contains(self, __object: T) -> bool: - return self._contained_in(__object) is not None - + return len(self._contained_in_sub(__object)) > 0 def _append(self, __object: T): self._map_element(__object) self._data.append(__object) - def append(self, __object: Optional[T]): + def append(self, __object: Optional[T], already_is_parent: bool = False): if __object is None: return - exists_in_collection = self._contained_in(__object) + exists_in_collection = self._contained_in_sub(__object) + if len(exists_in_collection) and self is exists_in_collection[0]: + # assuming that the object already is contained in the correct collections + if not already_is_parent: + 
self._merge_in_self(__object) + return - if exists_in_collection is None: + if not len(exists_in_collection): self._append(__object) else: - exists_in_collection._merge_in_self(__object) + exists_in_collection[0]._merge_in_self(__object) + + if not already_is_parent or not self._is_root: + for parent_collection in self._get_parents_of_multiple_contained_children(__object): + parent_collection.append(__object, already_is_parent=True) def extend(self, __iterable: Optional[Iterable[T]]): if __iterable is None: @@ -135,6 +179,7 @@ class Collection(Generic[T], metaclass=MetaClass): for __object in __iterable: self.append(__object) + def sync_with_other_collection(self, equal_collection: "Collection"): """ If two collections always need to have the same values, this can be used. diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index 36bb3a7..d6d4a09 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -1,3 +1,4 @@ +from __future__ import annotations import random from collections import defaultdict from typing import Optional, Dict, Tuple, List, Type, Generic, TypeVar, Any @@ -94,17 +95,17 @@ class DatabaseObject(metaclass=MetaClass): self.build_version = -1 @property - def upwards_collection(self) -> "Collection": + def upwards_collection(self) -> Collection: for attribute in self._upwards_collection_attributes: yield attribute.get() @property - def downwards_collection(self) -> "Collection": + def downwards_collection(self) -> Collection: for attribute in self._downwards_collection_attributes: yield attribute.get() @property - def all_collections(self) -> "Collection": + def all_collections(self) -> Collection: for attribute in self._collection_attributes: yield attribute.get() diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index 85464df..d7b1cef 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -594,7 +594,7 @@ class 
Artist(MainObject): self.unformated_location: Optional[str] = unformated_location self.source_collection: SourceCollection = SourceCollection(source_list) - self.contact_collection: Collection[Label] = Collection(data=contact_list, element_type=Contact) + self.contact_collection: Collection[Label] = Collection(data=contact_list) self.feature_song_collection: Collection[Song] = Collection( data=feature_song_list, From 0ec1a162bee260647647301bad496421e6cec3f0 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Tue, 24 Oct 2023 17:41:42 +0200 Subject: [PATCH 032/104] feat: marginally improved hacking --- src/create_custom_objects.py | 3 +- src/music_kraken/__init__.py | 3 + src/music_kraken/objects/collection.py | 55 ++++++++++++++----- src/music_kraken/objects/parents.py | 4 +- .../utils/support_classes/hacking.py | 26 ++++----- 5 files changed, 59 insertions(+), 32 deletions(-) diff --git a/src/create_custom_objects.py b/src/create_custom_objects.py index 3d954c2..a61da6c 100644 --- a/src/create_custom_objects.py +++ b/src/create_custom_objects.py @@ -104,7 +104,7 @@ for _id, _object in objects_by_id.items(): print(only_smile) - +""" c = Collection([Song(title="hi"), Song(title="hi2"), Song(title="hi3")]) c1 = Collection([Song(title="he"), Song(title="hi5")]) c11 = Collection([Song(title="wow how ultra subby", isrc="hiii")]) @@ -153,3 +153,4 @@ print("b: ", b) print(c.data) print(c._data) +""" \ No newline at end of file diff --git a/src/music_kraken/__init__.py b/src/music_kraken/__init__.py index a803b48..49c35c7 100644 --- a/src/music_kraken/__init__.py +++ b/src/music_kraken/__init__.py @@ -7,6 +7,9 @@ from .utils.config import logging_settings, main_settings, read_config read_config() from . 
import cli +if True: + import sys + sys.setrecursionlimit(100) # configure logger default logging.basicConfig( diff --git a/src/music_kraken/objects/collection.py b/src/music_kraken/objects/collection.py index 372de80..f105de9 100644 --- a/src/music_kraken/objects/collection.py +++ b/src/music_kraken/objects/collection.py @@ -23,6 +23,7 @@ class Collection(Generic[T], metaclass=MetaClass): contain_attribute_in_given: Dict[str, "Collection"] = None, append_object_to_attribute: Dict[str, DatabaseObject] = None ) -> None: + self._contains_ids = set() self._data = [] self.upper_collections: List[Collection[T]] = [] self.contained_collections: List[Collection[T]] = [] @@ -42,7 +43,9 @@ class Collection(Generic[T], metaclass=MetaClass): self.extend(data) - def _map_element(self, __object: T): + def _map_element(self, __object: T, from_map: bool = False): + self._contains_ids.add(__object.id) + for name, value in __object.indexing_values: if value is None: continue @@ -50,7 +53,19 @@ class Collection(Generic[T], metaclass=MetaClass): self._indexed_values[name].add(value) self._indexed_to_objects[value].append(__object) + if not from_map: + for attribute, new_object in self.contain_given_in_attribute.items(): + __object.__getattribute__(attribute).contain_collection_inside(new_object) + + for attribute, new_object in self.contain_given_in_attribute.items(): + new_object.contain_collection_inside(__object.__getattribute__(attribute)) + + for attribute, new_object in self.append_object_to_attribute.items(): + __object.__getattribute__(attribute).append(new_object, from_map = True) + def _unmap_element(self, __object: T): + self._contains_ids.remove(__object.id) + for name, value in __object.indexing_values: if value is None: continue @@ -117,14 +132,16 @@ class Collection(Generic[T], metaclass=MetaClass): results.append(self) return results - - - def _merge_in_self(self, __object: T): + + def _merge_in_self(self, __object: T, from_map: bool = False): """ 1. 
find existing objects 2. merge into existing object 3. remap existing object """ + if __object.id in self._contains_ids: + return + existing_object: DatabaseObject = None for name, value in __object.indexing_values: @@ -132,45 +149,53 @@ class Collection(Generic[T], metaclass=MetaClass): continue if value in self._indexed_values[name]: existing_object = self._indexed_to_objects[value][0] - break + if existing_object == __object: + return None + else: + break if existing_object is None: return None - + existing_object.merge(__object, replace_all_refs=True) # just a check if it really worked if existing_object.id != __object.id: raise ValueError("This should NEVER happen. Merging doesn't work.") - self._map_element(existing_object) + self._map_element(existing_object, from_map = from_map) def contains(self, __object: T) -> bool: return len(self._contained_in_sub(__object)) > 0 - def _append(self, __object: T): - self._map_element(__object) + def _append(self, __object: T, from_map: bool = False): + for attribute, to_sync_with in self.sync_on_append.items(): + to_sync_with.sync_with_other_collection(__object.__getattribute__(attribute)) + + self._map_element(__object, from_map=from_map) self._data.append(__object) - def append(self, __object: Optional[T], already_is_parent: bool = False): + def append(self, __object: Optional[T], already_is_parent: bool = False, from_map: bool = False): if __object is None: return + if __object.id in self._contains_ids: + return exists_in_collection = self._contained_in_sub(__object) if len(exists_in_collection) and self is exists_in_collection[0]: # assuming that the object already is contained in the correct collections if not already_is_parent: - self._merge_in_self(__object) + self._merge_in_self(__object, from_map = from_map) return if not len(exists_in_collection): - self._append(__object) + self._append(__object, from_map=from_map) else: - exists_in_collection[0]._merge_in_self(__object) + 
exists_in_collection[0]._merge_in_self(__object, from_map = from_map) if not already_is_parent or not self._is_root: for parent_collection in self._get_parents_of_multiple_contained_children(__object): - parent_collection.append(__object, already_is_parent=True) + parent_collection.append(__object, already_is_parent=True, from_map=from_map) def extend(self, __iterable: Optional[Iterable[T]]): if __iterable is None: @@ -202,7 +227,7 @@ class Collection(Generic[T], metaclass=MetaClass): # now the ugly part # replace all refs of the other element with this one - self.merge(equal_collection) + self._risky_merge(equal_collection) def contain_collection_inside(self, sub_collection: "Collection"): diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index d6d4a09..9431db2 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -153,7 +153,7 @@ class DatabaseObject(metaclass=MetaClass): if other is None: return - if self is other: + if self.id == other.id: return if not isinstance(other, type(self)): @@ -173,7 +173,7 @@ class DatabaseObject(metaclass=MetaClass): setattr(self, simple_attribute, getattr(other, simple_attribute)) if replace_all_refs: - super().merge(other) + self._risky_merge(other) def strip_details(self): for collection in type(self).DOWNWARDS_COLLECTION_STRING_ATTRIBUTES: diff --git a/src/music_kraken/utils/support_classes/hacking.py b/src/music_kraken/utils/support_classes/hacking.py index a5e7851..abcf67f 100644 --- a/src/music_kraken/utils/support_classes/hacking.py +++ b/src/music_kraken/utils/support_classes/hacking.py @@ -9,21 +9,23 @@ class Lake: self.id_to_object: Dict[int, object] = {} def get_real_object(self, db_object: object) -> object: - def _get_real_id(_id: int) -> int: - return self.redirects.get(_id, _id) + _id = id(db_object) + while _id in self.redirects: + _id = self.redirects[_id] - _id = _get_real_id(id(db_object)) - if _id not in self.id_to_object: + try: + 
return self.id_to_object[_id] + except KeyError: self.add(db_object) - - return self.id_to_object[_id] + return db_object def add(self, db_object: object): self.id_to_object[id(db_object)] = db_object def override(self, to_override: object, new_db_object: object): self.redirects[id(to_override)] = id(new_db_object) - del self.id_to_object[id(to_override)] + if id(to_override) in self.id_to_object: + del self.id_to_object[id(to_override)] lake = Lake() @@ -32,17 +34,13 @@ lake = Lake() def wrapper(method): @wraps(method) def wrapped(*args, **kwargs): - if len(args) >= 0 and method.__name__ != "__init__": - _self = lake.get_real_object(args[0]) - args = (_self, *args[1:]) - - return method(*args, **kwargs) + return method(*(lake.get_real_object(args[0]), *args[1:]), **kwargs) return wrapped class BaseClass: - def merge(self, to_replace): + def _risky_merge(self, to_replace): lake.override(to_replace, self) @@ -57,7 +55,7 @@ class MetaClass(type): newClassDict[attributeName] = attribute for key, value in object.__dict__.items( ): - if hasattr( value, '__call__' ) and value not in newClassDict and key not in ("__new__", "__repr__", "__init__"): + if hasattr( value, '__call__' ) and value not in newClassDict and key not in ("__new__", "__init__"): newClassDict[key] = wrapper(value) new_instance = type.__new__(meta, classname, bases, newClassDict) From 513054a0feb81b31d04025e52d2368fd4f7ebcaf Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 19 Dec 2023 13:58:39 +0100 Subject: [PATCH 033/104] feat: added new implementation --- .idea/misc.xml | 3 + .idea/music-downloader.iml | 3 +- .idea/vcs.xml | 1 - src/create_custom_objects.py | 26 +- src/music_kraken/objects/collection.py | 132 +++--- src/music_kraken/objects/country.py | 36 +- src/music_kraken/objects/new_collection.py | 257 ++++++++++++ src/music_kraken/objects/old_collection.py | 395 ++++++++++-------- src/music_kraken/objects/parents.py | 163 +++++++- src/music_kraken/objects/song.py | 195 ++++----- 
src/music_kraken/objects/source.py | 3 +- src/music_kraken/utils/exception/objects.py | 10 + .../utils/support_classes/hacking.py | 59 ++- 13 files changed, 895 insertions(+), 388 deletions(-) create mode 100644 src/music_kraken/utils/exception/objects.py diff --git a/.idea/misc.xml b/.idea/misc.xml index 6468d4f..df5fc58 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,7 @@ + + \ No newline at end of file diff --git a/.idea/music-downloader.iml b/.idea/music-downloader.iml index d3aa814..27eed28 100644 --- a/.idea/music-downloader.iml +++ b/.idea/music-downloader.iml @@ -3,9 +3,10 @@ + - + diff --git a/.idea/vcs.xml b/.idea/vcs.xml index 1a15e6d..35eb1dd 100644 --- a/.idea/vcs.xml +++ b/.idea/vcs.xml @@ -2,6 +2,5 @@ - \ No newline at end of file diff --git a/src/create_custom_objects.py b/src/create_custom_objects.py index a61da6c..49d58a9 100644 --- a/src/create_custom_objects.py +++ b/src/create_custom_objects.py @@ -9,7 +9,23 @@ from music_kraken.objects import ( from music_kraken.objects.collection import Collection from music_kraken.utils.enums import SourcePages +song = Song(title="Sad Story", isrc="testTest") +other_song = Song(title="hihi", genre="dsbm") +song.merge(other_song) + +print(song.__dict__) +print(other_song.__dict__) + +other_song.title = ":3" + +print(song.__dict__) +print(other_song.__dict__) + + +print(song) + +""" only_smile = Artist( name="Only Smile", source_list=[Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/")], @@ -100,10 +116,16 @@ def add_to_objects_dump(db_obj: DatabaseObject): add_to_objects_dump(only_smile) for _id, _object in objects_by_id.items(): - print(_id, _object, sep=": ") + try: + print(_id, _object.title, sep=": ") + except AttributeError: + try: + print(_id, _object.name, sep=": ") + except AttributeError: + print(_id, _object, sep=": ") print(only_smile) - +""" """ c = Collection([Song(title="hi"), Song(title="hi2"), Song(title="hi3")]) c1 = Collection([Song(title="he"), Song(title="hi5")]) 
diff --git a/src/music_kraken/objects/collection.py b/src/music_kraken/objects/collection.py index f105de9..de98f47 100644 --- a/src/music_kraken/objects/collection.py +++ b/src/music_kraken/objects/collection.py @@ -1,14 +1,13 @@ -from typing import List, Iterable, Iterator, Optional, TypeVar, Generic, Dict, Type +from __future__ import annotations + from collections import defaultdict +from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator +from .parents import OuterProxy -from .parents import DatabaseObject -from ..utils.support_classes.hacking import MetaClass +T = TypeVar('T', bound=OuterProxy) -T = TypeVar('T', bound=DatabaseObject) - - -class Collection(Generic[T], metaclass=MetaClass): +class Collection(Generic[T]): _data: List[T] _indexed_values: Dict[str, set] @@ -17,16 +16,18 @@ class Collection(Generic[T], metaclass=MetaClass): shallow_list = property(fget=lambda self: self.data) def __init__( - self, data: Optional[Iterable[T]], - sync_on_append: Dict[str, "Collection"] = None, - contain_given_in_attribute: Dict[str, "Collection"] = None, - contain_attribute_in_given: Dict[str, "Collection"] = None, - append_object_to_attribute: Dict[str, DatabaseObject] = None + self, + data: Optional[Iterable[T]] = None, + sync_on_append: Dict[str, Collection] = None, + contain_given_in_attribute: Dict[str, Collection] = None, + contain_attribute_in_given: Dict[str, Collection] = None, + append_object_to_attribute: Dict[str, T] = None ) -> None: self._contains_ids = set() self._data = [] - self.upper_collections: List[Collection[T]] = [] - self.contained_collections: List[Collection[T]] = [] + + self.parents: List[Collection[T]] = [] + self.children: List[Collection[T]] = [] # List of collection attributes that should be modified on append # Key: collection attribute (str) of appended element @@ -34,13 +35,13 @@ class Collection(Generic[T], metaclass=MetaClass): self.sync_on_append: Dict[str, Collection] = sync_on_append or {} 
self.contain_given_in_attribute: Dict[str, Collection] = contain_given_in_attribute or {} self.contain_attribute_in_given: Dict[str, Collection] = contain_attribute_in_given or {} - self.append_object_to_attribute: Dict[str, DatabaseObject] = append_object_to_attribute or {} + self.append_object_to_attribute: Dict[str, T] = append_object_to_attribute or {} self.contain_self_on_append: List[str] = [] self._indexed_values = defaultdict(set) self._indexed_to_objects = defaultdict(list) - + self.extend(data) def _map_element(self, __object: T, from_map: bool = False): @@ -56,13 +57,13 @@ class Collection(Generic[T], metaclass=MetaClass): if not from_map: for attribute, new_object in self.contain_given_in_attribute.items(): __object.__getattribute__(attribute).contain_collection_inside(new_object) - + for attribute, new_object in self.contain_given_in_attribute.items(): new_object.contain_collection_inside(__object.__getattribute__(attribute)) for attribute, new_object in self.append_object_to_attribute.items(): - __object.__getattribute__(attribute).append(new_object, from_map = True) - + __object.__getattribute__(attribute).append(new_object, from_map=True) + def _unmap_element(self, __object: T): self._contains_ids.remove(__object.id) @@ -71,7 +72,7 @@ class Collection(Generic[T], metaclass=MetaClass): continue if value not in self._indexed_values[name]: continue - + try: self._indexed_to_objects[value].remove(__object) except ValueError: @@ -81,59 +82,62 @@ class Collection(Generic[T], metaclass=MetaClass): self._indexed_values[name].remove(value) def _contained_in_self(self, __object: T) -> bool: + if __object.id in self._contains_ids: + return True + for name, value in __object.indexing_values: if value is None: continue if value in self._indexed_values[name]: return True return False - - def _get_root_collections(self) -> List["Collection"]: - if not len(self.upper_collections): + + def _get_root_collections(self) -> List[Collection]: + if not len(self.parents): 
return [self] - + root_collections = [] - for upper_collection in self.upper_collections: + for upper_collection in self.parents: root_collections.extend(upper_collection._get_root_collections()) return root_collections @property def _is_root(self) -> bool: - return len(self.upper_collections) <= 0 + return len(self.parents) <= 0 - def _contained_in_sub(self, __object: T, break_at_first: bool = True) -> List["Collection"]: + def _contained_in_sub(self, __object: T, break_at_first: bool = True) -> List[Collection]: results = [] if self._contained_in_self(__object): return [self] - - for collection in self.contained_collections: + + for collection in self.children: results.extend(collection._contained_in_sub(__object, break_at_first=break_at_first)) if break_at_first: return results return results - + def _get_parents_of_multiple_contained_children(self, __object: T): results = [] - if len(self.contained_collections) < 2 or self._contained_in_self(__object): + if len(self.children) < 2 or self._contained_in_self(__object): return results - + count = 0 - for collection in self.contained_collections: + for collection in self.children: sub_results = collection._get_parents_of_multiple_contained_children(__object) - + if len(sub_results) > 0: count += 1 results.extend(sub_results) - + if count >= 2: results.append(self) return results - - def _merge_in_self(self, __object: T, from_map: bool = False): + + def merge_into_self(self, __object: T, from_map: bool = False): """ 1. find existing objects 2. 
merge into existing object @@ -141,30 +145,30 @@ class Collection(Generic[T], metaclass=MetaClass): """ if __object.id in self._contains_ids: return - - existing_object: DatabaseObject = None + + existing_object: T = None for name, value in __object.indexing_values: if value is None: continue if value in self._indexed_values[name]: existing_object = self._indexed_to_objects[value][0] - if existing_object == __object: + if existing_object.id == __object.id: return None - else: - break - + + break + if existing_object is None: return None - existing_object.merge(__object, replace_all_refs=True) + existing_object.merge(__object) # just a check if it really worked if existing_object.id != __object.id: raise ValueError("This should NEVER happen. Merging doesn't work.") - self._map_element(existing_object, from_map = from_map) - + self._map_element(existing_object, from_map=from_map) + def contains(self, __object: T) -> bool: return len(self._contained_in_sub(__object)) > 0 @@ -178,37 +182,39 @@ class Collection(Generic[T], metaclass=MetaClass): def append(self, __object: Optional[T], already_is_parent: bool = False, from_map: bool = False): if __object is None: return + if __object.id in self._contains_ids: return - + exists_in_collection = self._contained_in_sub(__object) if len(exists_in_collection) and self is exists_in_collection[0]: # assuming that the object already is contained in the correct collections if not already_is_parent: - self._merge_in_self(__object, from_map = from_map) + self.merge_into_self(__object, from_map=from_map) return if not len(exists_in_collection): self._append(__object, from_map=from_map) else: - exists_in_collection[0]._merge_in_self(__object, from_map = from_map) + pass + exists_in_collection[0].merge_into_self(__object, from_map=from_map) if not already_is_parent or not self._is_root: for parent_collection in self._get_parents_of_multiple_contained_children(__object): + pass parent_collection.append(__object, already_is_parent=True, 
from_map=from_map) def extend(self, __iterable: Optional[Iterable[T]]): if __iterable is None: return - + for __object in __iterable: self.append(__object) - - def sync_with_other_collection(self, equal_collection: "Collection"): + def sync_with_other_collection(self, equal_collection: Collection): """ If two collections always need to have the same values, this can be used. - + Internally: 1. import the data from other to self - _data @@ -222,31 +228,31 @@ class Collection(Generic[T], metaclass=MetaClass): # this will be done in the next step. self.extend(equal_collection._data) # add all submodules - for equal_sub_collection in equal_collection.contained_collections: + for equal_sub_collection in equal_collection.children: self.contain_collection_inside(equal_sub_collection) # now the ugly part # replace all refs of the other element with this one self._risky_merge(equal_collection) - def contain_collection_inside(self, sub_collection: "Collection"): """ This collection will ALWAYS contain everything from the passed in collection """ - if sub_collection in self.contained_collections: + if sub_collection in self.children: return - - self.contained_collections.append(sub_collection) - sub_collection.upper_collections.append(self) + + self.children.append(sub_collection) + sub_collection.parents.append(self) @property def data(self) -> List[T]: - return [*self._data, *(__object for collection in self.contained_collections for __object in collection.shallow_list)] - + return [*self._data, + *(__object for collection in self.children for __object in collection.shallow_list)] + def __len__(self) -> int: - return len(self._data) + sum(len(collection) for collection in self.contained_collections) + return len(self._data) + sum(len(collection) for collection in self.children) def __iter__(self) -> Iterator[T]: for element in self._data: - yield element \ No newline at end of file + yield element diff --git a/src/music_kraken/objects/country.py 
b/src/music_kraken/objects/country.py index aed4842..c9aeaa3 100644 --- a/src/music_kraken/objects/country.py +++ b/src/music_kraken/objects/country.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from dataclasses import dataclass import pycountry @@ -9,7 +11,6 @@ CountryTyping = type(list(pycountry.countries)[0]) emoji_map = {'AD': '🇦🇩', 'AE': '🇦🇪', 'AF': '🇦🇫', 'AG': '🇦🇬', 'AI': '🇦🇮', 'AL': '🇦🇱', 'AM': '🇦🇲', 'AO': '🇦🇴', 'AQ': '🇦🇶', 'AR': '🇦🇷', 'AS': '🇦🇸', 'AT': '🇦🇹', 'AU': '🇦🇺', 'AW': '🇦🇼', 'AX': '🇦🇽', 'AZ': '🇦🇿', 'BA': '🇧🇦', 'BB': '🇧🇧', 'BD': '🇧🇩', 'BE': '🇧🇪', 'BF': '🇧🇫', 'BG': '🇧🇬', 'BH': '🇧🇭', 'BI': '🇧🇮', 'BJ': '🇧🇯', 'BL': '🇧🇱', 'BM': '🇧🇲', 'BN': '🇧🇳', 'BO': '🇧🇴', 'BQ': '🇧🇶', 'BR': '🇧🇷', 'BS': '🇧🇸', 'BT': '🇧🇹', 'BV': '🇧🇻', 'BW': '🇧🇼', 'BY': '🇧🇾', 'BZ': '🇧🇿', 'CA': '🇨🇦', 'CC': '🇨🇨', 'CD': '🇨🇩', 'CF': '🇨🇫', 'CG': '🇨🇬', 'CH': '🇨🇭', 'CI': '🇨🇮', 'CK': '🇨🇰', 'CL': '🇨🇱', 'CM': '🇨🇲', 'CN': '🇨🇳', 'CO': '🇨🇴', 'CR': '🇨🇷', 'CU': '🇨🇺', 'CV': '🇨🇻', 'CW': '🇨🇼', 'CX': '🇨🇽', 'CY': '🇨🇾', 'CZ': '🇨🇿', 'DE': '🇩🇪', 'DJ': '🇩🇯', 'DK': '🇩🇰', 'DM': '🇩🇲', 'DO': '🇩🇴', 'DZ': '🇩🇿', 'EC': '🇪🇨', 'EE': '🇪🇪', 'EG': '🇪🇬', 'EH': '🇪🇭', 'ER': '🇪🇷', 'ES': '🇪🇸', 'ET': '🇪🇹', 'FI': '🇫🇮', 'FJ': '🇫🇯', 'FK': '🇫🇰', 'FM': '🇫🇲', 'FO': '🇫🇴', 'FR': '🇫🇷', 'GA': '🇬🇦', 'GB': '🇬🇧', 'GD': '🇬🇩', 'GE': '🇬🇪', 'GF': '🇬🇫', 'GG': '🇬🇬', 'GH': '🇬🇭', 'GI': '🇬🇮', 'GL': '🇬🇱', 'GM': '🇬🇲', 'GN': '🇬🇳', 'GP': '🇬🇵', 'GQ': '🇬🇶', 'GR': '🇬🇷', 'GS': '🇬🇸', 'GT': '🇬🇹', 'GU': '🇬🇺', 'GW': '🇬🇼', 'GY': '🇬🇾', 'HK': '🇭🇰', 'HM': '🇭🇲', 'HN': '🇭🇳', 'HR': '🇭🇷', 'HT': '🇭🇹', 'HU': '🇭🇺', 'ID': '🇮🇩', 'IE': '🇮🇪', 'IL': '🇮🇱', 'IM': '🇮🇲', 'IN': '🇮🇳', 'IO': '🇮🇴', 'IQ': '🇮🇶', 'IR': '🇮🇷', 'IS': '🇮🇸', 'IT': '🇮🇹', 'JE': '🇯🇪', 'JM': '🇯🇲', 'JO': '🇯🇴', 'JP': '🇯🇵', 'KE': '🇰🇪', 'KG': '🇰🇬', 'KH': '🇰🇭', 'KI': '🇰🇮', 'KM': '🇰🇲', 'KN': '🇰🇳', 'KP': '🇰🇵', 'KR': '🇰🇷', 'KW': '🇰🇼', 'KY': '🇰🇾', 'KZ': '🇰🇿', 'LA': '🇱🇦', 'LB': '🇱🇧', 'LC': '🇱🇨', 'LI': '🇱🇮', 'LK': '🇱🇰', 'LR': '🇱🇷', 'LS': '🇱🇸', 'LT': '🇱🇹', 'LU': '🇱🇺', 'LV': '🇱🇻', 'LY': '🇱🇾', 'MA': '🇲🇦', 'MC': '🇲🇨', 'MD': 
'🇲🇩', 'ME': '🇲🇪', 'MF': '🇲🇫', 'MG': '🇲🇬', 'MH': '🇲🇭', 'MK': '🇲🇰', 'ML': '🇲🇱', 'MM': '🇲🇲', 'MN': '🇲🇳', 'MO': '🇲🇴', 'MP': '🇲🇵', 'MQ': '🇲🇶', 'MR': '🇲🇷', 'MS': '🇲🇸', 'MT': '🇲🇹', 'MU': '🇲🇺', 'MV': '🇲🇻', 'MW': '🇲🇼', 'MX': '🇲🇽', 'MY': '🇲🇾', 'MZ': '🇲🇿', 'NA': '🇳🇦', 'NC': '🇳🇨', 'NE': '🇳🇪', 'NF': '🇳🇫', 'NG': '🇳🇬', 'NI': '🇳🇮', 'NL': '🇳🇱', 'NO': '🇳🇴', 'NP': '🇳🇵', 'NR': '🇳🇷', 'NU': '🇳🇺', 'NZ': '🇳🇿', 'OM': '🇴🇲', 'PA': '🇵🇦', 'PE': '🇵🇪', 'PF': '🇵🇫', 'PG': '🇵🇬', 'PH': '🇵🇭', 'PK': '🇵🇰', 'PL': '🇵🇱', 'PM': '🇵🇲', 'PN': '🇵🇳', 'PR': '🇵🇷', 'PS': '🇵🇸', 'PT': '🇵🇹', 'PW': '🇵🇼', 'PY': '🇵🇾', 'QA': '🇶🇦', 'RE': '🇷🇪', 'RO': '🇷🇴', 'RS': '🇷🇸', 'RU': '🇷🇺', 'RW': '🇷🇼', 'SA': '🇸🇦', 'SB': '🇸🇧', 'SC': '🇸🇨', 'SD': '🇸🇩', 'SE': '🇸🇪', 'SG': '🇸🇬', 'SH': '🇸🇭', 'SI': '🇸🇮', 'SJ': '🇸🇯', 'SK': '🇸🇰', 'SL': '🇸🇱', 'SM': '🇸🇲', 'SN': '🇸🇳', 'SO': '🇸🇴', 'SR': '🇸🇷', 'SS': '🇸🇸', 'ST': '🇸🇹', 'SV': '🇸🇻', 'SX': '🇸🇽', 'SY': '🇸🇾', 'SZ': '🇸🇿', 'TC': '🇹🇨', 'TD': '🇹🇩', 'TF': '🇹🇫', 'TG': '🇹🇬', 'TH': '🇹🇭', 'TJ': '🇹🇯', 'TK': '🇹🇰', 'TL': '🇹🇱', 'TM': '🇹🇲', 'TN': '🇹🇳', 'TO': '🇹🇴', 'TR': '🇹🇷', 'TT': '🇹🇹', 'TV': '🇹🇻', 'TW': '🇹🇼', 'TZ': '🇹🇿', 'UA': '🇺🇦', 'UG': '🇺🇬', 'UM': '🇺🇲', 'US': '🇺🇸', 'UY': '🇺🇾', 'UZ': '🇺🇿', 'VA': '🇻🇦', 'VC': '🇻🇨', 'VE': '🇻🇪', 'VG': '🇻🇬', 'VI': '🇻🇮', 'VN': '🇻🇳', 'VU': '🇻🇺', 'WF': '🇼🇫', 'WS': '🇼🇸', 'YE': '🇾🇪', 'YT': '🇾🇹', 'ZA': '🇿🇦', 'ZM': '🇿🇲', 'ZW': '🇿🇼', '🇦🇩': 'AD', '🇦🇪': 'AE', '🇦🇫': 'AF', '🇦🇬': 'AG', '🇦🇮': 'AI', '🇦🇱': 'AL', '🇦🇲': 'AM', '🇦🇴': 'AO', '🇦🇶': 'AQ', '🇦🇷': 'AR', '🇦🇸': 'AS', '🇦🇹': 'AT', '🇦🇺': 'AU', '🇦🇼': 'AW', '🇦🇽': 'AX', '🇦🇿': 'AZ', '🇧🇦': 'BA', '🇧🇧': 'BB', '🇧🇩': 'BD', '🇧🇪': 'BE', '🇧🇫': 'BF', '🇧🇬': 'BG', '🇧🇭': 'BH', '🇧🇮': 'BI', '🇧🇯': 'BJ', '🇧🇱': 'BL', '🇧🇲': 'BM', '🇧🇳': 'BN', '🇧🇴': 'BO', '🇧🇶': 'BQ', '🇧🇷': 'BR', '🇧🇸': 'BS', '🇧🇹': 'BT', '🇧🇻': 'BV', '🇧🇼': 'BW', '🇧🇾': 'BY', '🇧🇿': 'BZ', '🇨🇦': 'CA', '🇨🇨': 'CC', '🇨🇩': 'CD', '🇨🇫': 'CF', '🇨🇬': 'CG', '🇨🇭': 'CH', '🇨🇮': 'CI', '🇨🇰': 'CK', '🇨🇱': 'CL', '🇨🇲': 'CM', '🇨🇳': 'CN', '🇨🇴': 'CO', '🇨🇷': 'CR', '🇨🇺': 'CU', '🇨🇻': 'CV', '🇨🇼': 'CW', '🇨🇽': 'CX', '🇨🇾': 'CY', '🇨🇿': 'CZ', 
'🇩🇪': 'DE', '🇩🇯': 'DJ', '🇩🇰': 'DK', '🇩🇲': 'DM', '🇩🇴': 'DO', '🇩🇿': 'DZ', '🇪🇨': 'EC', '🇪🇪': 'EE', '🇪🇬': 'EG', '🇪🇭': 'EH', '🇪🇷': 'ER', '🇪🇸': 'ES', '🇪🇹': 'ET', '🇫🇮': 'FI', '🇫🇯': 'FJ', '🇫🇰': 'FK', '🇫🇲': 'FM', '🇫🇴': 'FO', '🇫🇷': 'FR', '🇬🇦': 'GA', '🇬🇧': 'GB', '🇬🇩': 'GD', '🇬🇪': 'GE', '🇬🇫': 'GF', '🇬🇬': 'GG', '🇬🇭': 'GH', '🇬🇮': 'GI', '🇬🇱': 'GL', '🇬🇲': 'GM', '🇬🇳': 'GN', '🇬🇵': 'GP', '🇬🇶': 'GQ', '🇬🇷': 'GR', '🇬🇸': 'GS', '🇬🇹': 'GT', '🇬🇺': 'GU', '🇬🇼': 'GW', '🇬🇾': 'GY', '🇭🇰': 'HK', '🇭🇲': 'HM', '🇭🇳': 'HN', '🇭🇷': 'HR', '🇭🇹': 'HT', '🇭🇺': 'HU', '🇮🇩': 'ID', '🇮🇪': 'IE', '🇮🇱': 'IL', '🇮🇲': 'IM', '🇮🇳': 'IN', '🇮🇴': 'IO', '🇮🇶': 'IQ', '🇮🇷': 'IR', '🇮🇸': 'IS', '🇮🇹': 'IT', '🇯🇪': 'JE', '🇯🇲': 'JM', '🇯🇴': 'JO', '🇯🇵': 'JP', '🇰🇪': 'KE', '🇰🇬': 'KG', '🇰🇭': 'KH', '🇰🇮': 'KI', '🇰🇲': 'KM', '🇰🇳': 'KN', '🇰🇵': 'KP', '🇰🇷': 'KR', '🇰🇼': 'KW', '🇰🇾': 'KY', '🇰🇿': 'KZ', '🇱🇦': 'LA', '🇱🇧': 'LB', '🇱🇨': 'LC', '🇱🇮': 'LI', '🇱🇰': 'LK', '🇱🇷': 'LR', '🇱🇸': 'LS', '🇱🇹': 'LT', '🇱🇺': 'LU', '🇱🇻': 'LV', '🇱🇾': 'LY', '🇲🇦': 'MA', '🇲🇨': 'MC', '🇲🇩': 'MD', '🇲🇪': 'ME', '🇲🇫': 'MF', '🇲🇬': 'MG', '🇲🇭': 'MH', '🇲🇰': 'MK', '🇲🇱': 'ML', '🇲🇲': 'MM', '🇲🇳': 'MN', '🇲🇴': 'MO', '🇲🇵': 'MP', '🇲🇶': 'MQ', '🇲🇷': 'MR', '🇲🇸': 'MS', '🇲🇹': 'MT', '🇲🇺': 'MU', '🇲🇻': 'MV', '🇲🇼': 'MW', '🇲🇽': 'MX', '🇲🇾': 'MY', '🇲🇿': 'MZ', '🇳🇦': 'NA', '🇳🇨': 'NC', '🇳🇪': 'NE', '🇳🇫': 'NF', '🇳🇬': 'NG', '🇳🇮': 'NI', '🇳🇱': 'NL', '🇳🇴': 'NO', '🇳🇵': 'NP', '🇳🇷': 'NR', '🇳🇺': 'NU', '🇳🇿': 'NZ', '🇴🇲': 'OM', '🇵🇦': 'PA', '🇵🇪': 'PE', '🇵🇫': 'PF', '🇵🇬': 'PG', '🇵🇭': 'PH', '🇵🇰': 'PK', '🇵🇱': 'PL', '🇵🇲': 'PM', '🇵🇳': 'PN', '🇵🇷': 'PR', '🇵🇸': 'PS', '🇵🇹': 'PT', '🇵🇼': 'PW', '🇵🇾': 'PY', '🇶🇦': 'QA', '🇷🇪': 'RE', '🇷🇴': 'RO', '🇷🇸': 'RS', '🇷🇺': 'RU', '🇷🇼': 'RW', '🇸🇦': 'SA', '🇸🇧': 'SB', '🇸🇨': 'SC', '🇸🇩': 'SD', '🇸🇪': 'SE', '🇸🇬': 'SG', '🇸🇭': 'SH', '🇸🇮': 'SI', '🇸🇯': 'SJ', '🇸🇰': 'SK', '🇸🇱': 'SL', '🇸🇲': 'SM', '🇸🇳': 'SN', '🇸🇴': 'SO', '🇸🇷': 'SR', '🇸🇸': 'SS', '🇸🇹': 'ST', '🇸🇻': 'SV', '🇸🇽': 'SX', '🇸🇾': 'SY', '🇸🇿': 'SZ', '🇹🇨': 'TC', '🇹🇩': 'TD', '🇹🇫': 'TF', '🇹🇬': 'TG', '🇹🇭': 'TH', '🇹🇯': 'TJ', '🇹🇰': 'TK', '🇹🇱': 'TL', '🇹🇲': 'TM', '🇹🇳': 
'TN', '🇹🇴': 'TO', '🇹🇷': 'TR', '🇹🇹': 'TT', '🇹🇻': 'TV', '🇹🇼': 'TW', '🇹🇿': 'TZ', '🇺🇦': 'UA', '🇺🇬': 'UG', '🇺🇲': 'UM', '🇺🇸': 'US', '🇺🇾': 'UY', '🇺🇿': 'UZ', '🇻🇦': 'VA', '🇻🇨': 'VC', '🇻🇪': 'VE', '🇻🇬': 'VG', '🇻🇮': 'VI', '🇻🇳': 'VN', '🇻🇺': 'VU', '🇼🇫': 'WF', '🇼🇸': 'WS', '🇾🇪': 'YE', '🇾🇹': 'YT', '🇿🇦': 'ZA', '🇿🇲': 'ZM', '🇿🇼': 'ZW'} - @dataclass class Country: alpha_2: str @@ -19,7 +20,7 @@ class Country: emoji: str @classmethod - def by_pycountry(cls, country: CountryTyping, emoji: str = "") -> "Country": + def by_pycountry(cls, country: CountryTyping, emoji: str = "") -> "Country": emoji = "" alpha_2 = country.alpha_2.upper() @@ -40,7 +41,7 @@ class Country: return cls.by_pycountry(pycountry.countries.get(alpha_2=alpha_2)) @classmethod - def by_apha_3(cls, alpha_3: str) -> "Country": + def by_alpha_3(cls, alpha_3: str) -> "Country": return cls.by_pycountry(pycountry.countries.get(alpha_3=alpha_3)) @classmethod @@ -63,4 +64,31 @@ class Country: return hash(self.alpha_3) def __eq__(self, __value: object) -> bool: - return self.__hash__() == __value.__hash__() \ No newline at end of file + return self.__hash__() == __value.__hash__() + + +@dataclass +class Language: + alpha_2: str + alpha_3: str + name: str + numeric: int + + @classmethod + def by_pycountry(cls, language) -> Language: + alpha_2 = language.alpha_2.upper() + + return cls( + alpha_2=alpha_2, + alpha_3=language.alpha_3, + name=language.name, + numeric=language.numeric, + ) + + @classmethod + def by_alpha_2(cls, alpha_2: str) -> Language: + return cls.by_pycountry(pycountry.languages.get(alpha_2=alpha_2)) + + @classmethod + def by_alpha_3(cls, alpha_3: str) -> Language: + return cls.by_pycountry(pycountry.languages.get(alpha_3=alpha_3)) diff --git a/src/music_kraken/objects/new_collection.py b/src/music_kraken/objects/new_collection.py index e69de29..1a556b6 100644 --- a/src/music_kraken/objects/new_collection.py +++ b/src/music_kraken/objects/new_collection.py @@ -0,0 +1,257 @@ +from __future__ import annotations + 
+from collections import defaultdict +from typing import TypeVar, Generic, Dict, Optional, Iterable, List +from .parents import OuterProxy + +T = TypeVar('T', bound=OuterProxy) + + +class Collection(Generic[T]): + _data: List[T] + + _indexed_values: Dict[str, set] + _indexed_to_objects: Dict[any, list] + + shallow_list = property(fget=lambda self: self.data) + + def __init__( + self, + data: Optional[Iterable[T]] = None, + sync_on_append: Dict[str, "Collection"] = None, + contain_given_in_attribute: Dict[str, "Collection"] = None, + contain_attribute_in_given: Dict[str, "Collection"] = None, + append_object_to_attribute: Dict[str, T] = None + ) -> None: + self._contains_ids = set() + self._data = [] + self.upper_collections: List[Collection[T]] = [] + self.contained_collections: List[Collection[T]] = [] + + # List of collection attributes that should be modified on append + # Key: collection attribute (str) of appended element + # Value: main collection to sync to + self.sync_on_append: Dict[str, Collection] = sync_on_append or {} + self.contain_given_in_attribute: Dict[str, Collection] = contain_given_in_attribute or {} + self.contain_attribute_in_given: Dict[str, Collection] = contain_attribute_in_given or {} + self.append_object_to_attribute: Dict[str, T] = append_object_to_attribute or {} + + self.contain_self_on_append: List[str] = [] + + self._indexed_values = defaultdict(set) + self._indexed_to_objects = defaultdict(list) + + self.extend(data) + + def _map_element(self, __object: T, from_map: bool = False): + self._contains_ids.add(__object.id) + + for name, value in __object.indexing_values: + if value is None: + continue + + self._indexed_values[name].add(value) + self._indexed_to_objects[value].append(__object) + + if not from_map: + for attribute, new_object in self.contain_given_in_attribute.items(): + __object.__getattribute__(attribute).contain_collection_inside(new_object) + + for attribute, new_object in self.contain_given_in_attribute.items(): + 
new_object.contain_collection_inside(__object.__getattribute__(attribute)) + + for attribute, new_object in self.append_object_to_attribute.items(): + __object.__getattribute__(attribute).append(new_object, from_map=True) + + def _unmap_element(self, __object: T): + self._contains_ids.remove(__object.id) + + for name, value in __object.indexing_values: + if value is None: + continue + if value not in self._indexed_values[name]: + continue + + try: + self._indexed_to_objects[value].remove(__object) + except ValueError: + continue + + if not len(self._indexed_to_objects[value]): + self._indexed_values[name].remove(value) + + def _contained_in_self(self, __object: T) -> bool: + if __object.id in self._contains_ids: + return True + + for name, value in __object.indexing_values: + if value is None: + continue + if value in self._indexed_values[name]: + return True + return False + + def _get_root_collections(self) -> List["Collection"]: + if not len(self.upper_collections): + return [self] + + root_collections = [] + for upper_collection in self.upper_collections: + root_collections.extend(upper_collection._get_root_collections()) + return root_collections + + @property + def _is_root(self) -> bool: + return len(self.upper_collections) <= 0 + + def _contained_in_sub(self, __object: T, break_at_first: bool = True) -> List["Collection"]: + results = [] + + if self._contained_in_self(__object): + return [self] + + for collection in self.contained_collections: + results.extend(collection._contained_in_sub(__object, break_at_first=break_at_first)) + if break_at_first: + return results + + return results + + def _get_parents_of_multiple_contained_children(self, __object: T): + results = [] + if len(self.contained_collections) < 2 or self._contained_in_self(__object): + return results + + count = 0 + + for collection in self.contained_collections: + sub_results = collection._get_parents_of_multiple_contained_children(__object) + + if len(sub_results) > 0: + count += 1 + 
results.extend(sub_results) + + if count >= 2: + results.append(self) + + return results + + def _merge_in_self(self, __object: T, from_map: bool = False): + """ + 1. find existing objects + 2. merge into existing object + 3. remap existing object + """ + if __object.id in self._contains_ids: + return + + existing_object: DatabaseObject = None + + for name, value in __object.indexing_values: + if value is None: + continue + if value in self._indexed_values[name]: + existing_object = self._indexed_to_objects[value][0] + if existing_object.id == __object.id: + return None + + break + + if existing_object is None: + return None + + existing_object.merge(__object, replace_all_refs=True) + + # just a check if it really worked + if existing_object.id != __object.id: + raise ValueError("This should NEVER happen. Merging doesn't work.") + + self._map_element(existing_object, from_map=from_map) + + def contains(self, __object: T) -> bool: + return len(self._contained_in_sub(__object)) > 0 + + def _append(self, __object: T, from_map: bool = False): + for attribute, to_sync_with in self.sync_on_append.items(): + pass + to_sync_with.sync_with_other_collection(__object.__getattribute__(attribute)) + + self._map_element(__object, from_map=from_map) + self._data.append(__object) + + def append(self, __object: Optional[T], already_is_parent: bool = False, from_map: bool = False): + if __object is None: + return + if __object.id in self._contains_ids: + return + + exists_in_collection = self._contained_in_sub(__object) + if len(exists_in_collection) and self is exists_in_collection[0]: + # assuming that the object already is contained in the correct collections + if not already_is_parent: + self._merge_in_self(__object, from_map=from_map) + return + + if not len(exists_in_collection): + self._append(__object, from_map=from_map) + else: + pass + exists_in_collection[0]._merge_in_self(__object, from_map=from_map) + + if not already_is_parent or not self._is_root: + for 
parent_collection in self._get_parents_of_multiple_contained_children(__object): + pass + parent_collection.append(__object, already_is_parent=True, from_map=from_map) + + def extend(self, __iterable: Optional[Iterable[T]]): + if __iterable is None: + return + + for __object in __iterable: + self.append(__object) + + def sync_with_other_collection(self, equal_collection: "Collection"): + """ + If two collections always need to have the same values, this can be used. + + Internally: + 1. import the data from other to self + - _data + - contained_collections + 2. replace all refs from the other object, with refs from this object + """ + if equal_collection is self: + return + + # don't add the elements from the subelements from the other collection. + # this will be done in the next step. + self.extend(equal_collection._data) + # add all submodules + for equal_sub_collection in equal_collection.contained_collections: + self.contain_collection_inside(equal_sub_collection) + + # now the ugly part + # replace all refs of the other element with this one + self._risky_merge(equal_collection) + + def contain_collection_inside(self, sub_collection: "Collection"): + """ + This collection will ALWAYS contain everything from the passed in collection + """ + if sub_collection in self.contained_collections: + return + + self.contained_collections.append(sub_collection) + sub_collection.upper_collections.append(self) + + @property + def data(self) -> List[T]: + return [*self._data, + *(__object for collection in self.contained_collections for __object in collection.shallow_list)] + + def __len__(self) -> int: + return len(self._data) + sum(len(collection) for collection in self.contained_collections) + + def __iter__(self) -> Iterator[T]: + for element in self._data: + yield element diff --git a/src/music_kraken/objects/old_collection.py b/src/music_kraken/objects/old_collection.py index 4f50ff1..4aa8f21 100644 --- a/src/music_kraken/objects/old_collection.py +++ 
b/src/music_kraken/objects/old_collection.py @@ -1,221 +1,256 @@ -from typing import List, Iterable, Dict, TypeVar, Generic, Iterator +from typing import List, Iterable, Iterator, Optional, TypeVar, Generic, Dict, Type from collections import defaultdict -from dataclasses import dataclass from .parents import DatabaseObject -from ..utils.hooks import HookEventTypes, Hooks, Event - - -class CollectionHooks(HookEventTypes): - APPEND_NEW = "append_new" - +from ..utils.support_classes.hacking import MetaClass T = TypeVar('T', bound=DatabaseObject) -@dataclass -class AppendResult: - was_in_collection: bool - current_element: DatabaseObject - was_the_same: bool - - class Collection(Generic[T]): - """ - This a class for the iterables - like tracklist or discography - """ _data: List[T] - _by_url: dict - _by_attribute: dict + _indexed_values: Dict[str, set] + _indexed_to_objects: Dict[any, list] - def __init__(self, data: List[T] = None, element_type=None, *args, **kwargs) -> None: - # Attribute needs to point to - self.element_type = element_type + shallow_list = property(fget=lambda self: self.data) - self._data: List[T] = list() + def __init__( + self, data: Optional[Iterable[T]] = None, + sync_on_append: Dict[str, "Collection"] = None, + contain_given_in_attribute: Dict[str, "Collection"] = None, + contain_attribute_in_given: Dict[str, "Collection"] = None, + append_object_to_attribute: Dict[str, DatabaseObject] = None + ) -> None: + self._contains_ids = set() + self._data = [] + self.upper_collections: List[Collection[T]] = [] + self.contained_collections: List[Collection[T]] = [] - """ - example of attribute_to_object_map - the song objects are references pointing to objects - in _data - - ```python - { - 'id': {323: song_1, 563: song_2, 666: song_3}, - 'url': {'www.song_2.com': song_2} - } - ``` - """ - self._attribute_to_object_map: Dict[str, Dict[object, T]] = defaultdict(dict) - self._used_ids: set = set() + # List of collection attributes that should be modified 
on append + # Key: collection attribute (str) of appended element + # Value: main collection to sync to + self.sync_on_append: Dict[str, Collection] = sync_on_append or {} + self.contain_given_in_attribute: Dict[str, Collection] = contain_given_in_attribute or {} + self.contain_attribute_in_given: Dict[str, Collection] = contain_attribute_in_given or {} + self.append_object_to_attribute: Dict[str, DatabaseObject] = append_object_to_attribute or {} - self.hooks: Hooks = Hooks(self) + self.contain_self_on_append: List[str] = [] - if data is not None: - self.extend(data, merge_on_conflict=True) + self._indexed_values = defaultdict(set) + self._indexed_to_objects = defaultdict(list) - def sort(self, reverse: bool = False, **kwargs): - self._data.sort(reverse=reverse, **kwargs) + self.extend(data) - def map_element(self, element: T): - for name, value in element.indexing_values: + def _map_element(self, __object: T, from_map: bool = False): + self._contains_ids.add(__object.id) + + for name, value in __object.indexing_values: if value is None: continue - self._attribute_to_object_map[name][value] = element + self._indexed_values[name].add(value) + self._indexed_to_objects[value].append(__object) - self._used_ids.add(element.id) + if not from_map: + for attribute, new_object in self.contain_given_in_attribute.items(): + __object.__getattribute__(attribute).contain_collection_inside(new_object) - def unmap_element(self, element: T): - for name, value in element.indexing_values: + for attribute, new_object in self.contain_given_in_attribute.items(): + new_object.contain_collection_inside(__object.__getattribute__(attribute)) + + for attribute, new_object in self.append_object_to_attribute.items(): + __object.__getattribute__(attribute).append(new_object, from_map=True) + + def _unmap_element(self, __object: T): + self._contains_ids.remove(__object.id) + + for name, value in __object.indexing_values: if value is None: continue + if value not in self._indexed_values[name]: + 
continue - if value in self._attribute_to_object_map[name]: - if element is self._attribute_to_object_map[name][value]: - try: - self._attribute_to_object_map[name].pop(value) - except KeyError: - pass + try: + self._indexed_to_objects[value].remove(__object) + except ValueError: + continue - def append(self, element: T, merge_on_conflict: bool = True, - merge_into_existing: bool = True, no_hook: bool = False) -> AppendResult: + if not len(self._indexed_to_objects[value]): + self._indexed_values[name].remove(value) + + def _contained_in_self(self, __object: T) -> bool: + if __object.id in self._contains_ids: + return True + + for name, value in __object.indexing_values: + if value is None: + continue + if value in self._indexed_values[name]: + return True + return False + + def _get_root_collections(self) -> List["Collection"]: + if not len(self.upper_collections): + return [self] + + root_collections = [] + for upper_collection in self.upper_collections: + root_collections.extend(upper_collection._get_root_collections()) + return root_collections + + @property + def _is_root(self) -> bool: + return len(self.upper_collections) <= 0 + + def _contained_in_sub(self, __object: T, break_at_first: bool = True) -> List["Collection"]: + results = [] + + if self._contained_in_self(__object): + return [self] + + for collection in self.contained_collections: + results.extend(collection._contained_in_sub(__object, break_at_first=break_at_first)) + if break_at_first: + return results + + return results + + def _get_parents_of_multiple_contained_children(self, __object: T): + results = [] + if len(self.contained_collections) < 2 or self._contained_in_self(__object): + return results + + count = 0 + + for collection in self.contained_collections: + sub_results = collection._get_parents_of_multiple_contained_children(__object) + + if len(sub_results) > 0: + count += 1 + results.extend(sub_results) + + if count >= 2: + results.append(self) + + return results + + def 
_merge_in_self(self, __object: T, from_map: bool = False): """ - :param element: - :param merge_on_conflict: - :param merge_into_existing: - :return did_not_exist: + 1. find existing objects + 2. merge into existing object + 3. remap existing object """ - if element is None: - return AppendResult(False, None, False) - - for existing_element in self._data: - if element is existing_element: - return AppendResult(False, None, False) - - # if the element type has been defined in the initializer it checks if the type matches - if self.element_type is not None and not isinstance(element, self.element_type): - raise TypeError(f"{type(element)} is not the set type {self.element_type}") - - # return if the same instance of the object is in the list - for existing in self._data: - if element is existing: - return AppendResult(True, element, True) - - for name, value in element.indexing_values: - if value in self._attribute_to_object_map[name]: - existing_object = self._attribute_to_object_map[name][value] - - if not merge_on_conflict: - return AppendResult(True, existing_object, False) - - # if the object does already exist - # thus merging and don't add it afterward - if merge_into_existing: - existing_object.merge(element) - # in case any relevant data has been added (e.g. 
it remaps the old object) - self.map_element(existing_object) - return AppendResult(True, existing_object, False) - - element.merge(existing_object) - - exists_at = self._data.index(existing_object) - self._data[exists_at] = element - - self.unmap_element(existing_object) - self.map_element(element) - return AppendResult(True, existing_object, False) - - if not no_hook: - self.hooks.trigger_event(CollectionHooks.APPEND_NEW, new_object=element) - self._data.append(element) - self.map_element(element) - - return AppendResult(False, element, False) - - def extend(self, element_list: Iterable[T], merge_on_conflict: bool = True, - merge_into_existing: bool = True, no_hook: bool = False): - if element_list is None: + if __object.id in self._contains_ids: return - if len(element_list) <= 0: + + existing_object: DatabaseObject = None + + for name, value in __object.indexing_values: + if value is None: + continue + if value in self._indexed_values[name]: + existing_object = self._indexed_to_objects[value][0] + if existing_object.id == __object.id: + return None + + break + + if existing_object is None: + return None + + existing_object.merge(__object, replace_all_refs=True) + + # just a check if it really worked + if existing_object.id != __object.id: + raise ValueError("This should NEVER happen. 
Merging doesn't work.") + + self._map_element(existing_object, from_map=from_map) + + def contains(self, __object: T) -> bool: + return len(self._contained_in_sub(__object)) > 0 + + def _append(self, __object: T, from_map: bool = False): + for attribute, to_sync_with in self.sync_on_append.items(): + pass + to_sync_with.sync_with_other_collection(__object.__getattribute__(attribute)) + + self._map_element(__object, from_map=from_map) + self._data.append(__object) + + def append(self, __object: Optional[T], already_is_parent: bool = False, from_map: bool = False): + if __object is None: return - if element_list is self: + if __object.id in self._contains_ids: return - for element in element_list: - self.append(element, merge_on_conflict=merge_on_conflict, merge_into_existing=merge_into_existing, no_hook=no_hook) - def sync_collection(self, collection_attribute: str): - def on_append(event: Event, new_object: T, *args, **kwargs): - new_collection = new_object.__getattribute__(collection_attribute) - if self is new_collection: - return + exists_in_collection = self._contained_in_sub(__object) + if len(exists_in_collection) and self is exists_in_collection[0]: + # assuming that the object already is contained in the correct collections + if not already_is_parent: + self._merge_in_self(__object, from_map=from_map) + return - self.extend(new_object.__getattribute__(collection_attribute), no_hook=True) - new_object.__setattr__(collection_attribute, self) + if not len(exists_in_collection): + self._append(__object, from_map=from_map) + else: + pass + exists_in_collection[0]._merge_in_self(__object, from_map=from_map) - self.hooks.add_event_listener(CollectionHooks.APPEND_NEW, on_append) + if not already_is_parent or not self._is_root: + for parent_collection in self._get_parents_of_multiple_contained_children(__object): + pass + parent_collection.append(__object, already_is_parent=True, from_map=from_map) - def sync_main_collection(self, main_collection: "Collection", 
collection_attribute: str): - def on_append(event: Event, new_object: T, *args, **kwargs): - new_collection = new_object.__getattribute__(collection_attribute) - if main_collection is new_collection: - return - - main_collection.extend(new_object.__getattribute__(collection_attribute), no_hook=True) - new_object.__setattr__(collection_attribute, main_collection) + def extend(self, __iterable: Optional[Iterable[T]]): + if __iterable is None: + return - self.hooks.add_event_listener(CollectionHooks.APPEND_NEW, on_append) + for __object in __iterable: + self.append(__object) - """ - def on_append(event: Event, new_object: T, *args, **kwargs): - new_collection: Collection = new_object.__getattribute__(collection_attribute) - if self is new_collection: - return - - self.extend(new_collection.shallow_list, no_hook=False) - new_object.__setattr__(collection_attribute, self) + def sync_with_other_collection(self, equal_collection: "Collection"): + """ + If two collections always need to have the same values, this can be used. - self.hooks.add_event_listener(CollectionHooks.APPEND_NEW, on_append) - """ + Internally: + 1. import the data from other to self + - _data + - contained_collections + 2. replace all refs from the other object, with refs from this object + """ + if equal_collection is self: + return + + # don't add the elements from the subelements from the other collection. + # this will be done in the next step. 
+ self.extend(equal_collection._data) + # add all submodules + for equal_sub_collection in equal_collection.contained_collections: + self.contain_collection_inside(equal_sub_collection) + + # now the ugly part + # replace all refs of the other element with this one + self._risky_merge(equal_collection) + + def contain_collection_inside(self, sub_collection: "Collection"): + """ + This collection will ALWAYS contain everything from the passed in collection + """ + if sub_collection in self.contained_collections: + return + + self.contained_collections.append(sub_collection) + sub_collection.upper_collections.append(self) + + @property + def data(self) -> List[T]: + return [*self._data, + *(__object for collection in self.contained_collections for __object in collection.shallow_list)] + + def __len__(self) -> int: + return len(self._data) + sum(len(collection) for collection in self.contained_collections) def __iter__(self) -> Iterator[T]: for element in self._data: - yield element - - def __str__(self) -> str: - return "\n".join([f"{str(j).zfill(2)}: {i.__repr__()}" for j, i in enumerate(self._data)]) - - def __len__(self) -> int: - return len(self._data) - - def __getitem__(self, key) -> T: - if type(key) != int: - return ValueError("key needs to be an integer") - - return self._data[key] - - def __setitem__(self, key, value: T): - if type(key) != int: - return ValueError("key needs to be an integer") - - old_item = self._data[key] - self.unmap_element(old_item) - self.map_element(value) - - self._data[key] = value - - @property - def shallow_list(self) -> List[T]: - """ - returns a shallow copy of the data list - """ - return self._data.copy() - - @property - def empty(self) -> bool: - return len(self._data) == 0 - - def clear(self): - self.__init__(element_type=self.element_type) + yield element \ No newline at end of file diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index 9431db2..13a90f6 100644 --- 
a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -9,12 +9,13 @@ from .option import Options from ..utils.shared import HIGHEST_ID from ..utils.config import main_settings, logging_settings from ..utils.support_classes.hacking import MetaClass - +from ..utils.exception.objects import IsDynamicException LOGGER = logging_settings["object_logger"] P = TypeVar('P') + @dataclass class StaticAttribute(Generic[P]): name: str @@ -27,6 +28,139 @@ class StaticAttribute(Generic[P]): is_upwards_collection: bool = False +class InnerData: + """ + This is the core class, which is used for every Data class. + The attributes are set, and can be merged. + + The concept is, that the outer class proxies this class. + If the data in the wrapper class has to be merged, then this class is just replaced and garbage collected. + """ + + def __init__(self, **kwargs): + for key, value in kwargs.items(): + self.__setattr__(key, value) + + def __merge__(self, __other: InnerData, override: bool = False): + """ + TODO + is default is totally ignored + + :param __other: + :param override: + :return: + """ + + for key, value in __other.__dict__.items(): + # just set the other value if self doesn't already have it + if key not in self.__dict__: + self.__setattr__(key, value) + continue + + # if the object of value implemented __merge__, it merges + existing = self.__getattribute__(key) + if hasattr(type(existing), "__merge__"): + existing.merge_into_self(value, override) + continue + + # override the existing value if requested + if override: + self.__setattr__(key, value) + + + +class OuterProxy: + """ + Wraps the inner data, and provides apis, to naturally access those values. 
+ """ + + _default_factories: dict + + def __init__(self, _id: int = None, dynamic: bool = False, **kwargs): + _automatic_id: bool = False + + if _id is None and not dynamic: + """ + generates a random integer id + the range is defined in the config + """ + _id = random.randint(0, HIGHEST_ID) + _automatic_id = True + + kwargs["automatic_id"] = _automatic_id + kwargs["id"] = _id + kwargs["dynamic"] = dynamic + + for name, factory in type(self)._default_factories.items(): + if name not in kwargs: + kwargs[name] = factory() + + self._inner: InnerData = InnerData(**kwargs) + self.__init_collections__() + + for name, data_list in kwargs.items(): + if isinstance(data_list, list) and name.endswith("_list"): + collection_name = name.replace("_list", "_collection") + + if collection_name not in self.__dict__: + continue + + collection = self.__getattribute__(collection_name) + collection.extend(data_list) + + def __init_collections__(self): + pass + + def __getattribute__(self, __name: str) -> Any: + """ + Returns the attribute of _inner if the attribute exists, + else it returns the attribute of self. + + That the _inner gets checked first is essential for the type hints. + :param __name: + :return: + """ + + _inner: InnerData = super().__getattribute__("_inner") + try: + return _inner.__getattribute__(__name) + except AttributeError: + return super().__getattribute__(__name) + + def __setattr__(self, __name, __value): + if not __name.startswith("_") and hasattr(self, "_inner"): + _inner: InnerData = super().__getattribute__("_inner") + return _inner.__setattr__(__name, __value) + + return super().__setattr__(__name, __value) + + def __hash__(self): + """ + :raise: IsDynamicException + :return: + """ + + if self.dynamic: + return id(self._inner) + + return self.id + + def __eq__(self, other: Any): + return self.__hash__() == other.__hash__() + + def merge(self, __other: OuterProxy, override: bool = False): + """ + 1. merges the data of __other in self + 2. 
replaces the data of __other with the data of self + + :param __other: + :param override: + :return: + """ + self._inner.__merge__(__other._inner, override=override) + __other._inner = self._inner + + class Attribute(Generic[P]): def __init__(self, database_object: "DatabaseObject", static_attribute: StaticAttribute) -> None: self.database_object: DatabaseObject = database_object @@ -38,12 +172,11 @@ class Attribute(Generic[P]): def get(self) -> P: return self.database_object.__getattribute__(self.name) - + def set(self, value: P): self.database_object.__setattr__(self.name, value) - class DatabaseObject(metaclass=MetaClass): COLLECTION_STRING_ATTRIBUTES: tuple = tuple() SIMPLE_STRING_ATTRIBUTES: dict = dict() @@ -77,7 +210,7 @@ class DatabaseObject(metaclass=MetaClass): for static_attribute in self.STATIC_ATTRIBUTES: attribute: Attribute = Attribute(self, static_attribute) self._attributes.append(attribute) - + if static_attribute.is_collection: if static_attribute.is_collection: self._collection_attributes.append(attribute) @@ -94,6 +227,8 @@ class DatabaseObject(metaclass=MetaClass): self.dynamic = dynamic self.build_version = -1 + super().__init__() + @property def upwards_collection(self) -> Collection: for attribute in self._upwards_collection_attributes: @@ -114,10 +249,19 @@ class DatabaseObject(metaclass=MetaClass): raise TypeError("Dynamic DatabaseObjects are unhashable.") return self.id + def __deep_eq__(self, other) -> bool: + if not isinstance(other, type(self)): + return False + + return super().__eq__(other) + def __eq__(self, other) -> bool: if not isinstance(other, type(self)): return False + if super().__eq__(other): + return True + # add the checks for dynamic, to not throw an exception if not self.dynamic and not other.dynamic and self.id == other.id: return True @@ -152,10 +296,10 @@ class DatabaseObject(metaclass=MetaClass): if other is None: return - - if self.id == other.id: + + if self.__deep_eq__(other): return - + if not isinstance(other, 
type(self)): LOGGER.warning(f"can't merge \"{type(other)}\" into \"{type(self)}\"") return @@ -163,6 +307,7 @@ class DatabaseObject(metaclass=MetaClass): for collection in self._collection_attributes: if hasattr(self, collection.name) and hasattr(other, collection.name): if collection.get() is not getattr(other, collection.name): + pass collection.get().extend(getattr(other, collection.name)) for simple_attribute, default_value in type(self).SIMPLE_STRING_ATTRIBUTES.items(): @@ -190,7 +335,7 @@ class DatabaseObject(metaclass=MetaClass): @property def option_string(self) -> str: return self.__repr__() - + def _build_recursive_structures(self, build_version: int, merge: False): pass @@ -202,7 +347,7 @@ class DatabaseObject(metaclass=MetaClass): no need to override if only the recursive structure should be build. override self.build_recursive_structures() instead """ - + self._build_recursive_structures(build_version=random.randint(0, 99999), merge=merge_into) def _add_other_db_objects(self, object_type: Type["DatabaseObject"], object_list: List["DatabaseObject"]): diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index d7b1cef..0af1489 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import random from collections import defaultdict from typing import List, Optional, Dict, Tuple, Type @@ -15,11 +17,13 @@ from .metadata import ( Metadata ) from .option import Options -from .parents import MainObject, DatabaseObject, StaticAttribute +from .parents import DatabaseObject, StaticAttribute from .source import Source, SourceCollection from .target import Target from ..utils.string_processing import unify +from .parents import OuterProxy as Base + from ..utils.config import main_settings """ @@ -30,12 +34,39 @@ CountryTyping = type(list(pycountry.countries)[0]) OPTION_STRING_DELIMITER = " | " -class Song(MainObject): +class Song(Base): """ Class 
representing a song object, with attributes id, mb_id, title, album_name, isrc, length, tracksort, genre, source_list, target, lyrics_list, album, main_artist_list, and feature_artist_list. """ + title: str + unified_title: str + isrc: str + length: int + genre: str + note: FormattedText + + source_collection: SourceCollection + target_collection: Collection[Target] + lyrics_collection: Collection[Lyrics] + main_artist_collection: Collection[Artist] + feature_artist_collection: Collection[Artist] + album_collection: Collection[Album] + + _default_factories = { + "note": FormattedText, + "length": lambda: 0, + "source_collection": SourceCollection, + "target_collection": Collection, + "lyrics_collection": Collection, + + "main_artist_collection": Collection, + "album_collection": Collection, + "feature_artist_collection": Collection + } + + """ COLLECTION_STRING_ATTRIBUTES = ( "lyrics_collection", "album_collection", "main_artist_collection", "feature_artist_collection", "source_collection") @@ -48,118 +79,38 @@ class Song(MainObject): "genre": None, "notes": FormattedText() } + """ UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("album_collection", "main_artist_collection", "feature_artist_collection") + """ + title: str = None, + unified_title: str = None, + isrc: str = None, + length: int = None, + tracksort: int = None, + genre: str = None, + source_list: List[Source] = None, + target_list: List[Target] = None, + lyrics_list: List[Lyrics] = None, + album_list: List['Album'] = None, + main_artist_list: List['Artist'] = None, + feature_artist_list: List['Artist'] = None, + notes: FormattedText = None, + """ + def __init_collections__(self) -> None: + self.album_collection.contain_given_in_attribute = { + "artist_collection": self.main_artist_collection, + } + self.album_collection.append_object_to_attribute = { + "song_collection": self, + } - STATIC_ATTRIBUTES = [ - StaticAttribute(name="title", weight=.5), - StaticAttribute(name="unified_title", weight=.3), - 
StaticAttribute(name="isrc", weight=1), - StaticAttribute(name="length"), - StaticAttribute(name="tracksort", default_value=0), - StaticAttribute(name="genre"), - StaticAttribute(name="notes", default_value=FormattedText()), - - StaticAttribute(name="source_collection", is_collection=True), - StaticAttribute(name="lyrics_collection", is_collection=True), - StaticAttribute(name="album_collection", is_collection=True, is_upwards_collection=True), - StaticAttribute(name="main_artist_collection", is_collection=True, is_upwards_collection=True), - StaticAttribute(name="feature_artist_collection", is_collection=True, is_upwards_collection=True) - ] - - def __init__( - self, - _id: int = None, - dynamic: bool = False, - title: str = None, - unified_title: str = None, - isrc: str = None, - length: int = None, - tracksort: int = None, - genre: str = None, - source_list: List[Source] = None, - target_list: List[Target] = None, - lyrics_list: List[Lyrics] = None, - album_list: List['Album'] = None, - main_artist_list: List['Artist'] = None, - feature_artist_list: List['Artist'] = None, - notes: FormattedText = None, - **kwargs - ) -> None: - super().__init__(_id=_id, dynamic=dynamic, **kwargs) - # attributes - self.title: str = title - self.unified_title: str = unified_title - if unified_title is None and title is not None: - self.unified_title = unify(title) - - self.isrc: str = isrc - self.length: int = length - self.tracksort: int = tracksort or 0 - self.genre: str = genre - self.notes: FormattedText = notes or FormattedText() - - self.source_collection: SourceCollection = SourceCollection(source_list) - self.target_collection: Collection[Target] = Collection(data=target_list) - self.lyrics_collection: Collection[Lyrics] = Collection(data=lyrics_list) - - # main_artist_collection = album.artist collection - self.main_artist_collection: Collection[Artist] = Collection(data=[]) - - # this album_collection equals no collection - self.album_collection: Collection[Album] = 
Collection(data=album_list, - contain_given_in_attribute={ - "artist_collection": self.main_artist_collection - }, append_object_to_attribute={ - "song_collection": self - }) - - self.main_artist_collection.contain_given_in_attribute = {"main_album_collection": self.album_collection} - self.main_artist_collection.extend(main_artist_list) - - self.feature_artist_collection: Collection[Artist] = Collection( - data=feature_artist_list, - append_object_to_attribute={ - "feature_song_collection": self - } - ) - - def _build_recursive_structures(self, build_version: int, merge: bool): - if build_version == self.build_version: - return - self.build_version = build_version - - album: Album - for album in self.album_collection: - album.song_collection.append(self, merge_on_conflict=merge, merge_into_existing=False) - album._build_recursive_structures(build_version=build_version, merge=merge) - - artist: Artist - for artist in self.feature_artist_collection: - artist.feature_song_collection.append(self, merge_on_conflict=merge, merge_into_existing=False) - artist._build_recursive_structures(build_version=build_version, merge=merge) - - for artist in self.main_artist_collection: - for album in self.album_collection: - artist.main_album_collection.append(album, merge_on_conflict=merge, merge_into_existing=False) - artist._build_recursive_structures(build_version=build_version, merge=merge) - - def _add_other_db_objects(self, object_type: Type["DatabaseObject"], object_list: List["DatabaseObject"]): - if object_type is Song: - return - - if object_type is Lyrics: - self.lyrics_collection.extend(object_list) - return - - if object_type is Artist: - self.main_artist_collection.extend(object_list) - return - - if object_type is Album: - self.album_collection.extend(object_list) - return - + self.main_artist_collection.contain_given_in_attribute = { + "main_album_collection": self.album_collection + } + self.feature_artist_collection.append_object_to_attribute = { + 
"feature_song_collection": self + } @property def indexing_values(self) -> List[Tuple[str, object]]: @@ -245,7 +196,7 @@ All objects dependent on Album """ -class Album(MainObject): +class Album(Base): COLLECTION_STRING_ATTRIBUTES = ("label_collection", "artist_collection", "song_collection") SIMPLE_STRING_ATTRIBUTES = { "title": None, @@ -259,6 +210,16 @@ class Album(MainObject): "notes": FormattedText() } + title: str + unified_title: str + album_status: str + album_type: AlbumType + language: LanguageSelector + + _default_factories = { + "album_type": AlbumType.OTHER + } + DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("song_collection", ) UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("artist_collection", "label_collection") @@ -298,7 +259,7 @@ class Album(MainObject): notes: FormattedText = None, **kwargs ) -> None: - MainObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs) + Base.__init__(self, _id=_id, dynamic=dynamic, **kwargs) self.title: str = title self.unified_title: str = unified_title @@ -512,7 +473,7 @@ All objects dependent on Artist """ -class Artist(MainObject): +class Artist(Base): COLLECTION_STRING_ATTRIBUTES = ( "feature_song_collection", "main_album_collection", @@ -570,7 +531,7 @@ class Artist(MainObject): unformated_location: str = None, **kwargs ): - MainObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs) + Base.__init__(self, _id=_id, dynamic=dynamic, **kwargs) self.name: str = name self.unified_name: str = unified_name @@ -806,7 +767,7 @@ Label """ -class Label(MainObject): +class Label(Base): COLLECTION_STRING_ATTRIBUTES = ("album_collection", "current_artist_collection") SIMPLE_STRING_ATTRIBUTES = { "name": None, @@ -837,7 +798,7 @@ class Label(MainObject): source_list: List[Source] = None, **kwargs ): - MainObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs) + Base.__init__(self, _id=_id, dynamic=dynamic, **kwargs) self.name: str = name self.unified_name: str = unified_name diff --git a/src/music_kraken/objects/source.py 
b/src/music_kraken/objects/source.py index a4e489a..4fb1e40 100644 --- a/src/music_kraken/objects/source.py +++ b/src/music_kraken/objects/source.py @@ -121,7 +121,8 @@ class Source(DatabaseObject): class SourceCollection(Collection): - def __init__(self, source_list: List[Source]): + def __init__(self, source_list: List[Source] = None): + source_list = source_list if source_list is not None else [] self._page_to_source_list: Dict[SourcePages, List[Source]] = defaultdict(list) super().__init__(data=source_list) diff --git a/src/music_kraken/utils/exception/objects.py b/src/music_kraken/utils/exception/objects.py new file mode 100644 index 0000000..ff3026d --- /dev/null +++ b/src/music_kraken/utils/exception/objects.py @@ -0,0 +1,10 @@ +class ObjectException(Exception): + pass + + +class IsDynamicException(Exception): + """ + Gets raised, if a dynamic data object tries to perform an action, + which does not make sense for a dynamic object. + """ + pass diff --git a/src/music_kraken/utils/support_classes/hacking.py b/src/music_kraken/utils/support_classes/hacking.py index abcf67f..1125768 100644 --- a/src/music_kraken/utils/support_classes/hacking.py +++ b/src/music_kraken/utils/support_classes/hacking.py @@ -1,7 +1,8 @@ +import weakref from types import FunctionType from functools import wraps -from typing import Dict +from typing import Dict, Set class Lake: def __init__(self): @@ -17,15 +18,34 @@ class Lake: return self.id_to_object[_id] except KeyError: self.add(db_object) - return db_object + return db_object def add(self, db_object: object): self.id_to_object[id(db_object)] = db_object def override(self, to_override: object, new_db_object: object): - self.redirects[id(to_override)] = id(new_db_object) - if id(to_override) in self.id_to_object: - del self.id_to_object[id(to_override)] + _id = id(to_override) + while _id in self.redirects: + _id = self.redirects[_id] + + if id(new_db_object) in self.id_to_object: + print("!!!!!") + + self.add(new_db_object) + 
self.redirects[_id] = id(new_db_object) + # if _id in self.id_to_object: + # del self.id_to_object[_id] + + def is_same(self, __object: object, other: object) -> bool: + _self_id = id(__object) + while _self_id in self.redirects: + _self_id = self.redirects[_self_id] + + _other_id = id(other) + while _other_id in self.redirects: + _other_id = self.redirects[_other_id] + + return _self_id == _other_id lake = Lake() @@ -35,11 +55,20 @@ def wrapper(method): @wraps(method) def wrapped(*args, **kwargs): return method(*(lake.get_real_object(args[0]), *args[1:]), **kwargs) + return wrapped - class BaseClass: + def __new__(cls, *args, **kwargs): + instance = cls(*args, **kwargs) + print("new") + lake.add(instance) + return instance + + def __eq__(self, other): + return lake.is_same(self, other) + def _risky_merge(self, to_replace): lake.override(to_replace, self) @@ -49,17 +78,27 @@ class MetaClass(type): bases = (*bases, BaseClass) newClassDict = {} + ignore_functions: Set[str] = {"__new__", "__init__"} + for attributeName, attribute in classDict.items(): - if isinstance(attribute, FunctionType) and attributeName not in ("__new__", "__init__"): + if isinstance(attribute, FunctionType) and (attributeName not in ignore_functions): + """ + The funktion new and init shouldn't be accounted for because we can assume the class is + independent on initialization. 
+ """ attribute = wrapper(attribute) + newClassDict[attributeName] = attribute - for key, value in object.__dict__.items( ): - if hasattr( value, '__call__' ) and value not in newClassDict and key not in ("__new__", "__init__"): + print() + + for key, value in object.__dict__.items(): + # hasattr( value, '__call__' ) and + if hasattr(value, '__call__') and value not in newClassDict and key not in ("__new__", "__init__"): newClassDict[key] = wrapper(value) new_instance = type.__new__(meta, classname, bases, newClassDict) lake.add(new_instance) - return new_instance \ No newline at end of file + return new_instance From 22b32b0c50ecbde08ee0817e45a4301645c86bdf Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 19 Dec 2023 16:10:02 +0100 Subject: [PATCH 034/104] feat: implemented song --- src/music_kraken/objects/song.py | 107 ++++++++----------------------- 1 file changed, 25 insertions(+), 82 deletions(-) diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index 0af1489..a74e261 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -20,6 +20,7 @@ from .option import Options from .parents import DatabaseObject, StaticAttribute from .source import Source, SourceCollection from .target import Target +from .country import Language, Country from ..utils.string_processing import unify from .parents import OuterProxy as Base @@ -35,11 +36,6 @@ OPTION_STRING_DELIMITER = " | " class Song(Base): - """ - Class representing a song object, with attributes id, mb_id, title, album_name, isrc, length, - tracksort, genre, source_list, target, lyrics_list, album, main_artist_list, and feature_artist_list. 
- """ - title: str unified_title: str isrc: str @@ -212,91 +208,33 @@ class Album(Base): title: str unified_title: str - album_status: str + album_status: AlbumStatus album_type: AlbumType - language: LanguageSelector + language: Language + date: ID3Timestamp + barcode: str + albumsort: int + notes: FormattedText + + source_collection: SourceCollection + artist_collection: Collection[Artist] + song_collection: Collection[Song] + label_collection: Collection[Label] _default_factories = { - "album_type": AlbumType.OTHER + "album_type": lambda: AlbumType.OTHER, + "language": lambda: Language.by_alpha_2("en"), + "date": ID3Timestamp, + "notes": FormattedText, } DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("song_collection", ) UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("artist_collection", "label_collection") - STATIC_ATTRIBUTES = [ - StaticAttribute(name="title", weight=.5), - StaticAttribute(name="unified_title", weight=.3), - StaticAttribute(name="language"), - StaticAttribute(name="barcode", weight=1), - StaticAttribute(name="albumsort"), - StaticAttribute(name="album_status"), - StaticAttribute(name="album_type", default_value=AlbumType.OTHER), - StaticAttribute(name="date", default_value=ID3Timestamp()), - StaticAttribute(name="notes", default_value=FormattedText()), - - StaticAttribute(name="source_collection", is_collection=True), - StaticAttribute(name="song_collection", is_collection=True, is_downwards_collection=True), - StaticAttribute(name="artist_collection", is_collection=True, is_upwards_collection=True), - StaticAttribute(name="label_collection", is_collection=True, is_upwards_collection=True), - ] - - def __init__( - self, - _id: int = None, - title: str = None, - unified_title: str = None, - language: pycountry.Languages = None, - date: ID3Timestamp = None, - barcode: str = None, - albumsort: int = None, - dynamic: bool = False, - source_list: List[Source] = None, - artist_list: List['Artist'] = None, - song_list: List[Song] = None, - album_status: 
AlbumStatus = None, - album_type: AlbumType = None, - label_list: List['Label'] = None, - notes: FormattedText = None, - **kwargs - ) -> None: - Base.__init__(self, _id=_id, dynamic=dynamic, **kwargs) - - self.title: str = title - self.unified_title: str = unified_title - if unified_title is None and title is not None: - self.unified_title = unify(title) - - self.album_status: AlbumStatus = album_status - self.album_type: AlbumType = album_type or AlbumType.OTHER - self.language: pycountry.Languages = language - self.date: ID3Timestamp = date or ID3Timestamp() - - """ - TODO - find out the id3 tag for barcode and implement it - maybe look at how mutagen does it with easy_id3 - """ - self.barcode: str = barcode - """ - TODO - implement a function in the Artist class, - to set albumsort with help of the release year - """ - self.albumsort: Optional[int] = albumsort - self.notes = notes or FormattedText() - - self.source_collection: SourceCollection = SourceCollection(source_list) - - self.artist_collection: Collection[Artist] = Collection(data=artist_list) - - self.song_collection: Collection[Song] = Collection( - data=song_list, - contain_attribute_in_given={ - "main_artist_collection": self.artist_collection - } - ) - - self.label_collection: Collection[Label] = Collection(data=label_list) + def __init_collections__(self): + self.song_collection.contain_attribute_in_given = { + "main_artist_collection": self.artist_collection + } def _build_recursive_structures(self, build_version: int, merge: bool): if build_version == self.build_version: @@ -345,6 +283,11 @@ class Album(Base): @property def metadata(self) -> Metadata: + """ + TODO + - barcode + :return: + """ return Metadata({ id3Mapping.ALBUM: [self.title], id3Mapping.COPYRIGHT: [self.copyright], From a2a229735405a3cc8ae5229bf834a8d3e98b6d3c Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Tue, 19 Dec 2023 22:11:46 +0100 Subject: [PATCH 035/104] feat: fixed bugs --- 
.../inspectionProfiles/profiles_settings.xml | 1 + .idea/misc.xml | 3 + src/create_custom_objects.py | 2 - src/music_kraken/objects/__init__.py | 4 +- src/music_kraken/objects/collection.py | 8 +- src/music_kraken/objects/contact.py | 7 +- src/music_kraken/objects/country.py | 4 +- src/music_kraken/objects/lyrics.py | 6 +- src/music_kraken/objects/parents.py | 293 ++++------------- src/music_kraken/objects/song.py | 296 +++++------------- src/music_kraken/objects/source.py | 29 +- src/music_kraken/objects/target.py | 4 +- .../utils/support_classes/query.py | 2 +- 13 files changed, 174 insertions(+), 485 deletions(-) diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml index 105ce2d..dd4c951 100644 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -1,5 +1,6 @@ + diff --git a/.idea/misc.xml b/.idea/misc.xml index df5fc58..878f755 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -4,4 +4,7 @@ + + \ No newline at end of file diff --git a/src/create_custom_objects.py b/src/create_custom_objects.py index 49d58a9..78adc96 100644 --- a/src/create_custom_objects.py +++ b/src/create_custom_objects.py @@ -25,7 +25,6 @@ print(other_song.__dict__) print(song) -""" only_smile = Artist( name="Only Smile", source_list=[Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/")], @@ -126,7 +125,6 @@ for _id, _object in objects_by_id.items(): print(only_smile) """ -""" c = Collection([Song(title="hi"), Song(title="hi2"), Song(title="hi3")]) c1 = Collection([Song(title="he"), Song(title="hi5")]) c11 = Collection([Song(title="wow how ultra subby", isrc="hiii")]) diff --git a/src/music_kraken/objects/__init__.py b/src/music_kraken/objects/__init__.py index bdca840..5596a2c 100644 --- a/src/music_kraken/objects/__init__.py +++ b/src/music_kraken/objects/__init__.py @@ -1,5 +1,5 @@ +from typing_extensions import TypeVar from .option import Options -from .parents 
import DatabaseObject from .metadata import Metadata, Mapping as ID3Mapping, ID3Timestamp @@ -19,3 +19,5 @@ from .collection import Collection from .country import Country from .contact import Contact + +from .parents import OuterProxy as DatabaseObject diff --git a/src/music_kraken/objects/collection.py b/src/music_kraken/objects/collection.py index de98f47..a16d64f 100644 --- a/src/music_kraken/objects/collection.py +++ b/src/music_kraken/objects/collection.py @@ -54,6 +54,7 @@ class Collection(Generic[T]): self._indexed_values[name].add(value) self._indexed_to_objects[value].append(__object) + print(from_map) if not from_map: for attribute, new_object in self.contain_given_in_attribute.items(): __object.__getattribute__(attribute).contain_collection_inside(new_object) @@ -143,6 +144,7 @@ class Collection(Generic[T]): 2. merge into existing object 3. remap existing object """ + self = self.__self__ if __object.id in self._contains_ids: return @@ -233,7 +235,7 @@ class Collection(Generic[T]): # now the ugly part # replace all refs of the other element with this one - self._risky_merge(equal_collection) + self = self._risky_merge(equal_collection) def contain_collection_inside(self, sub_collection: "Collection"): """ @@ -253,6 +255,10 @@ class Collection(Generic[T]): def __len__(self) -> int: return len(self._data) + sum(len(collection) for collection in self.children) + @property + def empty(self) -> bool: + return self.__len__() == 0 + def __iter__(self) -> Iterator[T]: for element in self._data: yield element diff --git a/src/music_kraken/objects/contact.py b/src/music_kraken/objects/contact.py index 2041297..e2e024f 100644 --- a/src/music_kraken/objects/contact.py +++ b/src/music_kraken/objects/contact.py @@ -1,10 +1,10 @@ from typing import Optional, List, Tuple from ..utils.enums.contact import ContactMethod -from .parents import DatabaseObject +from .parents import OuterProxy -class Contact(DatabaseObject): +class Contact(OuterProxy): 
COLLECTION_STRING_ATTRIBUTES = tuple() SIMPLE_STRING_ATTRIBUTES = { "contact_method": None, @@ -18,7 +18,8 @@ class Contact(DatabaseObject): ('value', self.value), ] - def __init__(self, contact_method: ContactMethod, value: str) -> None: + def __init__(self, contact_method: ContactMethod, value: str, **kwargs) -> None: + super().__init__(**kwargs) self.contact_method: ContactMethod = contact_method self.value: str = value diff --git a/src/music_kraken/objects/country.py b/src/music_kraken/objects/country.py index c9aeaa3..3b60bf5 100644 --- a/src/music_kraken/objects/country.py +++ b/src/music_kraken/objects/country.py @@ -72,7 +72,6 @@ class Language: alpha_2: str alpha_3: str name: str - numeric: int @classmethod def by_pycountry(cls, language) -> Language: @@ -82,12 +81,11 @@ class Language: alpha_2=alpha_2, alpha_3=language.alpha_3, name=language.name, - numeric=language.numeric, ) @classmethod def by_alpha_2(cls, alpha_2: str) -> Language: - return cls.by_pycountry(pycountry.languages.get(alpha_2=alpha_2)) + return cls.by_pycountry(pycountry.languages.get(alpha_2=alpha_2.upper())) @classmethod def by_alpha_3(cls, alpha_3: str) -> Language: diff --git a/src/music_kraken/objects/lyrics.py b/src/music_kraken/objects/lyrics.py index bcd1c1e..f35e186 100644 --- a/src/music_kraken/objects/lyrics.py +++ b/src/music_kraken/objects/lyrics.py @@ -2,12 +2,12 @@ from typing import List from collections import defaultdict import pycountry -from .parents import DatabaseObject +from .parents import OuterProxy from .source import Source, SourceCollection from .formatted_text import FormattedText -class Lyrics(DatabaseObject): +class Lyrics(OuterProxy): COLLECTION_STRING_ATTRIBUTES = ("source_collection",) SIMPLE_STRING_ATTRIBUTES = { "text": FormattedText(), @@ -23,7 +23,7 @@ class Lyrics(DatabaseObject): source_list: List[Source] = None, **kwargs ) -> None: - DatabaseObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs) + super().__init__(_id=_id, dynamic=dynamic, 
**kwargs) self.text: FormattedText = text or FormattedText() self.language: pycountry.Languages = language diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index 13a90f6..1ed5379 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -1,31 +1,17 @@ from __future__ import annotations + import random from collections import defaultdict -from typing import Optional, Dict, Tuple, List, Type, Generic, TypeVar, Any -from dataclasses import dataclass +from typing import Optional, Dict, Tuple, List, Type, Generic, Any, TypeVar from .metadata import Metadata -from .option import Options +from ..utils.config import logging_settings from ..utils.shared import HIGHEST_ID -from ..utils.config import main_settings, logging_settings from ..utils.support_classes.hacking import MetaClass -from ..utils.exception.objects import IsDynamicException LOGGER = logging_settings["object_logger"] -P = TypeVar('P') - - -@dataclass -class StaticAttribute(Generic[P]): - name: str - - default_value: Any = None - weight: float = 0 - - is_collection: bool = False - is_downwards_collection: bool = False - is_upwards_collection: bool = False +P = TypeVar("P", bound="OuterProxy") class InnerData: @@ -68,13 +54,43 @@ class InnerData: self.__setattr__(key, value) +class Meta(type): + def __new__(meta, classname, bases, classDict): + for key, value in classDict.items(): + if (not key.islower()) or key.startswith("_") or (key.startswith("__") and key.endswith("__")): + continue -class OuterProxy: + if hasattr(value, "__call__") or isinstance(value, property) or isinstance(value, classmethod): + continue + + print("hi", type(value)) + print(key, value) + + new_instance = type.__new__(meta, classname, bases, classDict) + + return new_instance + + + +class OuterProxy(metaclass=Meta): """ Wraps the inner data, and provides apis, to naturally access those values. 
""" - _default_factories: dict + _default_factories: dict = {} + + def __new__(cls, *args, **kwargs): + for key, value in cls.__dict__["__annotations__"].items(): + if (not key.islower()) or key.startswith("_") or (key.startswith("__") and key.endswith("__")): + continue + + if key in cls._default_factories: + continue + + cls._default_factories[key] = lambda: None + + return super().__new__(cls) + def __init__(self, _id: int = None, dynamic: bool = False, **kwargs): _automatic_id: bool = False @@ -91,6 +107,17 @@ class OuterProxy: kwargs["id"] = _id kwargs["dynamic"] = dynamic + key: str + for key, value in super().__getattribute__("__dict__").items(): + if (not key.islower()) or key.startswith("_") or (key.startswith("__") and key.endswith("__")): + continue + + if hasattr(value, "__call__") or isinstance(value, property) or isinstance(value, classmethod): + continue + + print(type(value)) + print(key, value) + for name, factory in type(self)._default_factories.items(): if name not in kwargs: kwargs[name] = factory() @@ -160,222 +187,32 @@ class OuterProxy: self._inner.__merge__(__other._inner, override=override) __other._inner = self._inner - -class Attribute(Generic[P]): - def __init__(self, database_object: "DatabaseObject", static_attribute: StaticAttribute) -> None: - self.database_object: DatabaseObject = database_object - self.static_attribute: StaticAttribute = static_attribute + @property + def metadata(self) -> Metadata: + """ + This is an interface. + :return: + """ + return Metadata() @property - def name(self) -> str: - return self.static_attribute.name - - def get(self) -> P: - return self.database_object.__getattribute__(self.name) - - def set(self, value: P): - self.database_object.__setattr__(self.name, value) - - -class DatabaseObject(metaclass=MetaClass): - COLLECTION_STRING_ATTRIBUTES: tuple = tuple() - SIMPLE_STRING_ATTRIBUTES: dict = dict() - - # contains all collection attributes, which describe something "smaller" - # e.g. 
album has songs, but not artist. - DOWNWARDS_COLLECTION_STRING_ATTRIBUTES: tuple = tuple() - UPWARDS_COLLECTION_STRING_ATTRIBUTES: tuple = tuple() - - STATIC_ATTRIBUTES: List[StaticAttribute] = list() - - def __init__(self, _id: int = None, dynamic: bool = False, **kwargs) -> None: - self.automatic_id: bool = False - - if _id is None and not dynamic: - """ - generates a random integer id - 64 bit integer, but this is defined in shared.py in ID_BITS - the range is defined in the Tuple ID_RANGE - """ - _id = random.randint(0, HIGHEST_ID) - self.automatic_id = True - # LOGGER.debug(f"Id for {type(self).__name__} isn't set. Setting to {_id}") - - self._attributes: List[Attribute] = [] - self._simple_attribute_list: List[Attribute] = [] - self._collection_attributes: List[Attribute] = [] - self._downwards_collection_attributes: List[Attribute] = [] - self._upwards_collection_attributes: List[Attribute] = [] - - for static_attribute in self.STATIC_ATTRIBUTES: - attribute: Attribute = Attribute(self, static_attribute) - self._attributes.append(attribute) - - if static_attribute.is_collection: - if static_attribute.is_collection: - self._collection_attributes.append(attribute) - if static_attribute.is_upwards_collection: - self._upwards_collection_attributes.append(attribute) - if static_attribute.is_downwards_collection: - self._downwards_collection_attributes.append(attribute) - else: - self._simple_attribute_list.append(attribute) - - # The id can only be None, if the object is dynamic (self.dynamic = True) - self.id: Optional[int] = _id - - self.dynamic = dynamic - self.build_version = -1 - - super().__init__() - - @property - def upwards_collection(self) -> Collection: - for attribute in self._upwards_collection_attributes: - yield attribute.get() - - @property - def downwards_collection(self) -> Collection: - for attribute in self._downwards_collection_attributes: - yield attribute.get() - - @property - def all_collections(self) -> Collection: - for attribute in 
self._collection_attributes: - yield attribute.get() - - def __hash__(self): - if self.dynamic: - raise TypeError("Dynamic DatabaseObjects are unhashable.") - return self.id - - def __deep_eq__(self, other) -> bool: - if not isinstance(other, type(self)): - return False - - return super().__eq__(other) - - def __eq__(self, other) -> bool: - if not isinstance(other, type(self)): - return False - - if super().__eq__(other): - return True - - # add the checks for dynamic, to not throw an exception - if not self.dynamic and not other.dynamic and self.id == other.id: - return True - - temp_attribute_map: Dict[str, set] = defaultdict(set) - - # building map with sets - for name, value in self.indexing_values: - temp_attribute_map[name].add(value) - - # check against the attributes of the other object - for name, other_value in other.indexing_values: - if other_value in temp_attribute_map[name]: - return True - - return False + def options(self) -> List[P]: + return [self] @property def indexing_values(self) -> List[Tuple[str, object]]: """ - returns a map of the name and values of the attributes. - This helps in comparing classes for equal data (eg. being the same song but different attributes) + This is an interface. + It is supposed to return a map of the name and values for all important attributes. + This helps in comparing classes for equal data (e.g. being the same song but different attributes) + + TODO + Rewrite this approach into a approach, that is centered around statistics, and not binaries. + Instead of: one of this matches, it is the same + This: If enough attributes are similar enough, they are the same Returns: List[Tuple[str, object]]: the first element in the tuple is the name of the attribute, the second the value. 
""" - return list() - - def merge(self, other, override: bool = False, replace_all_refs: bool = False): - print("merge") - - if other is None: - return - - if self.__deep_eq__(other): - return - - if not isinstance(other, type(self)): - LOGGER.warning(f"can't merge \"{type(other)}\" into \"{type(self)}\"") - return - - for collection in self._collection_attributes: - if hasattr(self, collection.name) and hasattr(other, collection.name): - if collection.get() is not getattr(other, collection.name): - pass - collection.get().extend(getattr(other, collection.name)) - - for simple_attribute, default_value in type(self).SIMPLE_STRING_ATTRIBUTES.items(): - if getattr(other, simple_attribute) == default_value: - continue - - if override or getattr(self, simple_attribute) == default_value: - setattr(self, simple_attribute, getattr(other, simple_attribute)) - - if replace_all_refs: - self._risky_merge(other) - - def strip_details(self): - for collection in type(self).DOWNWARDS_COLLECTION_STRING_ATTRIBUTES: - getattr(self, collection).clear() - - @property - def metadata(self) -> Metadata: - return Metadata() - - @property - def options(self) -> List["DatabaseObject"]: - return [self] - - @property - def option_string(self) -> str: - return self.__repr__() - - def _build_recursive_structures(self, build_version: int, merge: False): - pass - - def compile(self, merge_into: bool = False): - """ - compiles the recursive structures, - and does depending on the object some other stuff. - - no need to override if only the recursive structure should be build. 
- override self.build_recursive_structures() instead - """ - - self._build_recursive_structures(build_version=random.randint(0, 99999), merge=merge_into) - - def _add_other_db_objects(self, object_type: Type["DatabaseObject"], object_list: List["DatabaseObject"]): - pass - - def add_list_of_other_objects(self, object_list: List["DatabaseObject"]): - d: Dict[Type[DatabaseObject], List[DatabaseObject]] = defaultdict(list) - - for db_object in object_list: - d[type(db_object)].append(db_object) - - for key, value in d.items(): - self._add_other_db_objects(key, value) - - -class MainObject(DatabaseObject): - """ - This is the parent class for all "main" data objects: - - Song - - Album - - Artist - - Label - - It has all the functionality of the "DatabaseObject" (it inherits from said class) - but also some added functions as well. - """ - - def __init__(self, _id: int = None, dynamic: bool = False, **kwargs): - DatabaseObject.__init__(self, _id=_id, dynamic=dynamic, **kwargs) - - self.additional_arguments: dict = kwargs + return [] diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index a74e261..2067648 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -2,7 +2,7 @@ from __future__ import annotations import random from collections import defaultdict -from typing import List, Optional, Dict, Tuple, Type +from typing import List, Optional, Dict, Tuple, Type, Union import pycountry @@ -17,7 +17,7 @@ from .metadata import ( Metadata ) from .option import Options -from .parents import DatabaseObject, StaticAttribute +from .parents import OuterProxy, P from .source import Source, SourceCollection from .target import Target from .country import Language, Country @@ -59,7 +59,7 @@ class Song(Base): "main_artist_collection": Collection, "album_collection": Collection, - "feature_artist_collection": Collection + "feature_artist_collection": Collection, } """ @@ -161,14 +161,10 @@ class Song(Base): f"by 
Artist({OPTION_STRING_DELIMITER.join(artist.name for artist in self.main_artist_collection)}) " \ f"feat. Artist({OPTION_STRING_DELIMITER.join(artist.name for artist in self.feature_artist_collection)})" - @property - def options(self) -> List[DatabaseObject]: - """ - Return a list of related objects including the song object, album object, main artist objects, and - feature artist objects. - :return: a list of objects that are related to the Song object - """ + + @property + def options(self) -> List[P]: options = self.main_artist_collection.shallow_list options.extend(self.feature_artist_collection) options.extend(self.album_collection) @@ -226,6 +222,11 @@ class Album(Base): "language": lambda: Language.by_alpha_2("en"), "date": ID3Timestamp, "notes": FormattedText, + + "source_collection": SourceCollection, + "artist_collection": Collection, + "song_collection": Collection, + "label_collection": Collection, } DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("song_collection", ) @@ -236,42 +237,6 @@ class Album(Base): "main_artist_collection": self.artist_collection } - def _build_recursive_structures(self, build_version: int, merge: bool): - if build_version == self.build_version: - return - self.build_version = build_version - - song: Song - for song in self.song_collection: - song.album_collection.append(self, merge_on_conflict=merge, merge_into_existing=False) - song._build_recursive_structures(build_version=build_version, merge=merge) - - artist: Artist - for artist in self.artist_collection: - artist.main_album_collection.append(self, merge_on_conflict=merge, merge_into_existing=False) - artist._build_recursive_structures(build_version=build_version, merge=merge) - - label: Label - for label in self.label_collection: - label.album_collection.append(self, merge_on_conflict=merge, merge_into_existing=False) - label._build_recursive_structures(build_version=build_version, merge=merge) - - def _add_other_db_objects(self, object_type: Type["DatabaseObject"], 
object_list: List["DatabaseObject"]): - if object_type is Song: - self.song_collection.extend(object_list) - return - - if object_type is Artist: - self.artist_collection.extend(object_list) - return - - if object_type is Album: - return - - if object_type is Label: - self.label_collection.extend(object_list) - return - @property def indexing_values(self) -> List[Tuple[str, object]]: return [ @@ -311,7 +276,7 @@ class Album(Base): f"under Label({OPTION_STRING_DELIMITER.join([label.name for label in self.label_collection])})" @property - def options(self) -> List[DatabaseObject]: + def options(self) -> List[P]: options = self.artist_collection.shallow_list options.append(self) options.extend(self.song_collection) @@ -434,122 +399,50 @@ class Artist(Base): "unformated_location": None, } + name: str + unified_name: str + country: Country + formed_in: ID3Timestamp + notes: FormattedText + lyrical_themes: List[str] + + general_genre: str + unformated_location: str + + source_collection: SourceCollection + contact_collection: Collection[Contact] + + feature_song_collection: Collection[Song] + main_album_collection: Collection[Album] + label_collection: Collection[Label] + + _default_factories = { + "formed_in": ID3Timestamp, + "notes": FormattedText, + "lyrical_themes": list, + "general_genre": lambda: "", + "source_collection": SourceCollection, + "feature_song_collection": Collection, + "main_album_collection": Collection, + "contact_collection": Collection, + "label_collection": Collection, + } + DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("feature_song_collection", "main_album_collection") UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection", ) + def __init_collections__(self): + self.feature_song_collection.append_object_to_attribute = { + "feature_artist_collection": self + } - STATIC_ATTRIBUTES = [ - StaticAttribute(name="name", weight=.5), - StaticAttribute(name="unified_name", weight=.3), - StaticAttribute(name="country"), - StaticAttribute(name="formed_in", 
default_value=ID3Timestamp()), - StaticAttribute(name="lyrical_themes", default_value=[]), - StaticAttribute(name="general_genre", default_value=""), - StaticAttribute(name="notes", default_value=FormattedText()), - StaticAttribute(name="unformated_location"), - - StaticAttribute(name="source_collection", is_collection=True), - StaticAttribute(name="contact_collection", is_collection=True), - StaticAttribute(name="feature_song_collection", is_collection=True, is_downwards_collection=True), - StaticAttribute(name="main_album_collection", is_collection=True, is_downwards_collection=True), - StaticAttribute(name="label_collection", is_collection=True, is_upwards_collection=True), - ] - - def __init__( - self, - _id: int = None, - dynamic: bool = False, - name: str = None, - unified_name: str = None, - source_list: List[Source] = None, - feature_song_list: List[Song] = None, - main_album_list: List[Album] = None, - contact_list: List[Contact] = None, - notes: FormattedText = None, - lyrical_themes: List[str] = None, - general_genre: str = "", - country: CountryTyping = None, - formed_in: ID3Timestamp = None, - label_list: List['Label'] = None, - unformated_location: str = None, - **kwargs - ): - Base.__init__(self, _id=_id, dynamic=dynamic, **kwargs) - - self.name: str = name - self.unified_name: str = unified_name - if unified_name is None and name is not None: - self.unified_name = unify(name) - - """ - TODO implement album type and notes - """ - self.country: CountryTyping = country - self.formed_in: ID3Timestamp = formed_in - """ - notes, general genre, lyrics themes are attributes - which are meant to only use in outputs to describe the object - i mean do as you want but there is no strict rule about em so good luck - """ - self.notes: FormattedText = notes or FormattedText() - - self.lyrical_themes: List[str] = lyrical_themes or [] - self.general_genre = general_genre - self.unformated_location: Optional[str] = unformated_location - - self.source_collection: 
SourceCollection = SourceCollection(source_list) - self.contact_collection: Collection[Label] = Collection(data=contact_list) - - self.feature_song_collection: Collection[Song] = Collection( - data=feature_song_list, - append_object_to_attribute={ - "feature_artist_collection": self - } - ) + self.main_album_collection.append_object_to_attribute = { + "artist_collection": self + } - self.main_album_collection: Collection[Album] = Collection( - data=main_album_list, - append_object_to_attribute={ - "artist_collection": self - } - ) - - self.label_collection: Collection[Label] = Collection( - data=label_list, - append_object_to_attribute={ - "current_artist_collection": self - } - ) - - - def _add_other_db_objects(self, object_type: Type["DatabaseObject"], object_list: List["DatabaseObject"]): - if object_type is Song: - # this doesn't really make sense - # self.feature_song_collection.extend(object_list) - return - - if object_type is Artist: - return - - if object_type is Album: - self.main_album_collection.extend(object_list) - return - - if object_type is Label: - self.label_collection.extend(object_list) - return - - def compile(self, merge_into: bool = False): - """ - compiles the recursive structures, - and does depending on the object some other stuff. - - no need to override if only the recursive structure should be built. 
- override self.build_recursive_structures() instead - """ - - self.update_albumsort() - self._build_recursive_structures(build_version=random.randint(0, 99999), merge=merge_into) + self.label_collection.append_object_to_attribute = { + "current_artist_collection": self + } def update_albumsort(self): """ @@ -564,7 +457,7 @@ class Artist(Base): if len(self.main_album_collection) <= 0: return - type_section: Dict[AlbumType] = defaultdict(lambda: 2, { + type_section: Dict[AlbumType, int] = defaultdict(lambda: 2, { AlbumType.OTHER: 0, # if I don't know it, I add it to the first section AlbumType.STUDIO_ALBUM: 0, AlbumType.EP: 0, @@ -608,27 +501,6 @@ class Artist(Base): # replace the old collection with the new one self.main_album_collection: Collection = Collection(data=album_list, element_type=Album) - - def _build_recursive_structures(self, build_version: int, merge: False): - if build_version == self.build_version: - return - self.build_version = build_version - - song: Song - for song in self.feature_song_collection: - song.feature_artist_collection.append(self, merge_on_conflict=merge, merge_into_existing=False) - song._build_recursive_structures(build_version=build_version, merge=merge) - - album: Album - for album in self.main_album_collection: - album.artist_collection.append(self, merge_on_conflict=merge, merge_into_existing=False) - album._build_recursive_structures(build_version=build_version, merge=merge) - - label: Label - for label in self.label_collection: - label.current_artist_collection.append(self, merge_on_conflict=merge, merge_into_existing=False) - label._build_recursive_structures(build_version=build_version, merge=merge) - @property def indexing_values(self) -> List[Tuple[str, object]]: return [ @@ -664,16 +536,12 @@ class Artist(Base): f"under Label({OPTION_STRING_DELIMITER.join([label.name for label in self.label_collection])})" @property - def options(self) -> List[DatabaseObject]: + def options(self) -> List[P]: options = [self] 
options.extend(self.main_album_collection) options.extend(self.feature_song_collection) return options - @property - def country_string(self): - return self.country.alpha_3 - @property def feature_album(self) -> Album: return Album( @@ -712,22 +580,27 @@ Label class Label(Base): COLLECTION_STRING_ATTRIBUTES = ("album_collection", "current_artist_collection") - SIMPLE_STRING_ATTRIBUTES = { - "name": None, - "unified_name": None, - "notes": FormattedText() - } + DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = COLLECTION_STRING_ATTRIBUTES - STATIC_ATTRIBUTES = [ - StaticAttribute(name="name", weight=.5), - StaticAttribute(name="unified_name", weight=.3), - StaticAttribute(name="notes", default_value=FormattedText()), + name: str + unified_name: str + notes: FormattedText - StaticAttribute(name="album_collection", is_collection=True, is_downwards_collection=True), - StaticAttribute(name="current_artist_collection", is_collection=True, is_downwards_collection=True), - ] + source_collection: SourceCollection + contact_collection: Collection[Contact] + + album_collection: Collection[Album] + current_artist_collection: Collection[Artist] + + _default_factories = { + "notes": FormattedText, + "album_collection": Collection, + "current_artist_collection": Collection, + "source_collection": SourceCollection, + "contact_collection": Collection + } def __init__( self, @@ -753,33 +626,6 @@ class Label(Base): self.album_collection: Collection[Album] = Collection(data=album_list, element_type=Album) self.current_artist_collection: Collection[Artist] = Collection(data=current_artist_list, element_type=Artist) - def _add_other_db_objects(self, object_type: Type["DatabaseObject"], object_list: List["DatabaseObject"]): - if object_type is Song: - return - - if object_type is Artist: - self.current_artist_collection.extend(object_list) - return - - if object_type is Album: - self.album_collection.extend(object_list) - return - - def _build_recursive_structures(self, build_version: int, merge: 
False): - if build_version == self.build_version: - return - self.build_version = build_version - - album: Album - for album in self.album_collection: - album.label_collection.append(self, merge_on_conflict=merge, merge_into_existing=False) - album._build_recursive_structures(build_version=build_version, merge=merge) - - artist: Artist - for artist in self.current_artist_collection: - artist.label_collection.append(self, merge_on_conflict=merge, merge_into_existing=False) - artist._build_recursive_structures(build_version=build_version, merge=merge) - @property def indexing_values(self) -> List[Tuple[str, object]]: return [ @@ -789,7 +635,7 @@ class Label(Base): ] @property - def options(self) -> List[DatabaseObject]: + def options(self) -> List[P]: options = [self] options.extend(self.current_artist_collection.shallow_list) options.extend(self.album_collection.shallow_list) diff --git a/src/music_kraken/objects/source.py b/src/music_kraken/objects/source.py index 4fb1e40..58d4b53 100644 --- a/src/music_kraken/objects/source.py +++ b/src/music_kraken/objects/source.py @@ -7,11 +7,11 @@ from ..utils.enums.source import SourcePages, SourceTypes from ..utils.config import youtube_settings from .metadata import Mapping, Metadata -from .parents import DatabaseObject +from .parents import OuterProxy from .collection import Collection -class Source(DatabaseObject): +class Source(OuterProxy): """ create somehow like that ```python @@ -19,6 +19,13 @@ class Source(DatabaseObject): Source(src="youtube", url="https://youtu.be/dfnsdajlhkjhsd") ``` """ + + page_enum: SourcePages + referer_page: SourcePages + + url: str + audio_url: str + COLLECTION_STRING_ATTRIBUTES = tuple() SIMPLE_STRING_ATTRIBUTES = { "page_enum": None, @@ -27,21 +34,11 @@ class Source(DatabaseObject): "audio_url": None } - def __init__( - self, - page_enum: SourcePages, - url: str = None, - id_: str = None, - referer_page: SourcePages = None, - adio_url: str = None - ) -> None: - 
DatabaseObject.__init__(self, id_=id_) + def __init__(self, page_enum: SourcePages, referer_page: SourcePages = None, **kwargs) -> None: + if referer_page is None: + referer_page = page_enum - self.page_enum = page_enum - self.referer_page = page_enum if referer_page is None else referer_page - - self.url = url - self.audio_url = adio_url + super().__init__(page_enum=page_enum, referer_page=referer_page, **kwargs) @classmethod def match_url(cls, url: str, referer_page: SourcePages) -> Optional["Source"]: diff --git a/src/music_kraken/objects/target.py b/src/music_kraken/objects/target.py index 4faee32..a5cc0dc 100644 --- a/src/music_kraken/objects/target.py +++ b/src/music_kraken/objects/target.py @@ -5,7 +5,7 @@ import logging import requests from tqdm import tqdm -from .parents import DatabaseObject +from .parents import OuterProxy from ..utils.config import main_settings, logging_settings from ..utils.string_processing import fit_to_file_system @@ -13,7 +13,7 @@ from ..utils.string_processing import fit_to_file_system LOGGER = logging.getLogger("target") -class Target(DatabaseObject): +class Target(OuterProxy): """ create somehow like that ```python diff --git a/src/music_kraken/utils/support_classes/query.py b/src/music_kraken/utils/support_classes/query.py index 239096b..d6d70c9 100644 --- a/src/music_kraken/utils/support_classes/query.py +++ b/src/music_kraken/utils/support_classes/query.py @@ -1,6 +1,6 @@ from typing import Optional, List -from ...objects import DatabaseObject, Artist, Album, Song +from ...objects import Artist, Album, Song, DatabaseObject class Query: def __init__( From f81521b014672d9616a019c3a9056ec5716f1243 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 20 Dec 2023 09:55:09 +0100 Subject: [PATCH 036/104] feat: completed the default factories --- src/create_custom_objects.py | 6 ++ .../objects/lint_default_factories.py | 66 +++++++++++++++++++ src/music_kraken/objects/lyrics.py | 9 +++ src/music_kraken/objects/parents.py | 44 
+------------ src/music_kraken/objects/song.py | 24 ++++++- src/music_kraken/objects/source.py | 11 ++-- src/music_kraken/objects/target.py | 8 +++ 7 files changed, 117 insertions(+), 51 deletions(-) create mode 100644 src/music_kraken/objects/lint_default_factories.py diff --git a/src/create_custom_objects.py b/src/create_custom_objects.py index 78adc96..c8f0052 100644 --- a/src/create_custom_objects.py +++ b/src/create_custom_objects.py @@ -9,6 +9,10 @@ from music_kraken.objects import ( from music_kraken.objects.collection import Collection from music_kraken.utils.enums import SourcePages +from music_kraken.objects.lint_default_factories import lint + +lint() + song = Song(title="Sad Story", isrc="testTest") other_song = Song(title="hihi", genre="dsbm") @@ -24,7 +28,9 @@ print(other_song.__dict__) print(song) +print(type(song).__dict__["__annotations__"]) +exit() only_smile = Artist( name="Only Smile", source_list=[Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/")], diff --git a/src/music_kraken/objects/lint_default_factories.py b/src/music_kraken/objects/lint_default_factories.py new file mode 100644 index 0000000..9afad36 --- /dev/null +++ b/src/music_kraken/objects/lint_default_factories.py @@ -0,0 +1,66 @@ +from typing import List, TypeVar, Type + +from .country import Language +from .lyrics import Lyrics +from .parents import OuterProxy +from .song import Song, Album, Artist, Label +from .source import Source +from .target import Target + +T = TypeVar('T', bound=OuterProxy) +ALL_CLASSES: List[Type[T]] = [Song, Album, Artist, Label, Source, Target, Lyrics] + + +def print_lint_res(missing_values: dict): + print("_default_factories = {") + for key, value in missing_values.items(): + print(f'\t"{key}": {value},') + print("}") + + +def lint_type(cls: T): + missing_values: dict = {} + + for key, value in cls.__dict__["__annotations__"].items(): + if value is None: + continue + + if (not key.islower()) or key.startswith("_") or (key.startswith("__") 
and key.endswith("__")): + continue + + if key in cls._default_factories: + continue + + factory = "lambda: None" + if isinstance(value, str): + if value == "SourceCollection": + factory = "SourceCollection" + elif "collection" in value.lower(): + factory = "Collection" + elif value.istitle(): + factory = value + else: + if value is Language: + factory = 'Language.by_alpha_2("en")' + else: + try: + value() + factory = value.__name__ + except TypeError: + pass + + missing_values[key] = factory + + if len(missing_values) > 0: + print(f"{cls.__name__}:") + print_lint_res(missing_values) + print() + else: + print(f"Everything is fine at {cls.__name__}") + + +def lint(): + for i in ALL_CLASSES: + lint_type(i) + + print() diff --git a/src/music_kraken/objects/lyrics.py b/src/music_kraken/objects/lyrics.py index f35e186..e070bbf 100644 --- a/src/music_kraken/objects/lyrics.py +++ b/src/music_kraken/objects/lyrics.py @@ -5,6 +5,7 @@ import pycountry from .parents import OuterProxy from .source import Source, SourceCollection from .formatted_text import FormattedText +from .country import Language class Lyrics(OuterProxy): @@ -13,6 +14,14 @@ class Lyrics(OuterProxy): "text": FormattedText(), "language": None } + + text: FormattedText + language: Language + + _default_factories = { + "text": FormattedText, + "language": Language.by_alpha_2("en"), + } def __init__( self, diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index 1ed5379..e4334f2 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -54,44 +54,13 @@ class InnerData: self.__setattr__(key, value) -class Meta(type): - def __new__(meta, classname, bases, classDict): - for key, value in classDict.items(): - if (not key.islower()) or key.startswith("_") or (key.startswith("__") and key.endswith("__")): - continue - - if hasattr(value, "__call__") or isinstance(value, property) or isinstance(value, classmethod): - continue - - print("hi", 
type(value)) - print(key, value) - - new_instance = type.__new__(meta, classname, bases, classDict) - - return new_instance - - - -class OuterProxy(metaclass=Meta): +class OuterProxy: """ Wraps the inner data, and provides apis, to naturally access those values. """ _default_factories: dict = {} - def __new__(cls, *args, **kwargs): - for key, value in cls.__dict__["__annotations__"].items(): - if (not key.islower()) or key.startswith("_") or (key.startswith("__") and key.endswith("__")): - continue - - if key in cls._default_factories: - continue - - cls._default_factories[key] = lambda: None - - return super().__new__(cls) - - def __init__(self, _id: int = None, dynamic: bool = False, **kwargs): _automatic_id: bool = False @@ -107,17 +76,6 @@ class OuterProxy(metaclass=Meta): kwargs["id"] = _id kwargs["dynamic"] = dynamic - key: str - for key, value in super().__getattribute__("__dict__").items(): - if (not key.islower()) or key.startswith("_") or (key.startswith("__") and key.endswith("__")): - continue - - if hasattr(value, "__call__") or isinstance(value, property) or isinstance(value, classmethod): - continue - - print(type(value)) - print(key, value) - for name, factory in type(self)._default_factories.items(): if name not in kwargs: kwargs[name] = factory() diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index 2067648..c870150 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -60,6 +60,11 @@ class Song(Base): "main_artist_collection": Collection, "album_collection": Collection, "feature_artist_collection": Collection, + + "title": lambda: None, + "unified_title": lambda: None, + "isrc": lambda: None, + "genre": lambda: None, } """ @@ -199,7 +204,7 @@ class Album(Base): "date": ID3Timestamp(), "barcode": None, "albumsort": None, - "notes": FormattedText() + "notes": FormattedText(), } title: str @@ -227,6 +232,12 @@ class Album(Base): "artist_collection": Collection, "song_collection": 
Collection, "label_collection": Collection, + + "title": lambda: None, + "unified_title": lambda: None, + "album_status": lambda: None, + "barcode": lambda: None, + "albumsort": lambda: None, } DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("song_collection", ) @@ -421,11 +432,17 @@ class Artist(Base): "notes": FormattedText, "lyrical_themes": list, "general_genre": lambda: "", + "source_collection": SourceCollection, "feature_song_collection": Collection, "main_album_collection": Collection, "contact_collection": Collection, "label_collection": Collection, + + "name": lambda: None, + "unified_name": lambda: None, + "country": Country, + "unformated_location": lambda: None, } DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("feature_song_collection", "main_album_collection") @@ -599,7 +616,10 @@ class Label(Base): "album_collection": Collection, "current_artist_collection": Collection, "source_collection": SourceCollection, - "contact_collection": Collection + "contact_collection": Collection, + + "name": lambda: None, + "unified_name": lambda: None, } def __init__( diff --git a/src/music_kraken/objects/source.py b/src/music_kraken/objects/source.py index 58d4b53..2ba269a 100644 --- a/src/music_kraken/objects/source.py +++ b/src/music_kraken/objects/source.py @@ -26,12 +26,11 @@ class Source(OuterProxy): url: str audio_url: str - COLLECTION_STRING_ATTRIBUTES = tuple() - SIMPLE_STRING_ATTRIBUTES = { - "page_enum": None, - "url": None, - "referer_page": None, - "audio_url": None + _default_factories = { + "page_enum": lambda: None, + "referer_page": lambda: None, + "url": str, + "audio_url": str, } def __init__(self, page_enum: SourcePages, referer_page: SourcePages = None, **kwargs) -> None: diff --git a/src/music_kraken/objects/target.py b/src/music_kraken/objects/target.py index a5cc0dc..0d8bb25 100644 --- a/src/music_kraken/objects/target.py +++ b/src/music_kraken/objects/target.py @@ -22,6 +22,14 @@ class Target(OuterProxy): ``` """ + file: str + path: str + + 
_default_factories = { + "file": str, + "path": str, + } + SIMPLE_STRING_ATTRIBUTES = { "_file": None, "_path": None From 794732aceefb6c145e592a88fd8200318bc2a4b7 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 20 Dec 2023 10:00:39 +0100 Subject: [PATCH 037/104] fix: default factory raised type error --- src/create_custom_objects.py | 3 ++- src/music_kraken/objects/collection.py | 1 - src/music_kraken/objects/song.py | 19 +------------------ 3 files changed, 3 insertions(+), 20 deletions(-) diff --git a/src/create_custom_objects.py b/src/create_custom_objects.py index c8f0052..a6b178a 100644 --- a/src/create_custom_objects.py +++ b/src/create_custom_objects.py @@ -13,6 +13,7 @@ from music_kraken.objects.lint_default_factories import lint lint() +""" song = Song(title="Sad Story", isrc="testTest") other_song = Song(title="hihi", genre="dsbm") @@ -29,8 +30,8 @@ print(other_song.__dict__) print(song) print(type(song).__dict__["__annotations__"]) +""" -exit() only_smile = Artist( name="Only Smile", source_list=[Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/")], diff --git a/src/music_kraken/objects/collection.py b/src/music_kraken/objects/collection.py index a16d64f..a3bd8a4 100644 --- a/src/music_kraken/objects/collection.py +++ b/src/music_kraken/objects/collection.py @@ -144,7 +144,6 @@ class Collection(Generic[T]): 2. merge into existing object 3. 
remap existing object """ - self = self.__self__ if __object.id in self._contains_ids: return diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index c870150..348c70c 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -393,23 +393,6 @@ All objects dependent on Artist class Artist(Base): - COLLECTION_STRING_ATTRIBUTES = ( - "feature_song_collection", - "main_album_collection", - "label_collection", - "source_collection" - ) - SIMPLE_STRING_ATTRIBUTES = { - "name": None, - "unified_name": None, - "country": None, - "formed_in": ID3Timestamp(), - "notes": FormattedText(), - "lyrical_themes": [], - "general_genre": "", - "unformated_location": None, - } - name: str unified_name: str country: Country @@ -441,7 +424,7 @@ class Artist(Base): "name": lambda: None, "unified_name": lambda: None, - "country": Country, + "country": lambda: None, "unformated_location": lambda: None, } From 15841ee079a675b607a5ebc9eea7af44f94af6df Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 20 Dec 2023 11:02:38 +0100 Subject: [PATCH 038/104] feat: implemented dynamic stuff OMG AAA --- src/create_custom_objects.py | 25 +++++++++++------------ src/music_kraken/objects/collection.py | 12 +++++------ src/music_kraken/objects/parents.py | 28 +++++++++++++++++++++----- src/music_kraken/objects/song.py | 4 ++++ src/music_kraken/objects/source.py | 7 +++++-- 5 files changed, 49 insertions(+), 27 deletions(-) diff --git a/src/create_custom_objects.py b/src/create_custom_objects.py index a6b178a..de950bb 100644 --- a/src/create_custom_objects.py +++ b/src/create_custom_objects.py @@ -52,7 +52,8 @@ only_smile = Artist( main_album_list=[ Album( title="Few words...", - source_list=[Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/album/few-words")], + source_list=[ + Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/album/few-words")], song_list=[ Song(title="Everything will be fine"), Song(title="Only 
Smile"), @@ -68,7 +69,8 @@ only_smile = Artist( ), Album( title="Your best friend", - source_list=[Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/album/your-best-friend")] + source_list=[ + Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/album/your-best-friend")] ) ] ), @@ -78,7 +80,8 @@ only_smile = Artist( main_album_list=[ Album( title="Few words...", - source_list=[Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/album/few-words")], + source_list=[ + Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/album/few-words")], song_list=[ Song(title="Everything will be fine"), Song(title="Only Smile"), @@ -94,7 +97,8 @@ only_smile = Artist( ), Album( title="Your best friend", - source_list=[Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/album/your-best-friend")] + source_list=[ + Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/album/your-best-friend")] ) ] ) @@ -107,9 +111,9 @@ only_smile = Artist( ] ) - objects_by_id = {} + def add_to_objects_dump(db_obj: DatabaseObject): objects_by_id[db_obj.id] = db_obj @@ -122,15 +126,10 @@ def add_to_objects_dump(db_obj: DatabaseObject): add_to_objects_dump(only_smile) for _id, _object in objects_by_id.items(): - try: - print(_id, _object.title, sep=": ") - except AttributeError: - try: - print(_id, _object.name, sep=": ") - except AttributeError: - print(_id, _object, sep=": ") + print(_id, _object, sep=": ") print(only_smile) + """ c = Collection([Song(title="hi"), Song(title="hi2"), Song(title="hi3")]) c1 = Collection([Song(title="he"), Song(title="hi5")]) @@ -180,4 +179,4 @@ print("b: ", b) print(c.data) print(c._data) -""" \ No newline at end of file +""" diff --git a/src/music_kraken/objects/collection.py b/src/music_kraken/objects/collection.py index a3bd8a4..ab7642b 100644 --- a/src/music_kraken/objects/collection.py +++ b/src/music_kraken/objects/collection.py @@ -8,6 +8,8 @@ T = TypeVar('T', bound=OuterProxy) class Collection(Generic[T]): + 
__is_collection__ = True + _data: List[T] _indexed_values: Dict[str, set] @@ -54,7 +56,6 @@ class Collection(Generic[T]): self._indexed_values[name].add(value) self._indexed_to_objects[value].append(__object) - print(from_map) if not from_map: for attribute, new_object in self.contain_given_in_attribute.items(): __object.__getattribute__(attribute).contain_collection_inside(new_object) @@ -181,14 +182,11 @@ class Collection(Generic[T]): self._data.append(__object) def append(self, __object: Optional[T], already_is_parent: bool = False, from_map: bool = False): - if __object is None: - return - - if __object.id in self._contains_ids: + if __object is None or __object.id in self._contains_ids: return exists_in_collection = self._contained_in_sub(__object) - if len(exists_in_collection) and self is exists_in_collection[0]: + if len(exists_in_collection) > 0 and self is exists_in_collection[0]: # assuming that the object already is contained in the correct collections if not already_is_parent: self.merge_into_self(__object, from_map=from_map) @@ -259,5 +257,5 @@ class Collection(Generic[T]): return self.__len__() == 0 def __iter__(self) -> Iterator[T]: - for element in self._data: + for element in self.data: yield element diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index e4334f2..2619d33 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -1,7 +1,8 @@ from __future__ import annotations import random -from collections import defaultdict +from functools import lru_cache + from typing import Optional, Dict, Tuple, List, Type, Generic, Any, TypeVar from .metadata import Metadata @@ -87,12 +88,11 @@ class OuterProxy: if isinstance(data_list, list) and name.endswith("_list"): collection_name = name.replace("_list", "_collection") - if collection_name not in self.__dict__: - continue - - collection = self.__getattribute__(collection_name) + collection = 
self._inner.__getattribute__(collection_name) collection.extend(data_list) + self._inner.__setattr__(collection_name, collection) + def __init_collections__(self): pass @@ -106,6 +106,9 @@ class OuterProxy: :return: """ + if __name.startswith("__"): + return super().__getattribute__(__name) + _inner: InnerData = super().__getattribute__("_inner") try: return _inner.__getattribute__(__name) @@ -174,3 +177,18 @@ class OuterProxy: """ return [] + + @property + @lru_cache() + def all_collections(self): + r = [] + + for key in self._default_factories: + val = self._inner.__getattribute__(key) + if hasattr(val, "__is_collection__"): + r.append(val) + + return r + + def __repr__(self): + return f"{type(self).__name__}({', '.join(key + ': ' + str(val) for key, val in self.indexing_values)})" diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index 348c70c..2d54aa3 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -148,6 +148,7 @@ class Song(Base): return main_artists return f"{main_artists} feat. 
{feature_artists}" + """ def __str__(self) -> str: artist_credit_str = "" artist_credits = self.get_artist_credits() @@ -155,6 +156,7 @@ class Song(Base): artist_credit_str = f" by {artist_credits}" return f"\"{self.title}\"{artist_credit_str}" + """ def __repr__(self) -> str: return f"Song(\"{self.title}\")" @@ -519,6 +521,7 @@ class Artist(Base): return metadata + """ def __str__(self, include_notes: bool = False): string = self.name or "" if include_notes: @@ -526,6 +529,7 @@ class Artist(Base): if plaintext_notes is not None: string += "\n" + plaintext_notes return string + """ def __repr__(self): return f"Artist(\"{self.name}\")" diff --git a/src/music_kraken/objects/source.py b/src/music_kraken/objects/source.py index 2ba269a..fff1bcc 100644 --- a/src/music_kraken/objects/source.py +++ b/src/music_kraken/objects/source.py @@ -33,11 +33,14 @@ class Source(OuterProxy): "audio_url": str, } - def __init__(self, page_enum: SourcePages, referer_page: SourcePages = None, **kwargs) -> None: + def __init__(self, page_enum: SourcePages, url: str, referer_page: SourcePages = None, audio_url: str = None, **kwargs) -> None: if referer_page is None: referer_page = page_enum - super().__init__(page_enum=page_enum, referer_page=referer_page, **kwargs) + if audio_url is None: + audio_url = url + + super().__init__(page_enum=page_enum, url=url, referer_page=referer_page, audio_url=audio_url, **kwargs) @classmethod def match_url(cls, url: str, referer_page: SourcePages) -> Optional["Source"]: From 695c9f62b9f7e84c9adb1b20206f1b12900e6be9 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 20 Dec 2023 12:31:53 +0100 Subject: [PATCH 039/104] fix: fixed uglyyy typo --- src/create_custom_objects.py | 2 - src/music_kraken/objects/collection.py | 59 ++++++++++--------- .../pages/youtube_music/_list_render.py | 29 +++++---- .../pages/youtube_music/youtube_music.py | 2 +- 4 files changed, 51 insertions(+), 41 deletions(-) diff --git a/src/create_custom_objects.py 
b/src/create_custom_objects.py index de950bb..d8360d3 100644 --- a/src/create_custom_objects.py +++ b/src/create_custom_objects.py @@ -130,7 +130,6 @@ for _id, _object in objects_by_id.items(): print(only_smile) -""" c = Collection([Song(title="hi"), Song(title="hi2"), Song(title="hi3")]) c1 = Collection([Song(title="he"), Song(title="hi5")]) c11 = Collection([Song(title="wow how ultra subby", isrc="hiii")]) @@ -179,4 +178,3 @@ print("b: ", b) print(c.data) print(c._data) -""" diff --git a/src/music_kraken/objects/collection.py b/src/music_kraken/objects/collection.py index ab7642b..519e4cc 100644 --- a/src/music_kraken/objects/collection.py +++ b/src/music_kraken/objects/collection.py @@ -4,6 +4,7 @@ from collections import defaultdict from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator from .parents import OuterProxy + T = TypeVar('T', bound=OuterProxy) @@ -34,7 +35,6 @@ class Collection(Generic[T]): # List of collection attributes that should be modified on append # Key: collection attribute (str) of appended element # Value: main collection to sync to - self.sync_on_append: Dict[str, Collection] = sync_on_append or {} self.contain_given_in_attribute: Dict[str, Collection] = contain_given_in_attribute or {} self.contain_attribute_in_given: Dict[str, Collection] = contain_attribute_in_given or {} self.append_object_to_attribute: Dict[str, T] = append_object_to_attribute or {} @@ -60,11 +60,11 @@ class Collection(Generic[T]): for attribute, new_object in self.contain_given_in_attribute.items(): __object.__getattribute__(attribute).contain_collection_inside(new_object) - for attribute, new_object in self.contain_given_in_attribute.items(): + for attribute, new_object in self.contain_attribute_in_given.items(): new_object.contain_collection_inside(__object.__getattribute__(attribute)) for attribute, new_object in self.append_object_to_attribute.items(): - __object.__getattribute__(attribute).append(new_object, from_map=True) + 
__object.__getattribute__(attribute).append(new_object) def _unmap_element(self, __object: T): self._contains_ids.remove(__object.id) @@ -94,6 +94,29 @@ class Collection(Generic[T]): return True return False + def _contained_in_sub(self, __object: T, break_at_first: bool = True) -> List[Collection]: + """ + Gets the collection this object is found in, if it is found in any. + + :param __object: + :param break_at_first: + :return: + """ + results = [] + + if self._contained_in_self(__object): + return [self] + + print(len(self.children), id(self), ";".join(str(id(i)) for i in self.children)) + print() + for collection in self.children: + results.extend(collection._contained_in_sub(__object, break_at_first=break_at_first)) + + if break_at_first: + return results + + return results + def _get_root_collections(self) -> List[Collection]: if not len(self.parents): return [self] @@ -107,19 +130,6 @@ class Collection(Generic[T]): def _is_root(self) -> bool: return len(self.parents) <= 0 - def _contained_in_sub(self, __object: T, break_at_first: bool = True) -> List[Collection]: - results = [] - - if self._contained_in_self(__object): - return [self] - - for collection in self.children: - results.extend(collection._contained_in_sub(__object, break_at_first=break_at_first)) - if break_at_first: - return results - - return results - def _get_parents_of_multiple_contained_children(self, __object: T): results = [] if len(self.children) < 2 or self._contained_in_self(__object): @@ -153,6 +163,7 @@ class Collection(Generic[T]): for name, value in __object.indexing_values: if value is None: continue + if value in self._indexed_values[name]: existing_object = self._indexed_to_objects[value][0] if existing_object.id == __object.id: @@ -175,18 +186,16 @@ class Collection(Generic[T]): return len(self._contained_in_sub(__object)) > 0 def _append(self, __object: T, from_map: bool = False): - for attribute, to_sync_with in self.sync_on_append.items(): - 
to_sync_with.sync_with_other_collection(__object.__getattribute__(attribute)) - self._map_element(__object, from_map=from_map) self._data.append(__object) def append(self, __object: Optional[T], already_is_parent: bool = False, from_map: bool = False): + print(__object) if __object is None or __object.id in self._contains_ids: return exists_in_collection = self._contained_in_sub(__object) - if len(exists_in_collection) > 0 and self is exists_in_collection[0]: + if len(exists_in_collection) and self is exists_in_collection[0]: # assuming that the object already is contained in the correct collections if not already_is_parent: self.merge_into_self(__object, from_map=from_map) @@ -230,15 +239,11 @@ class Collection(Generic[T]): for equal_sub_collection in equal_collection.children: self.contain_collection_inside(equal_sub_collection) - # now the ugly part - # replace all refs of the other element with this one - self = self._risky_merge(equal_collection) - - def contain_collection_inside(self, sub_collection: "Collection"): + def contain_collection_inside(self, sub_collection: Collection): """ This collection will ALWAYS contain everything from the passed in collection """ - if sub_collection in self.children: + if self is sub_collection or sub_collection in self.children: return self.children.append(sub_collection) @@ -257,5 +262,5 @@ class Collection(Generic[T]): return self.__len__() == 0 def __iter__(self) -> Iterator[T]: - for element in self.data: + for element in self._data: yield element diff --git a/src/music_kraken/pages/youtube_music/_list_render.py b/src/music_kraken/pages/youtube_music/_list_render.py index 1acecee..8076e54 100644 --- a/src/music_kraken/pages/youtube_music/_list_render.py +++ b/src/music_kraken/pages/youtube_music/_list_render.py @@ -15,7 +15,6 @@ from ...objects import ( ) from ._music_object_render import parse_run_list, parse_run_element - LOGGER = logging_settings["youtube_music_logger"] @@ -23,20 +22,24 @@ def 
music_card_shelf_renderer(renderer: dict) -> List[DatabaseObject]: results = parse_run_list(renderer.get("title", {}).get("runs", [])) for sub_renderer in renderer.get("contents", []): - results.extend(parse_renderer(sub_renderer)) + results.extend(parse_renderer(sub_renderer)) return results + def music_responsive_list_item_flex_column_renderer(renderer: dict) -> List[DatabaseObject]: return parse_run_list(renderer.get("text", {}).get("runs", [])) + def music_responsive_list_item_renderer(renderer: dict) -> List[DatabaseObject]: results = [] - for i, collumn in enumerate(renderer.get("flexColumns", [])): - _r = parse_renderer(collumn) + for i, column in enumerate(renderer.get("flexColumns", [])): + _r = parse_renderer(column) if i == 0 and len(_r) == 0: - renderer["text"] = collumn.get("musicResponsiveListItemFlexColumnRenderer", {}).get("text", {}).get("runs", [{}])[0].get("text") - + renderer["text"] = \ + column.get("musicResponsiveListItemFlexColumnRenderer", {}).get("text", {}).get("runs", [{}])[0].get( + "text") + results.extend(_r) _r = parse_run_element(renderer) @@ -54,7 +57,7 @@ def music_responsive_list_item_renderer(renderer: dict) -> List[DatabaseObject]: for song in song_list: song.album_collection.extend(album_list) song.main_artist_collection.extend(artist_list) - + for album in album_list: album.artist_collection.extend(artist_list) @@ -64,19 +67,22 @@ def music_responsive_list_item_renderer(renderer: dict) -> List[DatabaseObject]: return album_list if len(artist_list) > 0: return artist_list - + return results + def music_shelf_renderer(renderer: dict) -> List[DatabaseObject]: result = [] for subrenderer in renderer.get("contents"): result.extend(parse_renderer(subrenderer)) - + return result + def music_carousel_shelf_renderer(renderer: dict): return music_shelf_renderer(renderer=renderer) + def music_two_row_item_renderer(renderer: dict): return parse_run_list(renderer.get("title", {}).get("runs", [])) @@ -92,6 +98,7 @@ RENDERER_PARSERS = { 
"itemSectionRenderer": lambda _: [], } + def parse_renderer(renderer: dict) -> List[DatabaseObject]: result: List[DatabaseObject] = [] @@ -99,7 +106,7 @@ def parse_renderer(renderer: dict) -> List[DatabaseObject]: if renderer_name not in RENDERER_PARSERS: LOGGER.warning(f"Can't parse the renderer {renderer_name}.") continue - + result.extend(RENDERER_PARSERS[renderer_name](renderer)) - return result \ No newline at end of file + return result diff --git a/src/music_kraken/pages/youtube_music/youtube_music.py b/src/music_kraken/pages/youtube_music/youtube_music.py index 7eb139d..3ed65b3 100644 --- a/src/music_kraken/pages/youtube_music/youtube_music.py +++ b/src/music_kraken/pages/youtube_music/youtube_music.py @@ -261,7 +261,7 @@ class YoutubeMusic(SuperYouTube): results = [] """ - cant use fixed indices, because if something has no entries, the list dissappears + cant use fixed indices, because if something has no entries, the list disappears instead I have to try parse everything, and just reject community playlists and profiles. 
""" From aa139a7f7333a7576ee3012df11f92f7ee827c6e Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Fri, 29 Dec 2023 15:43:33 +0100 Subject: [PATCH 040/104] fixed the mapping in source maps --- .idea/vcs.xml | 2 ++ src/actual_donwload.py | 3 +-- src/music_kraken/download/page_attributes.py | 1 + src/music_kraken/download/results.py | 3 --- src/music_kraken/objects/__init__.py | 4 ++- src/music_kraken/objects/parents.py | 6 ++++- src/music_kraken/objects/source.py | 24 +++++------------ src/music_kraken/pages/abstract.py | 28 +------------------- 8 files changed, 19 insertions(+), 52 deletions(-) diff --git a/.idea/vcs.xml b/.idea/vcs.xml index 35eb1dd..0823e82 100644 --- a/.idea/vcs.xml +++ b/.idea/vcs.xml @@ -2,5 +2,7 @@ + + \ No newline at end of file diff --git a/src/actual_donwload.py b/src/actual_donwload.py index 5070293..293fe33 100644 --- a/src/actual_donwload.py +++ b/src/actual_donwload.py @@ -43,8 +43,7 @@ if __name__ == "__main__": bandcamp_test = [ "s: #a Ghost Bath", - "3", - "d: 0" + "0" ] diff --git a/src/music_kraken/download/page_attributes.py b/src/music_kraken/download/page_attributes.py index 8409660..c655d11 100644 --- a/src/music_kraken/download/page_attributes.py +++ b/src/music_kraken/download/page_attributes.py @@ -41,6 +41,7 @@ if DEBUG_PAGES: ALL_PAGES = {DEBUGGING_PAGE} AUDIO_PAGES = ALL_PAGES.union(AUDIO_PAGES) + class Pages: def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False) -> None: # initialize all page instances diff --git a/src/music_kraken/download/results.py b/src/music_kraken/download/results.py index 46911b1..bbe63bb 100644 --- a/src/music_kraken/download/results.py +++ b/src/music_kraken/download/results.py @@ -34,9 +34,6 @@ class Results: for index, music_object in self._by_index.items(): if index == exclude_index: continue - - music_object.strip_details() - class SearchResults(Results): def __init__( diff --git a/src/music_kraken/objects/__init__.py 
b/src/music_kraken/objects/__init__.py index 5596a2c..b4e75cb 100644 --- a/src/music_kraken/objects/__init__.py +++ b/src/music_kraken/objects/__init__.py @@ -20,4 +20,6 @@ from .collection import Collection from .country import Country from .contact import Contact -from .parents import OuterProxy as DatabaseObject +from .parents import OuterProxy + +DatabaseObject = TypeVar('T', bound=OuterProxy) diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index 2619d33..42172f3 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -136,7 +136,7 @@ class OuterProxy: def __eq__(self, other: Any): return self.__hash__() == other.__hash__() - def merge(self, __other: OuterProxy, override: bool = False): + def merge(self, __other: Optional[OuterProxy], override: bool = False): """ 1. merges the data of __other in self 2. replaces the data of __other with the data of self @@ -145,6 +145,10 @@ class OuterProxy: :param override: :return: """ + if __other is None: + _ = "debug" + return + self._inner.__merge__(__other._inner, override=override) __other._inner = self._inner diff --git a/src/music_kraken/objects/source.py b/src/music_kraken/objects/source.py index fff1bcc..0bc5694 100644 --- a/src/music_kraken/objects/source.py +++ b/src/music_kraken/objects/source.py @@ -1,6 +1,6 @@ from collections import defaultdict from enum import Enum -from typing import List, Dict, Set, Tuple, Optional +from typing import List, Dict, Set, Tuple, Optional, Iterable from urllib.parse import urlparse from ..utils.enums.source import SourcePages, SourceTypes @@ -12,14 +12,6 @@ from .collection import Collection class Source(OuterProxy): - """ - create somehow like that - ```python - # url won't be a valid one due to it being just an example - Source(src="youtube", url="https://youtu.be/dfnsdajlhkjhsd") - ``` - """ - page_enum: SourcePages referer_page: SourcePages @@ -37,9 +29,6 @@ class Source(OuterProxy): if referer_page 
is None: referer_page = page_enum - if audio_url is None: - audio_url = url - super().__init__(page_enum=page_enum, url=url, referer_page=referer_page, audio_url=audio_url, **kwargs) @classmethod @@ -120,16 +109,15 @@ class Source(OuterProxy): class SourceCollection(Collection): - def __init__(self, source_list: List[Source] = None): - source_list = source_list if source_list is not None else [] + def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs): self._page_to_source_list: Dict[SourcePages, List[Source]] = defaultdict(list) - super().__init__(data=source_list) + super().__init__(data=data, **kwargs) - def map_element(self, source: Source): - super().map_element(source) + def _map_element(self, __object: Source, **kwargs): + super()._map_element(__object, **kwargs) - self._page_to_source_list[source.page_enum].append(source) + self._page_to_source_list[__object.page_enum].append(__object) @property def source_pages(self) -> Set[SourcePages]: diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index bffc15c..2a42bde 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -174,34 +174,8 @@ class Page: # set this to true, if all song details can also be fetched by fetching album details NO_ADDITIONAL_DATA_FROM_SONG = False - - - def __init__(self): - super().__init__() - - """ - CODE I NEED WHEN I START WITH MULTITHREADING - - def __init__(self, end_event: EndThread, search_queue: Queue, search_result_queue: Queue): - self.end_event = end_event - - self.search_queue = search_queue - self.search_result_queue = search_result_queue - - super().__init__() - - @property - def _empty_working_queues(self): - return self.search_queue.empty() - def run(self) -> None: - while bool(self.end_event) and self._empty_working_queues: - if not self.search_queue.empty(): - self.search(self.search_queue.get()) - self.search_result_queue.put(FinishedSearch()) - continue - """ - + def 
get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: return None From 4cbbee26e46216e70e31a83ba9feb88e1c4618b1 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Fri, 29 Dec 2023 16:15:54 +0100 Subject: [PATCH 041/104] fixed the merging of collections --- src/actual_donwload.py | 3 ++- src/music_kraken/cli/main_downloader.py | 7 +++---- src/music_kraken/download/results.py | 6 +----- src/music_kraken/objects/collection.py | 7 ++++--- src/music_kraken/objects/parents.py | 2 +- src/music_kraken/objects/song.py | 6 ++++++ 6 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/actual_donwload.py b/src/actual_donwload.py index 293fe33..1654f67 100644 --- a/src/actual_donwload.py +++ b/src/actual_donwload.py @@ -47,4 +47,5 @@ if __name__ == "__main__": ] - music_kraken.cli.download(genre="test", command_list=bandcamp_test, process_metadata_anyway=True) \ No newline at end of file + music_kraken.cli.download(genre="test", command_list=bandcamp_test, process_metadata_anyway=True) + _ = "debug" \ No newline at end of file diff --git a/src/music_kraken/cli/main_downloader.py b/src/music_kraken/cli/main_downloader.py index 4c5ced2..3958150 100644 --- a/src/music_kraken/cli/main_downloader.py +++ b/src/music_kraken/cli/main_downloader.py @@ -295,9 +295,6 @@ class Downloader: page: Type[Page] music_object: DatabaseObject - if self.current_results is not None: - self.current_results.delete_details(index) - try: page, music_object = self.current_results.get_music_object_by_index(index) except KeyError: @@ -307,7 +304,9 @@ class Downloader: return self.pages.fetch_details(music_object) - + + print(music_object) + print(music_object.options) self.set_current_options(PageResults(page, music_object.options)) self.print_current_options() diff --git a/src/music_kraken/download/results.py b/src/music_kraken/download/results.py index bbe63bb..c0dff08 100644 --- a/src/music_kraken/download/results.py +++ 
b/src/music_kraken/download/results.py @@ -29,11 +29,7 @@ class Results: def get_music_object_by_index(self, index: int) -> Tuple[Type[Page], DatabaseObject]: # if this throws a key error, either the formatted generator needs to be iterated, or the option doesn't exist. return self._page_by_index[index], self._by_index[index] - - def delete_details(self, exclude_index: int): - for index, music_object in self._by_index.items(): - if index == exclude_index: - continue + class SearchResults(Results): def __init__( diff --git a/src/music_kraken/objects/collection.py b/src/music_kraken/objects/collection.py index 519e4cc..9863989 100644 --- a/src/music_kraken/objects/collection.py +++ b/src/music_kraken/objects/collection.py @@ -107,8 +107,6 @@ class Collection(Generic[T]): if self._contained_in_self(__object): return [self] - print(len(self.children), id(self), ";".join(str(id(i)) for i in self.children)) - print() for collection in self.children: results.extend(collection._contained_in_sub(__object, break_at_first=break_at_first)) @@ -186,11 +184,11 @@ class Collection(Generic[T]): return len(self._contained_in_sub(__object)) > 0 def _append(self, __object: T, from_map: bool = False): + # print(self, __object) self._map_element(__object, from_map=from_map) self._data.append(__object) def append(self, __object: Optional[T], already_is_parent: bool = False, from_map: bool = False): - print(__object) if __object is None or __object.id in self._contains_ids: return @@ -264,3 +262,6 @@ class Collection(Generic[T]): def __iter__(self) -> Iterator[T]: for element in self._data: yield element + + def __merge__(self, __other: Collection, override: bool = False): + self.extend(__other.shallow_list) \ No newline at end of file diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index 42172f3..572ad9a 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -47,7 +47,7 @@ class InnerData: # if the object of 
value implemented __merge__, it merges existing = self.__getattribute__(key) if hasattr(type(existing), "__merge__"): - existing.merge_into_self(value, override) + existing.__merge__(value, override) continue # override the existing value if requested diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index 2d54aa3..3dfc982 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -446,6 +446,12 @@ class Artist(Base): "current_artist_collection": self } + @property + def options(self) -> List[P]: + options = [self, *self.main_album_collection.shallow_list, *self.feature_album] + print(options) + return options + def update_albumsort(self): """ This updates the albumsort attributes, of the albums in From 2dff8e4e0efd4820371804e9fcb05fae7374cc21 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Fri, 29 Dec 2023 20:18:34 +0100 Subject: [PATCH 042/104] fix: creation of lyrics --- src/music_kraken/objects/lyrics.py | 22 +++++----------------- src/music_kraken/objects/song.py | 4 +--- src/music_kraken/pages/bandcamp.py | 4 +--- 3 files changed, 7 insertions(+), 23 deletions(-) diff --git a/src/music_kraken/objects/lyrics.py b/src/music_kraken/objects/lyrics.py index e070bbf..1d0c0c9 100644 --- a/src/music_kraken/objects/lyrics.py +++ b/src/music_kraken/objects/lyrics.py @@ -18,23 +18,11 @@ class Lyrics(OuterProxy): text: FormattedText language: Language + source_collection: SourceCollection + _default_factories = { "text": FormattedText, - "language": Language.by_alpha_2("en"), + "language": lambda: Language.by_alpha_2("en"), + + "source_collection": SourceCollection, } - - def __init__( - self, - text: FormattedText, - language: pycountry.Languages = pycountry.languages.get(alpha_2="en"), - _id: str = None, - dynamic: bool = False, - source_list: List[Source] = None, - **kwargs - ) -> None: - super().__init__(_id=_id, dynamic=dynamic, **kwargs) - - self.text: FormattedText 
= text or FormattedText() - self.language: pycountry.Languages = language - - self.source_collection: SourceCollection = SourceCollection(source_list) diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index 3dfc982..87db238 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -290,9 +290,7 @@ class Album(Base): @property def options(self) -> List[P]: - options = self.artist_collection.shallow_list - options.append(self) - options.extend(self.song_collection) + options = [*self.artist_collection, self, *self.song_collection] return options diff --git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py index 4cfb706..c0d1bb3 100644 --- a/src/music_kraken/pages/bandcamp.py +++ b/src/music_kraken/pages/bandcamp.py @@ -306,9 +306,7 @@ class Bandcamp(Page): track_lyrics = soup.find("div", {"class": "lyricsText"}) if track_lyrics: self.LOGGER.debug(" Lyrics retrieved..") - return [Lyrics(FormattedText( - html=track_lyrics.prettify() - ), pycountry.languages.get(alpha_2="en"))] + return [Lyrics(text=FormattedText(html=track_lyrics.prettify()))] return [] From 66539e6614d83831e2a7b01871783bb4687cb0c4 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Fri, 29 Dec 2023 21:16:09 +0100 Subject: [PATCH 043/104] feat: added annotations for init --- src/create_custom_objects.py | 3 +- .../objects/lint_default_factories.py | 32 ++++- src/music_kraken/objects/lyrics.py | 11 +- src/music_kraken/objects/song.py | 126 +++++++----------- src/music_kraken/objects/source.py | 15 ++- src/music_kraken/objects/target.py | 42 +++--- src/settings.py | 16 ++- 7 files changed, 122 insertions(+), 123 deletions(-) diff --git a/src/create_custom_objects.py b/src/create_custom_objects.py index d8360d3..d273986 100644 --- a/src/create_custom_objects.py +++ b/src/create_custom_objects.py @@ -31,7 +31,7 @@ print(other_song.__dict__) print(song) 
print(type(song).__dict__["__annotations__"]) """ - +""" only_smile = Artist( name="Only Smile", source_list=[Source(SourcePages.BANDCAMP, "https://onlysmile.bandcamp.com/")], @@ -178,3 +178,4 @@ print("b: ", b) print(c.data) print(c._data) +""" \ No newline at end of file diff --git a/src/music_kraken/objects/lint_default_factories.py b/src/music_kraken/objects/lint_default_factories.py index 9afad36..a39f407 100644 --- a/src/music_kraken/objects/lint_default_factories.py +++ b/src/music_kraken/objects/lint_default_factories.py @@ -17,11 +17,13 @@ def print_lint_res(missing_values: dict): print(f'\t"{key}": {value},') print("}") +# def __init__(self, foo: str, bar) -> None: ... def lint_type(cls: T): + all_values: dict = {} missing_values: dict = {} - for key, value in cls.__dict__["__annotations__"].items(): + for key, value in cls.__annotations__.items(): if value is None: continue @@ -58,6 +60,34 @@ def lint_type(cls: T): else: print(f"Everything is fine at {cls.__name__}") + p = [] + s = [] + for key, value in cls.__annotations__.items(): + has_default = key in cls._default_factories + + if not isinstance(value, str): + value = value.__name__ + + if key.endswith("_collection"): + key = key.replace("_collection", "_list") + + if isinstance(value, str): + if value.startswith("Collection[") and value.endswith("]"): + value = value.replace("Collection", "List") + + if isinstance(value, str) and has_default: + value = value + " = None" + + p.append(f'{key}: {value}') + s.append(f'{key}={key}') + p.append("**kwargs") + s.append("**kwargs") + + print("# This is automatically generated") + print(f"def __init__(self, {', '.join(p)}) -> None:") + print(f"\tsuper().__init__({', '.join(s)})") + print() + def lint(): for i in ALL_CLASSES: diff --git a/src/music_kraken/objects/lyrics.py b/src/music_kraken/objects/lyrics.py index 1d0c0c9..0ca37f0 100644 --- a/src/music_kraken/objects/lyrics.py +++ b/src/music_kraken/objects/lyrics.py @@ -9,12 +9,6 @@ from .country import 
Language class Lyrics(OuterProxy): - COLLECTION_STRING_ATTRIBUTES = ("source_collection",) - SIMPLE_STRING_ATTRIBUTES = { - "text": FormattedText(), - "language": None - } - text: FormattedText language: Language @@ -26,3 +20,8 @@ class Lyrics(OuterProxy): "source_collection": SourceCollection, } + + # This is automatically generated + def __init__(self, text: FormattedText = None, language: Language = None, source_list: SourceCollection = None, + **kwargs) -> None: + super().__init__(text=text, language=language, source_list=source_list, **kwargs) diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index 87db238..b8772b0 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -61,43 +61,24 @@ class Song(Base): "album_collection": Collection, "feature_artist_collection": Collection, - "title": lambda: None, "unified_title": lambda: None, "isrc": lambda: None, "genre": lambda: None, } - """ - COLLECTION_STRING_ATTRIBUTES = ( - "lyrics_collection", "album_collection", "main_artist_collection", "feature_artist_collection", - "source_collection") - SIMPLE_STRING_ATTRIBUTES = { - "title": None, - "unified_title": None, - "isrc": None, - "length": None, - "tracksort": 0, - "genre": None, - "notes": FormattedText() - } - """ + def __init__(self, title: str, unified_title: str = None, isrc: str = None, length: int = None, genre: str = None, + note: FormattedText = None, source_list: SourceCollection = None, target_list: List[Target] = None, + lyrics_list: List[Lyrics] = None, main_artist_list: List[Artist] = None, + feature_artist_list: List[Artist] = None, album_list: List[Album] = None, **kwargs) -> None: + super().__init__(title=title, unified_title=unified_title, isrc=isrc, length=length, genre=genre, note=note, + source_list=source_list, target_list=target_list, lyrics_list=lyrics_list, + main_artist_list=main_artist_list, feature_artist_list=feature_artist_list, + album_list=album_list, **kwargs) + + 
UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("album_collection", "main_artist_collection", "feature_artist_collection") - """ - title: str = None, - unified_title: str = None, - isrc: str = None, - length: int = None, - tracksort: int = None, - genre: str = None, - source_list: List[Source] = None, - target_list: List[Target] = None, - lyrics_list: List[Lyrics] = None, - album_list: List['Album'] = None, - main_artist_list: List['Artist'] = None, - feature_artist_list: List['Artist'] = None, - notes: FormattedText = None, - """ + def __init_collections__(self) -> None: self.album_collection.contain_given_in_attribute = { "artist_collection": self.main_artist_collection, @@ -196,19 +177,6 @@ All objects dependent on Album class Album(Base): - COLLECTION_STRING_ATTRIBUTES = ("label_collection", "artist_collection", "song_collection") - SIMPLE_STRING_ATTRIBUTES = { - "title": None, - "unified_title": None, - "album_status": None, - "album_type": AlbumType.OTHER, - "language": None, - "date": ID3Timestamp(), - "barcode": None, - "albumsort": None, - "notes": FormattedText(), - } - title: str unified_title: str album_status: AlbumStatus @@ -225,6 +193,11 @@ class Album(Base): label_collection: Collection[Label] _default_factories = { + "unified_title": lambda: None, + "album_status": lambda: None, + "barcode": lambda: None, + "albumsort": lambda: None, + "album_type": lambda: AlbumType.OTHER, "language": lambda: Language.by_alpha_2("en"), "date": ID3Timestamp, @@ -235,13 +208,19 @@ class Album(Base): "song_collection": Collection, "label_collection": Collection, - "title": lambda: None, - "unified_title": lambda: None, - "album_status": lambda: None, - "barcode": lambda: None, - "albumsort": lambda: None, } + # This is automatically generated + def __init__(self, title: str, unified_title: str = None, album_status: AlbumStatus = None, + album_type: AlbumType = None, language: Language = None, date: ID3Timestamp = None, + barcode: str = None, albumsort: int = None, notes: 
FormattedText = None, + source_list: SourceCollection = None, artist_list: List[Artist] = None, song_list: List[Song] = None, + label_list: List[Label] = None, **kwargs) -> None: + super().__init__(title=title, unified_title=unified_title, album_status=album_status, album_type=album_type, + language=language, date=date, barcode=barcode, albumsort=albumsort, notes=notes, + source_list=source_list, artist_list=artist_list, song_list=song_list, label_list=label_list, + **kwargs) + DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("song_collection", ) UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("artist_collection", "label_collection") @@ -411,6 +390,10 @@ class Artist(Base): label_collection: Collection[Label] _default_factories = { + "unified_name": lambda: None, + "country": lambda: None, + "unformated_location": lambda: None, + "formed_in": ID3Timestamp, "notes": FormattedText, "lyrical_themes": list, @@ -421,13 +404,20 @@ class Artist(Base): "main_album_collection": Collection, "contact_collection": Collection, "label_collection": Collection, - - "name": lambda: None, - "unified_name": lambda: None, - "country": lambda: None, - "unformated_location": lambda: None, } + # This is automatically generated + def __init__(self, name: str, unified_name: str = None, country: Country = None, formed_in: ID3Timestamp = None, + notes: FormattedText = None, lyrical_themes: List[str] = None, general_genre: str = None, + unformated_location: str = None, source_list: SourceCollection = None, + contact_list: List[Contact] = None, feature_song_list: List[Song] = None, + main_album_list: List[Album] = None, label_list: List[Label] = None, **kwargs) -> None: + super().__init__(name=name, unified_name=unified_name, country=country, formed_in=formed_in, notes=notes, + lyrical_themes=lyrical_themes, general_genre=general_genre, + unformated_location=unformated_location, source_list=source_list, contact_list=contact_list, + feature_song_list=feature_song_list, main_album_list=main_album_list, 
label_list=label_list, + **kwargs) + DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("feature_song_collection", "main_album_collection") UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection", ) @@ -609,33 +599,15 @@ class Label(Base): "source_collection": SourceCollection, "contact_collection": Collection, - "name": lambda: None, "unified_name": lambda: None, } - def __init__( - self, - _id: int = None, - dynamic: bool = False, - name: str = None, - unified_name: str = None, - notes: FormattedText = None, - album_list: List[Album] = None, - current_artist_list: List[Artist] = None, - source_list: List[Source] = None, - **kwargs - ): - Base.__init__(self, _id=_id, dynamic=dynamic, **kwargs) - - self.name: str = name - self.unified_name: str = unified_name - if unified_name is None and name is not None: - self.unified_name = unify(name) - self.notes = notes or FormattedText() - - self.source_collection: SourceCollection = SourceCollection(source_list) - self.album_collection: Collection[Album] = Collection(data=album_list, element_type=Album) - self.current_artist_collection: Collection[Artist] = Collection(data=current_artist_list, element_type=Artist) + def __init__(self, name: str, unified_name: str = None, notes: FormattedText = None, + source_list: SourceCollection = None, contact_list: List[Contact] = None, + album_list: List[Album] = None, current_artist_list: List[Artist] = None, **kwargs) -> None: + super().__init__(name=name, unified_name=unified_name, notes=notes, source_list=source_list, + contact_list=contact_list, album_list=album_list, current_artist_list=current_artist_list, + **kwargs) @property def indexing_values(self) -> List[Tuple[str, object]]: diff --git a/src/music_kraken/objects/source.py b/src/music_kraken/objects/source.py index 0bc5694..1070bd5 100644 --- a/src/music_kraken/objects/source.py +++ b/src/music_kraken/objects/source.py @@ -12,24 +12,25 @@ from .collection import Collection class Source(OuterProxy): + url: str + page_enum: 
SourcePages referer_page: SourcePages - url: str audio_url: str _default_factories = { - "page_enum": lambda: None, - "referer_page": lambda: None, - "url": str, - "audio_url": str, + "audio_url": lambda: None, } - def __init__(self, page_enum: SourcePages, url: str, referer_page: SourcePages = None, audio_url: str = None, **kwargs) -> None: + # This is automatically generated + def __init__(self, url: str, page_enum: SourcePages, referer_page: SourcePages = None, audio_url: str = None, + **kwargs) -> None: + if referer_page is None: referer_page = page_enum - super().__init__(page_enum=page_enum, url=url, referer_page=referer_page, audio_url=audio_url, **kwargs) + super().__init__(url=url, page_enum=page_enum, referer_page=referer_page, audio_url=audio_url, **kwargs) @classmethod def match_url(cls, url: str, referer_page: SourcePages) -> Optional["Source"]: diff --git a/src/music_kraken/objects/target.py b/src/music_kraken/objects/target.py index 0d8bb25..c6a5b0e 100644 --- a/src/music_kraken/objects/target.py +++ b/src/music_kraken/objects/target.py @@ -1,5 +1,7 @@ +from __future__ import annotations + from pathlib import Path -from typing import List, Tuple, TextIO +from typing import List, Tuple, TextIO, Union import logging import requests @@ -22,40 +24,26 @@ class Target(OuterProxy): ``` """ - file: str - path: str + file_path: Path _default_factories = { - "file": str, - "path": str, } - SIMPLE_STRING_ATTRIBUTES = { - "_file": None, - "_path": None - } - COLLECTION_STRING_ATTRIBUTES = tuple() + # This is automatically generated + def __init__(self, file_path: Union[Path, str], relative_to_music_dir: bool = False, **kwargs) -> None: + if not isinstance(file_path, Path): + file_path = Path(file_path) - def __init__( - self, - file: str = None, - path: str = None, - dynamic: bool = False, - relative_to_music_dir: bool = False - ) -> None: - super().__init__(dynamic=dynamic) - self._file: Path = Path(fit_to_file_system(file)) - self._path: Path = 
fit_to_file_system(Path(main_settings["music_directory"], path) if relative_to_music_dir else Path(path)) + if relative_to_music_dir: + file_path = Path(main_settings["music_directory"], file_path) + + super().__init__(file_path=fit_to_file_system(file_path), **kwargs) self.is_relative_to_music_dir: bool = relative_to_music_dir def __repr__(self) -> str: return str(self.file_path) - @property - def file_path(self) -> Path: - return Path(self._path, self._file) - @property def indexing_values(self) -> List[Tuple[str, object]]: return [('filepath', self.file_path)] @@ -67,8 +55,8 @@ class Target(OuterProxy): @property def size(self) -> int: """ - returns the size the downloaded autio takes up in bytes - returns 0 if the file doesn't exsit + returns the size the downloaded audio takes up in bytes + returns 0 if the file doesn't exist """ if not self.exists: return 0 @@ -78,7 +66,7 @@ class Target(OuterProxy): def create_path(self): self._path.mkdir(parents=True, exist_ok=True) - def copy_content(self, copy_to: "Target"): + def copy_content(self, copy_to: Target): if not self.exists: LOGGER.warning(f"No file exists at: {self.file_path}") return diff --git a/src/settings.py b/src/settings.py index 6a14b84..7a2f323 100644 --- a/src/settings.py +++ b/src/settings.py @@ -1,6 +1,14 @@ -from pathlib import Path -import tomllib -data = tomllib.load(Path("/home/lars/music-kraken.conf").open("r")) -print(data) \ No newline at end of file +class Foo: + class_attr: str + class_attr_two: str + + def __init__(self, foo: str, bar) -> None: ... 
+ + + + +f = Foo("fdfasdf", ["fsd", "fsedf"]) + +print(f) From 60297d6faf4b63140d51587edd59026c64421c71 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Fri, 29 Dec 2023 21:50:40 +0100 Subject: [PATCH 044/104] fix: fixed previous introduced bugs --- src/music_kraken/objects/collection.py | 3 ++- src/music_kraken/objects/parents.py | 25 +++++++++++++++---------- src/music_kraken/objects/song.py | 26 ++++++++++++++++---------- src/music_kraken/objects/source.py | 2 +- 4 files changed, 34 insertions(+), 22 deletions(-) diff --git a/src/music_kraken/objects/collection.py b/src/music_kraken/objects/collection.py index 9863989..7bf4b6c 100644 --- a/src/music_kraken/objects/collection.py +++ b/src/music_kraken/objects/collection.py @@ -184,7 +184,7 @@ class Collection(Generic[T]): return len(self._contained_in_sub(__object)) > 0 def _append(self, __object: T, from_map: bool = False): - # print(self, __object) + print(self, __object) self._map_element(__object, from_map=from_map) self._data.append(__object) @@ -264,4 +264,5 @@ class Collection(Generic[T]): yield element def __merge__(self, __other: Collection, override: bool = False): + print(__other) self.extend(__other.shallow_list) \ No newline at end of file diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index 572ad9a..77630a9 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -38,7 +38,7 @@ class InnerData: :return: """ - for key, value in __other.__dict__.items(): + for key, value in __other.__dict__.copy().items(): # just set the other value if self doesn't already have it if key not in self.__dict__: self.__setattr__(key, value) @@ -78,20 +78,25 @@ class OuterProxy: kwargs["dynamic"] = dynamic for name, factory in type(self)._default_factories.items(): - if name not in kwargs: + if kwargs.get(name, None) is None: kwargs[name] = factory() + collection_data: Dict[str, list] = {} + for name, value 
in kwargs.copy().items(): + if isinstance(value, list) and name.endswith("_list"): + collection_name = name.replace("_list", "_collection") + collection_data[collection_name] = value + + del kwargs[name] + self._inner: InnerData = InnerData(**kwargs) self.__init_collections__() - for name, data_list in kwargs.items(): - if isinstance(data_list, list) and name.endswith("_list"): - collection_name = name.replace("_list", "_collection") + for name, data_list in collection_data.items(): + collection = self._inner.__getattribute__(name) + collection.extend(data_list) - collection = self._inner.__getattribute__(collection_name) - collection.extend(data_list) - - self._inner.__setattr__(collection_name, collection) + self._inner.__setattr__(name, collection) def __init_collections__(self): pass @@ -106,7 +111,7 @@ class OuterProxy: :return: """ - if __name.startswith("__"): + if __name.startswith("_"): return super().__getattribute__(__name) _inner: InnerData = super().__getattribute__("_inner") diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index b8772b0..d48dd1f 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -61,15 +61,18 @@ class Song(Base): "album_collection": Collection, "feature_artist_collection": Collection, + "title": lambda: None, "unified_title": lambda: None, "isrc": lambda: None, "genre": lambda: None, } - def __init__(self, title: str, unified_title: str = None, isrc: str = None, length: int = None, genre: str = None, - note: FormattedText = None, source_list: SourceCollection = None, target_list: List[Target] = None, - lyrics_list: List[Lyrics] = None, main_artist_list: List[Artist] = None, - feature_artist_list: List[Artist] = None, album_list: List[Album] = None, **kwargs) -> None: + def __init__(self, title: str = None, unified_title: str = None, isrc: str = None, length: int = None, + genre: str = None, note: FormattedText = None, source_list: SourceCollection = None, + 
target_list: List[Target] = None, lyrics_list: List[Lyrics] = None, + main_artist_list: List[Artist] = None, feature_artist_list: List[Artist] = None, + album_list: List[Album] = None, **kwargs) -> None: + super().__init__(title=title, unified_title=unified_title, isrc=isrc, length=length, genre=genre, note=note, source_list=source_list, target_list=target_list, lyrics_list=lyrics_list, main_artist_list=main_artist_list, feature_artist_list=feature_artist_list, @@ -193,6 +196,7 @@ class Album(Base): label_collection: Collection[Label] _default_factories = { + "title": lambda: None, "unified_title": lambda: None, "album_status": lambda: None, "barcode": lambda: None, @@ -211,7 +215,7 @@ class Album(Base): } # This is automatically generated - def __init__(self, title: str, unified_title: str = None, album_status: AlbumStatus = None, + def __init__(self, title: str = None, unified_title: str = None, album_status: AlbumStatus = None, album_type: AlbumType = None, language: Language = None, date: ID3Timestamp = None, barcode: str = None, albumsort: int = None, notes: FormattedText = None, source_list: SourceCollection = None, artist_list: List[Artist] = None, song_list: List[Song] = None, @@ -390,6 +394,7 @@ class Artist(Base): label_collection: Collection[Label] _default_factories = { + "name": lambda: None, "unified_name": lambda: None, "country": lambda: None, "unformated_location": lambda: None, @@ -407,11 +412,12 @@ class Artist(Base): } # This is automatically generated - def __init__(self, name: str, unified_name: str = None, country: Country = None, formed_in: ID3Timestamp = None, - notes: FormattedText = None, lyrical_themes: List[str] = None, general_genre: str = None, - unformated_location: str = None, source_list: SourceCollection = None, + def __init__(self, name: str = None, unified_name: str = None, country: Country = None, + formed_in: ID3Timestamp = None, notes: FormattedText = None, lyrical_themes: List[str] = None, + general_genre: str = None, 
unformated_location: str = None, source_list: SourceCollection = None, contact_list: List[Contact] = None, feature_song_list: List[Song] = None, main_album_list: List[Album] = None, label_list: List[Label] = None, **kwargs) -> None: + super().__init__(name=name, unified_name=unified_name, country=country, formed_in=formed_in, notes=notes, lyrical_themes=lyrical_themes, general_genre=general_genre, unformated_location=unformated_location, source_list=source_list, contact_list=contact_list, @@ -598,11 +604,11 @@ class Label(Base): "current_artist_collection": Collection, "source_collection": SourceCollection, "contact_collection": Collection, - + "name": lambda: None, "unified_name": lambda: None, } - def __init__(self, name: str, unified_name: str = None, notes: FormattedText = None, + def __init__(self, name: str = None, unified_name: str = None, notes: FormattedText = None, source_list: SourceCollection = None, contact_list: List[Contact] = None, album_list: List[Album] = None, current_artist_list: List[Artist] = None, **kwargs) -> None: super().__init__(name=name, unified_name=unified_name, notes=notes, source_list=source_list, diff --git a/src/music_kraken/objects/source.py b/src/music_kraken/objects/source.py index 1070bd5..8f39cb0 100644 --- a/src/music_kraken/objects/source.py +++ b/src/music_kraken/objects/source.py @@ -24,7 +24,7 @@ class Source(OuterProxy): } # This is automatically generated - def __init__(self, url: str, page_enum: SourcePages, referer_page: SourcePages = None, audio_url: str = None, + def __init__(self, page_enum: SourcePages, url: str, referer_page: SourcePages = None, audio_url: str = None, **kwargs) -> None: if referer_page is None: From f81720f01baf58b73afba945c714ba9bb69319b8 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 15 Jan 2024 10:50:24 +0100 Subject: [PATCH 045/104] fix: recursion depth --- src/actual_donwload.py | 3 +- src/music_kraken/objects/collection.py | 79 +++++++++++++++++++++++--- 
src/music_kraken/objects/parents.py | 7 ++- src/music_kraken/objects/song.py | 2 +- 4 files changed, 78 insertions(+), 13 deletions(-) diff --git a/src/actual_donwload.py b/src/actual_donwload.py index 1654f67..b7a468c 100644 --- a/src/actual_donwload.py +++ b/src/actual_donwload.py @@ -43,7 +43,8 @@ if __name__ == "__main__": bandcamp_test = [ "s: #a Ghost Bath", - "0" + "0", + "d: 4" ] diff --git a/src/music_kraken/objects/collection.py b/src/music_kraken/objects/collection.py index 7bf4b6c..ee99692 100644 --- a/src/music_kraken/objects/collection.py +++ b/src/music_kraken/objects/collection.py @@ -1,10 +1,9 @@ from __future__ import annotations from collections import defaultdict -from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator +from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple from .parents import OuterProxy - T = TypeVar('T', bound=OuterProxy) @@ -47,6 +46,9 @@ class Collection(Generic[T]): self.extend(data) def _map_element(self, __object: T, from_map: bool = False): + if __object.id in self._contains_ids: + return + self._contains_ids.add(__object.id) for name, value in __object.indexing_values: @@ -67,7 +69,8 @@ class Collection(Generic[T]): __object.__getattribute__(attribute).append(new_object) def _unmap_element(self, __object: T): - self._contains_ids.remove(__object.id) + if __object.id in self._contains_ids: + self._contains_ids.remove(__object.id) for name, value in __object.indexing_values: if value is None: @@ -188,10 +191,53 @@ class Collection(Generic[T]): self._map_element(__object, from_map=from_map) self._data.append(__object) + def _find_object_in_self(self, __object: T) -> Optional[T]: + for name, value in __object.indexing_values: + if value is None: + continue + if value in self._indexed_values[name]: + return self._indexed_to_objects[value][0] + + def _find_object(self, __object: T) -> Tuple[Collection[T], Optional[T]]: + other_object = self._find_object_in_self(__object) + 
if other_object is not None: + return self, other_object + + for c in self.children: + o, other_object = c._find_object(__object) + if other_object is not None: + return o, other_object + + return self, None + def append(self, __object: Optional[T], already_is_parent: bool = False, from_map: bool = False): + """ + If an object, that represents the same entity exists in a relevant collection, + merge into this object. (and remap) + Else append to this collection. + + :param __object: + :param already_is_parent: + :param from_map: + :return: + """ + if __object is None or __object.id in self._contains_ids: return + append_to, existing_object = self._find_object(__object) + + if existing_object is None: + # append + append_to._data.append(__object) + else: + # merge + append_to._unmap_element(existing_object) + existing_object.merge(__object) + + append_to._map_element(__object, from_map=from_map) + + """ exists_in_collection = self._contained_in_sub(__object) if len(exists_in_collection) and self is exists_in_collection[0]: # assuming that the object already is contained in the correct collections @@ -202,20 +248,20 @@ class Collection(Generic[T]): if not len(exists_in_collection): self._append(__object, from_map=from_map) else: - pass exists_in_collection[0].merge_into_self(__object, from_map=from_map) if not already_is_parent or not self._is_root: for parent_collection in self._get_parents_of_multiple_contained_children(__object): pass parent_collection.append(__object, already_is_parent=True, from_map=from_map) + """ - def extend(self, __iterable: Optional[Iterable[T]]): + def extend(self, __iterable: Optional[Iterable[T]], from_map: bool = False): if __iterable is None: return for __object in __iterable: - self.append(__object) + self.append(__object, from_map=from_map) def sync_with_other_collection(self, equal_collection: Collection): """ @@ -263,6 +309,23 @@ class Collection(Generic[T]): for element in self._data: yield element + for c in self.children: + for 
element in c: + yield element + def __merge__(self, __other: Collection, override: bool = False): - print(__other) - self.extend(__other.shallow_list) \ No newline at end of file + self.extend(__other.shallow_list, from_map=True) + + def __getitem__(self, item: int): + if item < len(self._data): + return self._data[item] + + item = item - (len(self._data) - 1) + + for c in self.children: + if item < len(c): + return c[item] + + item = item - (len(self._data) - 1) + + raise IndexError diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index 77630a9..c7d8385 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -3,7 +3,7 @@ from __future__ import annotations import random from functools import lru_cache -from typing import Optional, Dict, Tuple, List, Type, Generic, Any, TypeVar +from typing import Optional, Dict, Tuple, List, Type, Generic, Any, TypeVar, Set from .metadata import Metadata from ..utils.config import logging_settings @@ -61,6 +61,7 @@ class OuterProxy: """ _default_factories: dict = {} + _outer_attribute: Set[str] = {"options", "metadata", "indexing_values"} def __init__(self, _id: int = None, dynamic: bool = False, **kwargs): _automatic_id: bool = False @@ -111,8 +112,8 @@ class OuterProxy: :return: """ - if __name.startswith("_"): - return super().__getattribute__(__name) + if __name.startswith("_") or __name in self._outer_attribute: + return object.__getattribute__(self, __name) _inner: InnerData = super().__getattribute__("_inner") try: diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index d48dd1f..c20b5b9 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -268,7 +268,7 @@ class Album(Base): @property def option_string(self) -> str: return f"{self.__repr__()} " \ - f"by Artist({OPTION_STRING_DELIMITER.join([artist.name for artist in self.artist_collection])}) " \ + f"by 
Artist({OPTION_STRING_DELIMITER.join([str(artist.name) for artist in self.artist_collection])}) " \ f"under Label({OPTION_STRING_DELIMITER.join([label.name for label in self.label_collection])})" @property From 99690068db91d3f47bcbb37fb7fd7fdb0f51f027 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 15 Jan 2024 10:56:59 +0100 Subject: [PATCH 046/104] fix: creation of target classes --- src/music_kraken/objects/parents.py | 5 ++++- src/music_kraken/objects/song.py | 15 +++++---------- src/music_kraken/pages/abstract.py | 14 ++++++++++---- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index c7d8385..f231bfd 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -63,6 +63,9 @@ class OuterProxy: _default_factories: dict = {} _outer_attribute: Set[str] = {"options", "metadata", "indexing_values"} + DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = tuple() + UPWARDS_COLLECTION_STRING_ATTRIBUTES = tuple() + def __init__(self, _id: int = None, dynamic: bool = False, **kwargs): _automatic_id: bool = False @@ -112,7 +115,7 @@ class OuterProxy: :return: """ - if __name.startswith("_") or __name in self._outer_attribute: + if __name.startswith("_") or __name in self._outer_attribute or __name.isupper(): return object.__getattribute__(self, __name) _inner: InnerData = super().__getattribute__("_inner") diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index c20b5b9..a113c6f 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -78,8 +78,6 @@ class Song(Base): main_artist_list=main_artist_list, feature_artist_list=feature_artist_list, album_list=album_list, **kwargs) - - UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("album_collection", "main_artist_collection", "feature_artist_collection") def __init_collections__(self) -> None: @@ -152,8 +150,6 @@ class Song(Base): f"by 
Artist({OPTION_STRING_DELIMITER.join(artist.name for artist in self.main_artist_collection)}) " \ f"feat. Artist({OPTION_STRING_DELIMITER.join(artist.name for artist in self.feature_artist_collection)})" - - @property def options(self) -> List[P]: options = self.main_artist_collection.shallow_list @@ -225,7 +221,7 @@ class Album(Base): source_list=source_list, artist_list=artist_list, song_list=song_list, label_list=label_list, **kwargs) - DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("song_collection", ) + DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("song_collection",) UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("artist_collection", "label_collection") def __init_collections__(self): @@ -364,7 +360,7 @@ class Album(Base): :return: """ return len(self.artist_collection) > 1 - + @property def album_type_string(self) -> str: return self.album_type.value @@ -425,7 +421,7 @@ class Artist(Base): **kwargs) DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("feature_song_collection", "main_album_collection") - UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection", ) + UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection",) def __init_collections__(self): self.feature_song_collection.append_object_to_attribute = { @@ -435,7 +431,7 @@ class Artist(Base): self.main_album_collection.append_object_to_attribute = { "artist_collection": self } - + self.label_collection.append_object_to_attribute = { "current_artist_collection": self } @@ -585,7 +581,6 @@ Label class Label(Base): COLLECTION_STRING_ATTRIBUTES = ("album_collection", "current_artist_collection") - DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = COLLECTION_STRING_ATTRIBUTES name: str @@ -628,5 +623,5 @@ class Label(Base): options = [self] options.extend(self.current_artist_collection.shallow_list) options.extend(self.album_collection.shallow_list) - + return options diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index 2a42bde..da15133 100644 --- a/src/music_kraken/pages/abstract.py +++ 
b/src/music_kraken/pages/abstract.py @@ -1,6 +1,7 @@ import logging import random from copy import copy +from pathlib import Path from typing import Optional, Union, Type, Dict, Set, List, Tuple from string import Formatter @@ -363,8 +364,10 @@ class Page: file_parts = Formatter().parse(main_settings["download_file"]) new_target = Target( relative_to_music_dir=True, - path=main_settings["download_path"].format(**{part[1]: naming_dict[part[1]] for part in path_parts}), - file=main_settings["download_file"].format(**{part[1]: naming_dict[part[1]] for part in file_parts}) + file_path=Path( + main_settings["download_path"].format(**{part[1]: naming_dict[part[1]] for part in path_parts}), + main_settings["download_file"].format(**{part[1]: naming_dict[part[1]] for part in file_parts}) + ) ) @@ -376,8 +379,11 @@ class Page: return DownloadResult(error_message=f"No source found for {song.title} as {self.__class__.__name__}.") temp_target: Target = Target( - path=main_settings["temp_directory"], - file=str(random.randint(0, 999999)) + relative_to_music_dir=False, + file_path=Path( + main_settings["temp_directory"], + str(song.id) + ) ) r = DownloadResult(1) From 1a3f1648271af6e31dc762d5c1a951d1044257db Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 15 Jan 2024 11:40:48 +0100 Subject: [PATCH 047/104] fix: downloads --- src/music_kraken/audio/codec.py | 4 +-- src/music_kraken/objects/collection.py | 1 + src/music_kraken/objects/song.py | 8 ++--- src/music_kraken/objects/target.py | 2 +- src/music_kraken/pages/bandcamp.py | 48 ++++++++++++-------------- src/music_kraken/pages/musify.py | 2 +- 6 files changed, 32 insertions(+), 33 deletions(-) diff --git a/src/music_kraken/audio/codec.py b/src/music_kraken/audio/codec.py index afb3a0f..9ec70e6 100644 --- a/src/music_kraken/audio/codec.py +++ b/src/music_kraken/audio/codec.py @@ -1,3 +1,4 @@ +from pathlib import Path from typing import List, Tuple from tqdm import tqdm from ffmpeg_progress_yield import FfmpegProgress @@ 
-19,8 +20,7 @@ def correct_codec(target: Target, bitrate_kb: int = main_settings["bitrate"], au bitrate_b = int(bitrate_kb / 1024) output_target = Target( - path=target._path, - file=str(target._file) + "." + audio_format + file_path=Path(str(target.file_path) + "." + audio_format) ) # get the select thingie diff --git a/src/music_kraken/objects/collection.py b/src/music_kraken/objects/collection.py index ee99692..a3b1bbf 100644 --- a/src/music_kraken/objects/collection.py +++ b/src/music_kraken/objects/collection.py @@ -229,6 +229,7 @@ class Collection(Generic[T]): if existing_object is None: # append + # print("appending", existing_object, __object) append_to._data.append(__object) else: # merge diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index a113c6f..120f109 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -68,7 +68,7 @@ class Song(Base): } def __init__(self, title: str = None, unified_title: str = None, isrc: str = None, length: int = None, - genre: str = None, note: FormattedText = None, source_list: SourceCollection = None, + genre: str = None, note: FormattedText = None, source_list: List[Source] = None, target_list: List[Target] = None, lyrics_list: List[Lyrics] = None, main_artist_list: List[Artist] = None, feature_artist_list: List[Artist] = None, album_list: List[Album] = None, **kwargs) -> None: @@ -214,7 +214,7 @@ class Album(Base): def __init__(self, title: str = None, unified_title: str = None, album_status: AlbumStatus = None, album_type: AlbumType = None, language: Language = None, date: ID3Timestamp = None, barcode: str = None, albumsort: int = None, notes: FormattedText = None, - source_list: SourceCollection = None, artist_list: List[Artist] = None, song_list: List[Song] = None, + source_list: List[Source] = None, artist_list: List[Artist] = None, song_list: List[Song] = None, label_list: List[Label] = None, **kwargs) -> None: super().__init__(title=title, 
unified_title=unified_title, album_status=album_status, album_type=album_type, language=language, date=date, barcode=barcode, albumsort=albumsort, notes=notes, @@ -410,7 +410,7 @@ class Artist(Base): # This is automatically generated def __init__(self, name: str = None, unified_name: str = None, country: Country = None, formed_in: ID3Timestamp = None, notes: FormattedText = None, lyrical_themes: List[str] = None, - general_genre: str = None, unformated_location: str = None, source_list: SourceCollection = None, + general_genre: str = None, unformated_location: str = None, source_list: List[Source] = None, contact_list: List[Contact] = None, feature_song_list: List[Song] = None, main_album_list: List[Album] = None, label_list: List[Label] = None, **kwargs) -> None: @@ -604,7 +604,7 @@ class Label(Base): } def __init__(self, name: str = None, unified_name: str = None, notes: FormattedText = None, - source_list: SourceCollection = None, contact_list: List[Contact] = None, + source_list: List[Source] = None, contact_list: List[Contact] = None, album_list: List[Album] = None, current_artist_list: List[Artist] = None, **kwargs) -> None: super().__init__(name=name, unified_name=unified_name, notes=notes, source_list=source_list, contact_list=contact_list, album_list=album_list, current_artist_list=current_artist_list, diff --git a/src/music_kraken/objects/target.py b/src/music_kraken/objects/target.py index c6a5b0e..35afd4a 100644 --- a/src/music_kraken/objects/target.py +++ b/src/music_kraken/objects/target.py @@ -64,7 +64,7 @@ class Target(OuterProxy): return self.file_path.stat().st_size def create_path(self): - self._path.mkdir(parents=True, exist_ok=True) + self.file_path.parent.mkdir(parents=True, exist_ok=True) def copy_content(self, copy_to: Target): if not self.exists: diff --git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py index c0d1bb3..4a0d5da 100644 --- a/src/music_kraken/pages/bandcamp.py +++ b/src/music_kraken/pages/bandcamp.py 
@@ -24,6 +24,7 @@ from ..connection import Connection from ..utils.support_classes.download_result import DownloadResult from ..utils.config import main_settings, logging_settings from ..utils.shared import DEBUG + if DEBUG: from ..utils.debug_utils import dump_to_file @@ -38,7 +39,6 @@ def _get_host(source: Source) -> str: return urlunparse((parsed.scheme, parsed.netloc, "", "", "", "")) - class BandcampTypes(Enum): ARTIST = "b" ALBUM = "a" @@ -55,7 +55,7 @@ class Bandcamp(Page): host="https://bandcamp.com/", logger=self.LOGGER ) - + super().__init__(*args, **kwargs) def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: @@ -68,7 +68,7 @@ class Bandcamp(Page): return Album if path.startswith("track"): return Song - + return super().get_source_type(source) def _parse_autocomplete_api_result(self, data: dict) -> DatabaseObject: @@ -124,7 +124,7 @@ class Bandcamp(Page): ) ] ) - + def general_search(self, search_query: str, filter_string: str = "") -> List[DatabaseObject]: results = [] @@ -148,19 +148,18 @@ class Bandcamp(Page): results.append(r) return results - + def label_search(self, label: Label) -> List[Label]: return self.general_search(label.name, filter_string="b") - + def artist_search(self, artist: Artist) -> List[Artist]: return self.general_search(artist.name, filter_string="b") - + def album_search(self, album: Album) -> List[Album]: return self.general_search(album.title, filter_string="a") - + def song_search(self, song: Song) -> List[Song]: return self.general_search(song.title, filter_string="t") - def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: return Label() @@ -169,13 +168,13 @@ class Bandcamp(Page): name: str = None source_list: List[Source] = [] contact_list: List[Contact] = [] - + band_name_location: BeautifulSoup = soup.find("p", {"id": "band-name-location"}) if band_name_location is not None: title_span = band_name_location.find("span", {"class": "title"}) if title_span is not None: name = 
title_span.text.strip() - + link_container: BeautifulSoup = soup.find("ol", {"id": "band-links"}) if link_container is not None: li: BeautifulSoup @@ -189,7 +188,7 @@ class Bandcamp(Page): name=name, source_list=source_list ) - + def _parse_album(self, soup: BeautifulSoup, initial_source: Source) -> List[Album]: title = None source_list: List[Source] = [] @@ -197,7 +196,7 @@ class Bandcamp(Page): a = soup.find("a") if a is not None and a["href"] is not None: source_list.append(Source(self.SOURCE_TYPE, _get_host(initial_source) + a["href"])) - + title_p = soup.find("p", {"class": "title"}) if title_p is not None: title = title_p.text.strip() @@ -219,14 +218,13 @@ class Bandcamp(Page): return album_list - def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: artist = Artist() r = self.connection.get(_parse_artist_url(source.url)) if r is None: return artist - + soup = self.get_soup_from_response(r) if DEBUG: @@ -238,7 +236,7 @@ class Bandcamp(Page): if html_music_grid is not None: for subsoup in html_music_grid.find_all("li"): artist.main_album_collection.append(self._parse_album(soup=subsoup, initial_source=source)) - + for i, data_blob_soup in enumerate(soup.find_all("div", {"id": ["pagedata", "collectors-data"]})): data_blob = data_blob_soup["data-blob"] @@ -252,7 +250,7 @@ class Bandcamp(Page): artist.source_collection.append(source) return artist - + def _parse_track_element(self, track: dict) -> Optional[Song]: return Song( title=track["item"]["name"].strip(), @@ -266,11 +264,11 @@ class Bandcamp(Page): r = self.connection.get(source.url) if r is None: return album - + soup = self.get_soup_from_response(r) data_container = soup.find("script", {"type": "application/ld+json"}) - + if DEBUG: dump_to_file("album_data.json", data_container.text, is_json=True, exit_after_dump=False) @@ -279,7 +277,7 @@ class Bandcamp(Page): artist_source_list = [] if "@id" in artist_data: - artist_source_list=[Source(self.SOURCE_TYPE, 
_parse_artist_url(artist_data["@id"]))] + artist_source_list = [Source(self.SOURCE_TYPE, _parse_artist_url(artist_data["@id"]))] album = Album( title=data["name"].strip(), source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]))], @@ -307,15 +305,14 @@ class Bandcamp(Page): if track_lyrics: self.LOGGER.debug(" Lyrics retrieved..") return [Lyrics(text=FormattedText(html=track_lyrics.prettify()))] - + return [] - def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: r = self.connection.get(source.url) if r is None: return Song() - + soup = self.get_soup_from_response(r) data_container = soup.find("script", {"type": "application/ld+json"}) @@ -340,7 +337,7 @@ class Bandcamp(Page): song = Song( title=data["name"].strip(), - source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), adio_url=mp3_url)], + source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), audio_url=mp3_url)], album_list=[Album( title=album_data["name"].strip(), date=ID3Timestamp.strptime(data["datePublished"], "%d %b %Y %H:%M:%S %Z"), @@ -348,7 +345,7 @@ class Bandcamp(Page): )], main_artist_list=[Artist( name=artist_data["name"].strip(), - source_list=[Source(self.SOURCE_TYPE, _parse_artist_url(artist_data["@id"]))] + source_list=[Source(self.SOURCE_TYPE, _parse_artist_url(artist_data["@id"]))] )], lyrics_list=self._fetch_lyrics(soup=soup) ) @@ -359,5 +356,6 @@ class Bandcamp(Page): def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: if source.audio_url is None: + print(source) return DownloadResult(error_message="Couldn't find download link.") return self.connection.stream_into(url=source.audio_url, target=target, description=desc) diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index 44b61d9..371633a 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -628,7 +628,7 @@ class Musify(Page): 
source_list.append(Source( self.SOURCE_TYPE, url=current_url, - adio_url=self.HOST + download_href + audio_url=self.HOST + download_href )) return Song( From 564621b3325258e73442f4482e1b5e44e8c75d72 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 15 Jan 2024 11:52:31 +0100 Subject: [PATCH 048/104] fix: downloading into correct artist --- src/music_kraken/objects/parents.py | 2 +- src/music_kraken/objects/song.py | 10 +++++----- src/music_kraken/pages/abstract.py | 22 +++++++++++++++------- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index f231bfd..e1d07f5 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -61,7 +61,7 @@ class OuterProxy: """ _default_factories: dict = {} - _outer_attribute: Set[str] = {"options", "metadata", "indexing_values"} + _outer_attribute: Set[str] = {"options", "metadata", "indexing_values", "option_string"} DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = tuple() UPWARDS_COLLECTION_STRING_ATTRIBUTES = tuple() diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index 120f109..0b6b553 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -61,13 +61,13 @@ class Song(Base): "album_collection": Collection, "feature_artist_collection": Collection, - "title": lambda: None, + "title": lambda: "", "unified_title": lambda: None, "isrc": lambda: None, "genre": lambda: None, } - def __init__(self, title: str = None, unified_title: str = None, isrc: str = None, length: int = None, + def __init__(self, title: str = "", unified_title: str = None, isrc: str = None, length: int = None, genre: str = None, note: FormattedText = None, source_list: List[Source] = None, target_list: List[Target] = None, lyrics_list: List[Lyrics] = None, main_artist_list: List[Artist] = None, feature_artist_list: List[Artist] = None, @@ -264,7 +264,7 @@ class Album(Base): @property def 
option_string(self) -> str: return f"{self.__repr__()} " \ - f"by Artist({OPTION_STRING_DELIMITER.join([str(artist.name) for artist in self.artist_collection])}) " \ + f"by Artist({OPTION_STRING_DELIMITER.join([artist.name for artist in self.artist_collection])}) " \ f"under Label({OPTION_STRING_DELIMITER.join([label.name for label in self.label_collection])})" @property @@ -390,7 +390,7 @@ class Artist(Base): label_collection: Collection[Label] _default_factories = { - "name": lambda: None, + "name": str, "unified_name": lambda: None, "country": lambda: None, "unformated_location": lambda: None, @@ -408,7 +408,7 @@ class Artist(Base): } # This is automatically generated - def __init__(self, name: str = None, unified_name: str = None, country: Country = None, + def __init__(self, name: str = "", unified_name: str = None, country: Country = None, formed_in: ID3Timestamp = None, notes: FormattedText = None, lyrical_themes: List[str] = None, general_genre: str = None, unformated_location: str = None, source_list: List[Source] = None, contact_list: List[Contact] = None, feature_song_list: List[Song] = None, diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index da15133..7a8751c 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -240,20 +240,28 @@ class Page: """ # creating a new object, of the same type - new_music_object: DatabaseObject = type(music_object)() + new_music_object: Optional[DatabaseObject] = None # only certain database objects, have a source list if isinstance(music_object, INDEPENDENT_DB_OBJECTS): source: Source for source in music_object.source_collection.get_sources_from_page(self.SOURCE_TYPE): - new_music_object.merge(self.fetch_object_from_source( - source=source, - enforce_type=type(music_object), - stop_at_level=stop_at_level, + tmp = self.fetch_object_from_source( + source=source, + enforce_type=type(music_object), + stop_at_level=stop_at_level, post_process=False - )) 
+ ) - return music_object.merge(new_music_object) + if new_music_object is None: + new_music_object = tmp + else: + new_music_object.merge(tmp) + + if new_music_object is not None: + music_object.merge(new_music_object) + + return music_object def fetch_object_from_source(self, source: Source, stop_at_level: int = 2, enforce_type: Type[DatabaseObject] = None, post_process: bool = True) -> Optional[DatabaseObject]: obj_type = self.get_source_type(source) From 6a8374d595d3b0dd4ae84fedb94977ad432b5703 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 15 Jan 2024 12:48:36 +0100 Subject: [PATCH 049/104] fix: fixed youtube music --- src/actual_donwload.py | 5 +- src/music_kraken/cli/main_downloader.py | 129 +++++++++--------- src/music_kraken/download/page_attributes.py | 6 +- src/music_kraken/objects/collection.py | 2 +- src/music_kraken/objects/parents.py | 13 ++ src/music_kraken/objects/song.py | 61 +++++++++ src/music_kraken/pages/abstract.py | 136 ++++++++++--------- 7 files changed, 216 insertions(+), 136 deletions(-) diff --git a/src/actual_donwload.py b/src/actual_donwload.py index b7a468c..6ad3c34 100644 --- a/src/actual_donwload.py +++ b/src/actual_donwload.py @@ -42,9 +42,8 @@ if __name__ == "__main__": ] bandcamp_test = [ - "s: #a Ghost Bath", - "0", - "d: 4" + "s: #a Only Smile", + "d: 1", ] diff --git a/src/music_kraken/cli/main_downloader.py b/src/music_kraken/cli/main_downloader.py index 3958150..52d9847 100644 --- a/src/music_kraken/cli/main_downloader.py +++ b/src/music_kraken/cli/main_downloader.py @@ -16,7 +16,6 @@ from ..download.page_attributes import Pages from ..pages import Page from ..objects import Song, Album, Artist, DatabaseObject - """ This is the implementation of the Shell @@ -107,6 +106,7 @@ def get_existing_genre() -> List[str]: return existing_genres + def get_genre(): existing_genres = get_existing_genre() for i, genre_option in enumerate(existing_genres): @@ -129,19 +129,18 @@ def get_genre(): verification = input(f"create new 
genre \"{new_genre}\"? (Y/N): ").lower() if verification in agree_inputs: return new_genre - - + + def help_message(): print() print(main_settings["happy_messages"]) print() - class Downloader: def __init__( self, - exclude_pages: Set[Type[Page]] = None, + exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False, max_displayed_options: int = 10, option_digits: int = 3, @@ -149,23 +148,22 @@ class Downloader: process_metadata_anyway: bool = False, ) -> None: self.pages: Pages = Pages(exclude_pages=exclude_pages, exclude_shady=exclude_shady) - + self.page_dict: Dict[str, Type[Page]] = dict() - + self.max_displayed_options = max_displayed_options self.option_digits: int = option_digits - + self.current_results: Results = None self._result_history: List[Results] = [] - + self.genre = genre or get_genre() self.process_metadata_anyway = process_metadata_anyway - + print() print(f"Downloading to: \"{self.genre}\"") print() - def print_current_options(self): self.page_dict = dict() @@ -176,12 +174,13 @@ class Downloader: if isinstance(option, Option): print(f"{option.index:0{self.option_digits}} {option.music_object.option_string}") else: - prefix = ALPHABET[page_count%len(ALPHABET)] - print(f"({prefix}) ------------------------{option.__name__:{PAGE_NAME_FILL}<{MAX_PAGE_LEN}}------------") - + prefix = ALPHABET[page_count % len(ALPHABET)] + print( + f"({prefix}) ------------------------{option.__name__:{PAGE_NAME_FILL}<{MAX_PAGE_LEN}}------------") + self.page_dict[prefix] = option self.page_dict[option.__name__] = option - + page_count += 1 print() @@ -189,47 +188,47 @@ class Downloader: def set_current_options(self, current_options: Results): if main_settings["result_history"]: self._result_history.append(current_options) - + if main_settings["history_length"] != -1: if len(self._result_history) > main_settings["history_length"]: self._result_history.pop(0) - + self.current_results = current_options - + def previous_option(self) -> bool: if not 
main_settings["result_history"]: print("History is turned of.\nGo to main_settings, and change the value at 'result_history' to 'true'.") return False - + if len(self._result_history) <= 1: print(f"No results in history.") return False self._result_history.pop() self.current_results = self._result_history[-1] return True - + def _process_parsed(self, key_text: Dict[str, str], query: str) -> Query: song = None if not "t" in key_text else Song(title=key_text["t"], dynamic=True) album = None if not "r" in key_text else Album(title=key_text["r"], dynamic=True) artist = None if not "a" in key_text else Artist(name=key_text["a"], dynamic=True) - + if song is not None: if album is not None: song.album_collection.append(album) if artist is not None: song.main_artist_collection.append(artist) return Query(raw_query=query, music_object=song) - + if album is not None: if artist is not None: album.artist_collection.append(artist) return Query(raw_query=query, music_object=album) - + if artist is not None: return Query(raw_query=query, music_object=artist) - + return Query(raw_query=query) - + def search(self, query: str): if re.match(URL_PATTERN, query) is not None: try: @@ -243,58 +242,57 @@ class Downloader: self.set_current_options(PageResults(page, data_object.options)) self.print_current_options() return - + special_characters = "#\\" query = query + " " - + key_text = {} - + skip_next = False escape_next = False new_text = "" latest_key: str = None for i in range(len(query) - 1): current_char = query[i] - next_char = query[i+1] - + next_char = query[i + 1] + if skip_next: skip_next = False continue - + if escape_next: new_text += current_char escape_next = False - + # escaping if current_char == "\\": if next_char in special_characters: escape_next = True continue - + if current_char == "#": if latest_key is not None: key_text[latest_key] = new_text new_text = "" - + latest_key = next_char skip_next = True continue - + new_text += current_char - + if latest_key is not 
None: key_text[latest_key] = new_text - - + parsed_query: Query = self._process_parsed(key_text, query) - + self.set_current_options(self.pages.search(parsed_query)) self.print_current_options() - + def goto(self, index: int): page: Type[Page] music_object: DatabaseObject - + try: page, music_object = self.current_results.get_music_object_by_index(index) except KeyError: @@ -302,23 +300,22 @@ class Downloader: print(f"The option {index} doesn't exist.") print() return - + self.pages.fetch_details(music_object) print(music_object) print(music_object.options) self.set_current_options(PageResults(page, music_object.options)) - + self.print_current_options() - - + def download(self, download_str: str, download_all: bool = False) -> bool: to_download: List[DatabaseObject] = [] if re.match(URL_PATTERN, download_str) is not None: _, music_objects = self.pages.fetch_url(download_str) to_download.append(music_objects) - + else: index: str for index in download_str.split(", "): @@ -327,66 +324,68 @@ class Downloader: print(f"Every download thingie has to be an index, not {index}.") print() return False - + for index in download_str.split(", "): to_download.append(self.current_results.get_music_object_by_index(int(index))[1]) - + print() print("Downloading:") for download_object in to_download: print(download_object.option_string) print() - + _result_map: Dict[DatabaseObject, DownloadResult] = dict() - + for database_object in to_download: - r = self.pages.download(music_object=database_object, genre=self.genre, download_all=download_all, process_metadata_anyway=self.process_metadata_anyway) + r = self.pages.download(music_object=database_object, genre=self.genre, download_all=download_all, + process_metadata_anyway=self.process_metadata_anyway) _result_map[database_object] = r - + for music_object, result in _result_map.items(): print() print(music_object.option_string) print(result) - + return True - + def process_input(self, input_str: str) -> bool: input_str = 
input_str.strip() processed_input: str = input_str.lower() - + if processed_input in EXIT_COMMANDS: return True - + if processed_input == ".": self.print_current_options() return False - + if processed_input == "..": if self.previous_option(): self.print_current_options() return False - + if processed_input.startswith("s: "): self.search(input_str[3:]) return False - + if processed_input.startswith("d: "): return self.download(input_str[3:]) - + if processed_input.isdigit(): self.goto(int(processed_input)) return False - + if processed_input != "help": print("Invalid input.") help_message() return False - + def mainloop(self): while True: if self.process_input(input("> ")): return + @cli_function def download( genre: str = None, @@ -403,9 +402,9 @@ def download( print("Restart the programm to use it.") else: print("Something went wrong configuring.") - + shell = Downloader(genre=genre, process_metadata_anyway=process_metadata_anyway) - + if command_list is not None: for command in command_list: shell.process_input(command) @@ -414,5 +413,5 @@ def download( if direct_download_url is not None: if shell.download(direct_download_url, download_all=download_all): return - + shell.mainloop() diff --git a/src/music_kraken/download/page_attributes.py b/src/music_kraken/download/page_attributes.py index c655d11..27ae19c 100644 --- a/src/music_kraken/download/page_attributes.py +++ b/src/music_kraken/download/page_attributes.py @@ -98,8 +98,10 @@ class Pages: def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult: if not isinstance(music_object, INDEPENDENT_DB_OBJECTS): return DownloadResult(error_message=f"{type(music_object).__name__} can't be downloaded.") - - _page_types = set() + + self.fetch_details(music_object) + + _page_types = set(self._source_to_page) for src in music_object.source_collection.source_pages: if src in self._source_to_page: 
_page_types.add(self._source_to_page[src]) diff --git a/src/music_kraken/objects/collection.py b/src/music_kraken/objects/collection.py index a3b1bbf..21330fa 100644 --- a/src/music_kraken/objects/collection.py +++ b/src/music_kraken/objects/collection.py @@ -315,7 +315,7 @@ class Collection(Generic[T]): yield element def __merge__(self, __other: Collection, override: bool = False): - self.extend(__other.shallow_list, from_map=True) + self.extend(__other._data, from_map=True) def __getitem__(self, item: int): if item < len(self._data): diff --git a/src/music_kraken/objects/parents.py b/src/music_kraken/objects/parents.py index e1d07f5..bdaa960 100644 --- a/src/music_kraken/objects/parents.py +++ b/src/music_kraken/objects/parents.py @@ -1,6 +1,7 @@ from __future__ import annotations import random +from collections import defaultdict from functools import lru_cache from typing import Optional, Dict, Tuple, List, Type, Generic, Any, TypeVar, Set @@ -131,6 +132,18 @@ class OuterProxy: return super().__setattr__(__name, __value) + def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]): + pass + + def add_list_of_other_objects(self, object_list: List[OuterProxy]): + d: Dict[Type[OuterProxy], List[OuterProxy]] = defaultdict(list) + + for db_object in object_list: + d[type(db_object)].append(db_object) + + for key, value in d.items(): + self._add_other_db_objects(key, value) + def __hash__(self): """ :raise: IsDynamicException diff --git a/src/music_kraken/objects/song.py b/src/music_kraken/objects/song.py index 0b6b553..f418330 100644 --- a/src/music_kraken/objects/song.py +++ b/src/music_kraken/objects/song.py @@ -95,6 +95,22 @@ class Song(Base): "feature_song_collection": self } + def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]): + if object_type is Song: + return + + if isinstance(object_list, Lyrics): + self.lyrics_collection.extend(object_list) + return + + if isinstance(object_list, 
Artist): + self.main_artist_collection.extend(object_list) + return + + if isinstance(object_list, Album): + self.album_collection.extend(object_list) + return + @property def indexing_values(self) -> List[Tuple[str, object]]: return [ @@ -229,6 +245,22 @@ class Album(Base): "main_artist_collection": self.artist_collection } + def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]): + if object_type is Song: + self.song_collection.extend(object_list) + return + + if object_type is Artist: + self.artist_collection.extend(object_list) + return + + if object_type is Album: + return + + if object_type is Label: + self.label_collection.extend(object_list) + return + @property def indexing_values(self) -> List[Tuple[str, object]]: return [ @@ -436,6 +468,23 @@ class Artist(Base): "current_artist_collection": self } + def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]): + if object_type is Song: + # this doesn't really make sense + # self.feature_song_collection.extend(object_list) + return + + if object_type is Artist: + return + + if object_type is Album: + self.main_album_collection.extend(object_list) + return + + if object_type is Label: + self.label_collection.extend(object_list) + return + @property def options(self) -> List[P]: options = [self, *self.main_album_collection.shallow_list, *self.feature_album] @@ -618,6 +667,18 @@ class Label(Base): *[('url', source.url) for source in self.source_collection] ] + def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]): + if object_type is Song: + return + + if object_type is Artist: + self.current_artist_collection.extend(object_list) + return + + if object_type is Album: + self.album_collection.extend(object_list) + return + @property def options(self) -> List[P]: options = [self] diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index 7a8751c..50a4e7b 100644 --- 
a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -28,7 +28,6 @@ from ..utils.support_classes.query import Query from ..utils.support_classes.download_result import DownloadResult from ..utils.string_processing import fit_to_file_system - INDEPENDENT_DB_OBJECTS = Union[Label, Album, Artist, Song] INDEPENDENT_DB_TYPES = Union[Type[Song], Type[Album], Type[Artist], Type[Label]] @@ -42,22 +41,22 @@ class NamingDict(dict): "album": "album.title", "album_type": "album.album_type_string" } - + def __init__(self, values: dict, object_mappings: Dict[str, DatabaseObject] = None): self.object_mappings: Dict[str, DatabaseObject] = object_mappings or dict() - + super().__init__(values) self["audio_format"] = main_settings["audio_format"] - + def add_object(self, music_object: DatabaseObject): self.object_mappings[type(music_object).__name__.lower()] = music_object - + def copy(self) -> dict: return type(self)(super().copy(), self.object_mappings.copy()) - + def __getitem__(self, key: str) -> str: return fit_to_file_system(super().__getitem__(key)) - + def default_value_for_name(self, name: str) -> str: return f'Various {name.replace("_", " ").title()}' @@ -67,23 +66,23 @@ class NamingDict(dict): return self.default_value_for_name(key) key = self.CUSTOM_KEYS[key] - + frag_list = key.split(".") - + object_name = frag_list[0].strip().lower() attribute_name = frag_list[-1].strip().lower() if object_name not in self.object_mappings: return self.default_value_for_name(attribute_name) - + music_object = self.object_mappings[object_name] try: value = getattr(music_object, attribute_name) if value is None: return self.default_value_for_name(attribute_name) - + return str(value) - + except AttributeError: return self.default_value_for_name(attribute_name) @@ -133,6 +132,7 @@ def _clean_song(song: Song, collections: Dict[INDEPENDENT_DB_TYPES, Collection]) _clean_collection(song.feature_artist_collection, collections) 
_clean_collection(song.main_artist_collection, collections) + def clean_object(dirty_object: DatabaseObject) -> DatabaseObject: if isinstance(dirty_object, INDEPENDENT_DB_OBJECTS): collections = { @@ -147,20 +147,22 @@ def clean_object(dirty_object: DatabaseObject) -> DatabaseObject: _clean_music_object(dirty_object, collections) return dirty_object - + + def build_new_object(new_object: DatabaseObject) -> DatabaseObject: new_object = clean_object(new_object) new_object.compile(merge_into=False) - + return new_object + def merge_together(old_object: DatabaseObject, new_object: DatabaseObject, do_compile: bool = True) -> DatabaseObject: new_object = clean_object(new_object) - + old_object.merge(new_object) if do_compile and False: old_object.compile(merge_into=False) - + return old_object @@ -169,60 +171,59 @@ class Page: This is an abstract class, laying out the functionality for every other class fetching something """ - + SOURCE_TYPE: SourcePages LOGGER = logging.getLogger("this shouldn't be used") - + # set this to true, if all song details can also be fetched by fetching album details NO_ADDITIONAL_DATA_FROM_SONG = False - def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: return None - + def get_soup_from_response(self, r: requests.Response) -> BeautifulSoup: return BeautifulSoup(r.content, "html.parser") # to search stuff def search(self, query: Query) -> List[DatabaseObject]: music_object = query.music_object - + search_functions = { Song: self.song_search, Album: self.album_search, Artist: self.artist_search, Label: self.label_search } - + if type(music_object) in search_functions: r = search_functions[type(music_object)](music_object) if r is not None and len(r) > 0: return r - + r = [] for default_query in query.default_search: for single_option in self.general_search(default_query): r.append(single_option) - + return r - + def general_search(self, search_query: str) -> List[DatabaseObject]: return [] - + def label_search(self, 
label: Label) -> List[Label]: return [] - + def artist_search(self, artist: Artist) -> List[Artist]: return [] - + def album_search(self, album: Album) -> List[Album]: return [] - + def song_search(self, song: Song) -> List[Song]: return [] - - def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1, post_process: bool = True) -> DatabaseObject: + def fetch_details(self, music_object: DatabaseObject, stop_at_level: int = 1, + post_process: bool = True) -> DatabaseObject: """ when a music object with lacking data is passed in, it returns the SAME object **(no copy)** with more detailed data. @@ -263,7 +264,9 @@ class Page: return music_object - def fetch_object_from_source(self, source: Source, stop_at_level: int = 2, enforce_type: Type[DatabaseObject] = None, post_process: bool = True) -> Optional[DatabaseObject]: + def fetch_object_from_source(self, source: Source, stop_at_level: int = 2, + enforce_type: Type[DatabaseObject] = None, post_process: bool = True) -> Optional[ + DatabaseObject]: obj_type = self.get_source_type(source) if obj_type is None: @@ -272,16 +275,16 @@ class Page: if enforce_type != obj_type and enforce_type is not None: self.LOGGER.warning(f"Object type isn't type to enforce: {enforce_type}, {obj_type}") return None - + music_object: DatabaseObject = None - + fetch_map = { Song: self.fetch_song, Album: self.fetch_album, Artist: self.fetch_artist, Label: self.fetch_label } - + if obj_type in fetch_map: music_object = fetch_map[obj_type](source, stop_at_level) else: @@ -294,10 +297,11 @@ class Page: collection = music_object.__getattribute__(collection_str) for sub_element in collection: - sub_element.merge(self.fetch_details(sub_element, stop_at_level=stop_at_level-1, post_process=False)) - + sub_element.merge( + self.fetch_details(sub_element, stop_at_level=stop_at_level - 1, post_process=False)) + return music_object - + def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: return Song() @@ -310,41 +314,42 
@@ class Page: def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: return Label() - def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult: + def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False, + process_metadata_anyway: bool = False) -> DownloadResult: naming_dict: NamingDict = NamingDict({"genre": genre}) - + def fill_naming_objects(naming_music_object: DatabaseObject): nonlocal naming_dict - + for collection_name in naming_music_object.UPWARDS_COLLECTION_STRING_ATTRIBUTES: collection: Collection = getattr(naming_music_object, collection_name) - + if collection.empty: continue - + dom_ordered_music_object: DatabaseObject = collection[0] naming_dict.add_object(dom_ordered_music_object) return fill_naming_objects(dom_ordered_music_object) - + fill_naming_objects(music_object) - + return self._download(music_object, naming_dict, download_all, process_metadata_anyway=process_metadata_anyway) - - def _download(self, music_object: DatabaseObject, naming_dict: NamingDict, download_all: bool = False, skip_details: bool = False, process_metadata_anyway: bool = False) -> DownloadResult: + def _download(self, music_object: DatabaseObject, naming_dict: NamingDict, download_all: bool = False, + skip_details: bool = False, process_metadata_anyway: bool = False) -> DownloadResult: skip_next_details = skip_details - + # Skips all releases, that are defined in shared.ALBUM_TYPE_BLACKLIST, if download_all is False if isinstance(music_object, Album): if self.NO_ADDITIONAL_DATA_FROM_SONG: skip_next_details = True - + if not download_all and music_object.album_type.value in main_settings["album_type_blacklist"]: return DownloadResult() if not isinstance(music_object, Song) or not self.NO_ADDITIONAL_DATA_FROM_SONG: self.fetch_details(music_object=music_object, stop_at_level=2) - + naming_dict.add_object(music_object) if 
isinstance(music_object, Song): @@ -357,7 +362,9 @@ class Page: sub_ordered_music_object: DatabaseObject for sub_ordered_music_object in collection: - download_result.merge(self._download(sub_ordered_music_object, naming_dict.copy(), download_all, skip_details=skip_next_details, process_metadata_anyway=process_metadata_anyway)) + download_result.merge(self._download(sub_ordered_music_object, naming_dict.copy(), download_all, + skip_details=skip_next_details, + process_metadata_anyway=process_metadata_anyway)) return download_result @@ -378,7 +385,6 @@ class Page: ) ) - if song.target_collection.empty: song.target_collection.append(new_target) @@ -393,7 +399,7 @@ class Page: str(song.id) ) ) - + r = DownloadResult(1) found_on_disc = False @@ -403,10 +409,10 @@ class Page: if process_metadata_anyway: target.copy_content(temp_target) found_on_disc = True - + r.found_on_disk += 1 r.add_target(target) - + if found_on_disc and not process_metadata_anyway: self.LOGGER.info(f"{song.option_string} already exists, thus not downloading again.") return r @@ -415,18 +421,18 @@ class Page: if not found_on_disc: r = self.download_song_to_target(source=source, target=temp_target, desc=song.title) - if not r.is_fatal_error: - r.merge(self._post_process_targets(song, temp_target, [] if found_on_disc else self.get_skip_intervals(song, source))) + r.merge(self._post_process_targets(song, temp_target, + [] if found_on_disc else self.get_skip_intervals(song, source))) return r - + def _post_process_targets(self, song: Song, temp_target: Target, interval_list: List) -> DownloadResult: correct_codec(temp_target, interval_list=interval_list) - + self.post_process_hook(song, temp_target) - + write_metadata_to_target(song.metadata, temp_target) r = DownloadResult() @@ -436,17 +442,17 @@ class Page: if temp_target is not target: temp_target.copy_content(target) r.add_target(target) - + temp_target.delete() r.sponsor_segments += len(interval_list) - + return r - + def get_skip_intervals(self, 
song: Song, source: Source) -> List[Tuple[float, float]]: return [] - + def post_process_hook(self, song: Song, temp_target: Target, **kwargs): pass - + def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: return DownloadResult() From fd0d22b751159b2df7068e852f9128c7867e8e70 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 16 Jan 2024 09:53:51 +0100 Subject: [PATCH 050/104] feat: added colors --- src/music_kraken/cli/main_downloader.py | 3 ++- src/music_kraken/utils/enums/colors.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 src/music_kraken/utils/enums/colors.py diff --git a/src/music_kraken/cli/main_downloader.py b/src/music_kraken/cli/main_downloader.py index 52d9847..c18aa8f 100644 --- a/src/music_kraken/cli/main_downloader.py +++ b/src/music_kraken/cli/main_downloader.py @@ -11,6 +11,7 @@ from ..utils.string_processing import fit_to_file_system from ..utils.support_classes.query import Query from ..utils.support_classes.download_result import DownloadResult from ..utils.exception.download import UrlNotFoundException +from ..utils.enums.colors import BColors from ..download.results import Results, Option, PageResults from ..download.page_attributes import Pages from ..pages import Page @@ -176,7 +177,7 @@ class Downloader: else: prefix = ALPHABET[page_count % len(ALPHABET)] print( - f"({prefix}) ------------------------{option.__name__:{PAGE_NAME_FILL}<{MAX_PAGE_LEN}}------------") + f"{BColors.HEADER}({prefix}) ------------------------{option.__name__:{PAGE_NAME_FILL}<{MAX_PAGE_LEN}}------------{BColors.ENDC}") self.page_dict[prefix] = option self.page_dict[option.__name__] = option diff --git a/src/music_kraken/utils/enums/colors.py b/src/music_kraken/utils/enums/colors.py new file mode 100644 index 0000000..61fe1e4 --- /dev/null +++ b/src/music_kraken/utils/enums/colors.py @@ -0,0 +1,14 @@ +from enum import Enum + + +class BColors: + # 
https://stackoverflow.com/a/287944 + HEADER = "\033[95m" + OKBLUE = "\033[94m" + OKCYAN = "\033[96m" + OKGREEN = "\033[92m" + WARNING = "\033[93m" + FAIL = "\033[91m" + ENDC = "\033[0m" + BOLD = "\033[1m" + UNDERLINE = "\033[4m" \ No newline at end of file From b3ac152220977a8abb65d7588f58d663704b7bc8 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 16 Jan 2024 10:08:08 +0100 Subject: [PATCH 051/104] feat: highlight downloadebel options --- documentation/shell.md | 6 +++++- src/music_kraken/cli/main_downloader.py | 3 ++- src/music_kraken/download/page_attributes.py | 9 +++++++++ src/music_kraken/utils/enums/colors.py | 2 +- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/documentation/shell.md b/documentation/shell.md index de72743..31da1a3 100644 --- a/documentation/shell.md +++ b/documentation/shell.md @@ -10,7 +10,7 @@ > s: r: #a an Artist #r some random Release ``` -Searches for an url, or an query +Searches for an url, or a query ### Query Syntax @@ -33,6 +33,10 @@ To download something, you either need a direct link, or you need to have alread > d: https://musify.club/release/some-random-release-183028492 ``` +## Results + +If options are printed in **bold** they can be downloaded. 
Else they may or maybe can't be downloaded + ## Misc ### Exit diff --git a/src/music_kraken/cli/main_downloader.py b/src/music_kraken/cli/main_downloader.py index c18aa8f..a7bf5a4 100644 --- a/src/music_kraken/cli/main_downloader.py +++ b/src/music_kraken/cli/main_downloader.py @@ -173,7 +173,8 @@ class Downloader: page_count = 0 for option in self.current_results.formated_generator(max_items_per_page=self.max_displayed_options): if isinstance(option, Option): - print(f"{option.index:0{self.option_digits}} {option.music_object.option_string}") + color = BColors.BOLD if self.pages.is_downloadable(option.music_object) else BColors.ENDC + print(f"{color}{option.index:0{self.option_digits}} {option.music_object.option_string}{BColors.ENDC}") else: prefix = ALPHABET[page_count % len(ALPHABET)] print( diff --git a/src/music_kraken/download/page_attributes.py b/src/music_kraken/download/page_attributes.py index 27ae19c..e3881f6 100644 --- a/src/music_kraken/download/page_attributes.py +++ b/src/music_kraken/download/page_attributes.py @@ -94,6 +94,15 @@ class Pages: music_object.merge(self._page_instances[page_type].fetch_details(music_object=music_object, stop_at_level=stop_at_level)) return music_object + + def is_downloadable(self, music_object: DatabaseObject) -> bool: + _page_types = set(self._source_to_page) + for src in music_object.source_collection.source_pages: + if src in self._source_to_page: + _page_types.add(self._source_to_page[src]) + + audio_pages = self._audio_pages_set.intersection(_page_types) + return len(audio_pages) > 0 def download(self, music_object: DatabaseObject, genre: str, download_all: bool = False, process_metadata_anyway: bool = False) -> DownloadResult: if not isinstance(music_object, INDEPENDENT_DB_OBJECTS): diff --git a/src/music_kraken/utils/enums/colors.py b/src/music_kraken/utils/enums/colors.py index 61fe1e4..6c23cc1 100644 --- a/src/music_kraken/utils/enums/colors.py +++ b/src/music_kraken/utils/enums/colors.py @@ -11,4 +11,4 @@ 
class BColors: FAIL = "\033[91m" ENDC = "\033[0m" BOLD = "\033[1m" - UNDERLINE = "\033[4m" \ No newline at end of file + UNDERLINE = "\033[4m" From 2c504d3123ad345c3bda0738ad1e889b6e1f2544 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 16 Jan 2024 10:37:22 +0100 Subject: [PATCH 052/104] fear: formatted logger --- src/music_kraken/__init__.py | 28 +++++++++++++++++++++++++- src/music_kraken/utils/enums/colors.py | 5 +++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/music_kraken/__init__.py b/src/music_kraken/__init__.py index 49c35c7..8eb5e9b 100644 --- a/src/music_kraken/__init__.py +++ b/src/music_kraken/__init__.py @@ -11,13 +11,39 @@ if True: import sys sys.setrecursionlimit(100) + +class CustomFormatter(logging.Formatter): + grey = "\x1b[38;20m" + yellow = "\x1b[33;20m" + red = "\x1b[31;20m" + bold_red = "\x1b[31;1m" + reset = "\x1b[0m" + format = logging_settings['logging_format'] + + FORMATS = { + logging.DEBUG: grey + format + reset, + logging.INFO: grey + format + reset, + logging.WARNING: yellow + format + reset, + logging.ERROR: red + format + reset, + logging.CRITICAL: bold_red + format + reset + } + + def format(self, record): + log_fmt = self.FORMATS.get(record.levelno) + formatter = logging.Formatter(log_fmt) + return formatter.format(record) + + +stream_handler = logging.StreamHandler() +stream_handler.setFormatter(CustomFormatter()) + # configure logger default logging.basicConfig( level=logging_settings['log_level'] if not DEBUG_LOGGIN else logging.DEBUG, format=logging_settings['logging_format'], handlers=[ logging.FileHandler(main_settings['log_file']), - logging.StreamHandler() + stream_handler ] ) diff --git a/src/music_kraken/utils/enums/colors.py b/src/music_kraken/utils/enums/colors.py index 6c23cc1..44f79da 100644 --- a/src/music_kraken/utils/enums/colors.py +++ b/src/music_kraken/utils/enums/colors.py @@ -12,3 +12,8 @@ class BColors: ENDC = "\033[0m" BOLD = "\033[1m" UNDERLINE = "\033[4m" + + GREY = 
"\x1b[38;20m" + YELLOW = "\x1b[33;20m" + RED = "\x1b[31;20m" + BOLD_RED = "\x1b[31;1m" From 206899e48a2816a87dd1f84d7f7a3dfbd3195125 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 16 Jan 2024 11:09:01 +0100 Subject: [PATCH 053/104] feat: added color to couple error cases --- src/music_kraken/__init__.py | 3 ++- src/music_kraken/cli/main_downloader.py | 11 ++++++----- .../utils/support_classes/download_result.py | 16 ++++++++-------- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/music_kraken/__init__.py b/src/music_kraken/__init__.py index 8eb5e9b..5a32f8d 100644 --- a/src/music_kraken/__init__.py +++ b/src/music_kraken/__init__.py @@ -7,7 +7,8 @@ from .utils.config import logging_settings, main_settings, read_config read_config() from . import cli -if True: + +if DEBUG: import sys sys.setrecursionlimit(100) diff --git a/src/music_kraken/cli/main_downloader.py b/src/music_kraken/cli/main_downloader.py index a7bf5a4..ce048ca 100644 --- a/src/music_kraken/cli/main_downloader.py +++ b/src/music_kraken/cli/main_downloader.py @@ -1,3 +1,4 @@ +import random from typing import Set, Type, Dict, List from pathlib import Path import re @@ -134,7 +135,7 @@ def get_genre(): def help_message(): print() - print(main_settings["happy_messages"]) + print(random.choice(main_settings["happy_messages"])) print() @@ -173,7 +174,7 @@ class Downloader: page_count = 0 for option in self.current_results.formated_generator(max_items_per_page=self.max_displayed_options): if isinstance(option, Option): - color = BColors.BOLD if self.pages.is_downloadable(option.music_object) else BColors.ENDC + color = BColors.BOLD if self.pages.is_downloadable(option.music_object) else BColors.GREY print(f"{color}{option.index:0{self.option_digits}} {option.music_object.option_string}{BColors.ENDC}") else: prefix = ALPHABET[page_count % len(ALPHABET)] @@ -378,7 +379,7 @@ class Downloader: return False if processed_input != "help": - print("Invalid input.") + 
print(f"{BColors.WARNING}Invalid input.{BColors.ENDC}") help_message() return False @@ -401,9 +402,9 @@ def download( if code == 0: main_settings["hasnt_yet_started"] = False write_config() - print("Restart the programm to use it.") + print(f"{BColors.OKGREEN}Restart the programm to use it.{BColors.ENDC}") else: - print("Something went wrong configuring.") + print(f"{BColors.FAIL}Something went wrong configuring.{BColors.ENDC}") shell = Downloader(genre=genre, process_metadata_anyway=process_metadata_anyway) diff --git a/src/music_kraken/utils/support_classes/download_result.py b/src/music_kraken/utils/support_classes/download_result.py index 11f3417..7180b12 100644 --- a/src/music_kraken/utils/support_classes/download_result.py +++ b/src/music_kraken/utils/support_classes/download_result.py @@ -2,12 +2,12 @@ from dataclasses import dataclass, field from typing import List, Tuple from ...utils.config import main_settings, logging_settings +from ...utils.enums.colors import BColors from ...objects import Target UNIT_PREFIXES: List[str] = ["", "k", "m", "g", "t"] UNIT_DIVISOR = 1024 - LOGGER = logging_settings["download_logger"] @@ -83,16 +83,16 @@ class DownloadResult: def __str__(self): if self.is_fatal_error: return self.error_message - head = f"{self.fail} from {self.total} downloads failed:\n" \ - f"successrate:\t{int(self.success_percentage * 100)}%\n" \ - f"failrate:\t{int(self.failure_percentage * 100)}%\n" \ - f"total size:\t{self.formated_size}\n" \ - f"skipped segments:\t{self.sponsor_segments}\n" \ - f"found on disc:\t{self.found_on_disk}" + head = f"{self.fail} from {self.total} downloads failed:\n" \ + f"success-rate:\t{int(self.success_percentage * 100)}%\n" \ + f"fail-rate:\t{int(self.failure_percentage * 100)}%\n" \ + f"total size:\t{self.formated_size}\n" \ + f"skipped segments:\t{self.sponsor_segments}\n" \ + f"found on disc:\t{self.found_on_disk}" if not self.is_mild_failure: return head _lines = [head] - _lines.extend(self._error_message_list) + 
_lines.extend(BColors.FAIL + s + BColors.ENDC for s in self._error_message_list) return "\n".join(_lines) From 7a70167f6f3238f4f3f1818c2123644628d15dae Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 16 Jan 2024 11:43:03 +0100 Subject: [PATCH 054/104] feat: removed redundand stuff --- src/music_kraken/pages/abstract.py | 33 ------------------------------ 1 file changed, 33 deletions(-) diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index 50a4e7b..c253fc0 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -133,39 +133,6 @@ def _clean_song(song: Song, collections: Dict[INDEPENDENT_DB_TYPES, Collection]) _clean_collection(song.main_artist_collection, collections) -def clean_object(dirty_object: DatabaseObject) -> DatabaseObject: - if isinstance(dirty_object, INDEPENDENT_DB_OBJECTS): - collections = { - Label: Collection(element_type=Label), - Artist: Collection(element_type=Artist), - Album: Collection(element_type=Album), - Song: Collection(element_type=Song) - } - - if isinstance(dirty_object, Song): - return dirty_object - - _clean_music_object(dirty_object, collections) - return dirty_object - - -def build_new_object(new_object: DatabaseObject) -> DatabaseObject: - new_object = clean_object(new_object) - new_object.compile(merge_into=False) - - return new_object - - -def merge_together(old_object: DatabaseObject, new_object: DatabaseObject, do_compile: bool = True) -> DatabaseObject: - new_object = clean_object(new_object) - - old_object.merge(new_object) - if do_compile and False: - old_object.compile(merge_into=False) - - return old_object - - class Page: """ This is an abstract class, laying out the From f9b126001cf479c2266fec562bf8ae58cc44b910 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 16 Jan 2024 13:55:24 +0100 Subject: [PATCH 055/104] feat: layout yt signatures --- src/actual_donwload.py | 2 +- src/music_kraken/__init__.py | 4 +- 
.../pages/youtube_music/super_youtube.py | 2 - .../pages/youtube_music/youtube_music.py | 188 +++++++++++++++--- src/music_kraken/utils/debug_utils.py | 3 + src/music_kraken/utils/shared.py | 10 +- 6 files changed, 170 insertions(+), 39 deletions(-) diff --git a/src/actual_donwload.py b/src/actual_donwload.py index 6ad3c34..817adde 100644 --- a/src/actual_donwload.py +++ b/src/actual_donwload.py @@ -43,7 +43,7 @@ if __name__ == "__main__": bandcamp_test = [ "s: #a Only Smile", - "d: 1", + "d: 18", ] diff --git a/src/music_kraken/__init__.py b/src/music_kraken/__init__.py index 5a32f8d..b3ee566 100644 --- a/src/music_kraken/__init__.py +++ b/src/music_kraken/__init__.py @@ -2,7 +2,7 @@ import logging import gc import sys -from .utils.shared import DEBUG, DEBUG_LOGGIN +from .utils.shared import DEBUG, DEBUG_LOGGING from .utils.config import logging_settings, main_settings, read_config read_config() from . import cli @@ -40,7 +40,7 @@ stream_handler.setFormatter(CustomFormatter()) # configure logger default logging.basicConfig( - level=logging_settings['log_level'] if not DEBUG_LOGGIN else logging.DEBUG, + level=logging_settings['log_level'] if not DEBUG_LOGGING else logging.DEBUG, format=logging_settings['logging_format'], handlers=[ logging.FileHandler(main_settings['log_file']), diff --git a/src/music_kraken/pages/youtube_music/super_youtube.py b/src/music_kraken/pages/youtube_music/super_youtube.py index 676a06e..e18a473 100644 --- a/src/music_kraken/pages/youtube_music/super_youtube.py +++ b/src/music_kraken/pages/youtube_music/super_youtube.py @@ -152,7 +152,6 @@ class SuperYouTube(Page): if parsed.url_type in _url_type: return _url_type[parsed.url_type] - def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: """ 1. 
getting the optimal source @@ -196,7 +195,6 @@ class SuperYouTube(Page): return self.download_connection.stream_into(endpoint, target, description=desc, raw_url=True) - def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]: if not youtube_settings["use_sponsor_block"]: return [] diff --git a/src/music_kraken/pages/youtube_music/youtube_music.py b/src/music_kraken/pages/youtube_music/youtube_music.py index 3ed65b3..a4c765a 100644 --- a/src/music_kraken/pages/youtube_music/youtube_music.py +++ b/src/music_kraken/pages/youtube_music/youtube_music.py @@ -10,6 +10,7 @@ from ...utils.exception.config import SettingValueError from ...utils.config import main_settings, youtube_settings, logging_settings from ...utils.shared import DEBUG, DEBUG_YOUTUBE_INITIALIZING from ...utils.functions import get_current_millis + if DEBUG: from ...utils.debug_utils import dump_to_file @@ -46,6 +47,7 @@ class YoutubeMusicConnection(Connection): --> average delay in between: 1.8875 min """ + def __init__(self, logger: logging.Logger, accept_language: str): # https://stackoverflow.com/questions/30561260/python-change-accept-language-using-requests super().__init__( @@ -61,18 +63,17 @@ class YoutubeMusicConnection(Connection): # https://stackoverflow.com/a/66940841/16804841 doesn't work for cookie_key, cookie_value in youtube_settings["youtube_music_consent_cookies"].items(): self.session.cookies.set( - name=cookie_key, + name=cookie_key, value=cookie_value, path='/', domain='.youtube.com' ) - def heartbeat(self): r = self.get("https://music.youtube.com/verify_session", is_heartbeat=True) if r is None: self.heartbeat_failed() return - + string = r.text data = json.loads(string[string.index("{"):]) @@ -100,7 +101,8 @@ class YoutubeMusic(SuperYouTube): LOGGER = logging_settings["youtube_music_logger"] def __init__(self, *args, **kwargs): - self.connection: YoutubeMusicConnection = YoutubeMusicConnection(logger=self.LOGGER, accept_language="en-US,en;q=0.5") + 
self.connection: YoutubeMusicConnection = YoutubeMusicConnection(logger=self.LOGGER, + accept_language="en-US,en;q=0.5") self.credentials: YouTubeMusicCredentials = YouTubeMusicCredentials( api_key=youtube_settings["youtube_music_api_key"], ctoken="", @@ -111,7 +113,7 @@ class YoutubeMusic(SuperYouTube): if self.credentials.api_key == "" or DEBUG_YOUTUBE_INITIALIZING: self._fetch_from_main_page() - + super().__init__(*args, **kwargs) def _fetch_from_main_page(self): @@ -125,7 +127,7 @@ class YoutubeMusic(SuperYouTube): r = self.connection.get("https://music.youtube.com/") if r is None: return - + if urlparse(r.url).netloc == "consent.youtube.com": self.LOGGER.info(f"Making cookie consent request for {type(self).__name__}.") r = self.connection.post("https://consent.youtube.com/save", data={ @@ -145,15 +147,15 @@ class YoutubeMusic(SuperYouTube): }) if r is None: return - + # load cookie dict from settings cookie_dict = youtube_settings["youtube_music_consent_cookies"] - + for cookie in r.cookies: cookie_dict[cookie.name] = cookie.value for cookie in self.connection.session.cookies: cookie_dict[cookie.name] = cookie.value - + # save cookies in settings youtube_settings["youtube_music_consent_cookies"] = cookie_dict @@ -171,16 +173,16 @@ class YoutubeMusic(SuperYouTube): r"(?<=\"innertubeApiKey\":\")(.*?)(?=\")", r"(?<=\"INNERTUBE_API_KEY\":\")(.*?)(?=\")", ) - + api_keys = [] for api_key_patter in api_key_pattern: api_keys.extend(re.findall(api_key_patter, content)) - + found_a_good_api_key = False for api_key in api_keys: # save the first api key api_key = api_keys[0] - + try: youtube_settings["youtube_music_api_key"] = api_key except SettingValueError: @@ -212,7 +214,7 @@ class YoutubeMusic(SuperYouTube): def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: return super().get_source_type(source) - + def general_search(self, search_query: str) -> List[DatabaseObject]: search_query = search_query.strip() @@ -221,25 +223,27 @@ class 
YoutubeMusic(SuperYouTube): # approximate the ammount of time it would take to type the search, because google for some reason tracks that LAST_EDITED_TIME = get_current_millis() - random.randint(0, 20) _estimated_time = sum(len(search_query) * random.randint(50, 100) for _ in search_query.strip()) - FIRST_EDITED_TIME = LAST_EDITED_TIME - _estimated_time if LAST_EDITED_TIME - self.start_millis > _estimated_time else random.randint(50, 100) + FIRST_EDITED_TIME = LAST_EDITED_TIME - _estimated_time if LAST_EDITED_TIME - self.start_millis > _estimated_time else random.randint( + 50, 100) query_continue = "" if self.credentials.ctoken == "" else f"&ctoken={self.credentials.ctoken}&continuation={self.credentials.ctoken}" # construct the request r = self.connection.post( - url=get_youtube_url(path="/youtubei/v1/search", query=f"key={self.credentials.api_key}&prettyPrint=false"+query_continue), + url=get_youtube_url(path="/youtubei/v1/search", + query=f"key={self.credentials.api_key}&prettyPrint=false" + query_continue), json={ - "context": {**self.credentials.context, "adSignalsInfo":{"params":[]}}, + "context": {**self.credentials.context, "adSignalsInfo": {"params": []}}, "query": search_query, "suggestStats": { "clientName": "youtube-music", "firstEditTimeMsec": FIRST_EDITED_TIME, "inputMethod": "KEYBOARD", - "lastEditTimeMsec": LAST_EDITED_TIME, + "lastEditTimeMsec": LAST_EDITED_TIME, "originalQuery": search_query, "parameterValidationStatus": "VALID_PARAMETERS", "searchMethod": "ENTER_KEY", - "validationStatus": "VALID", + "validationStatus": "VALID", "zeroPrefixEnabled": True, "availableSuggestions": [] } @@ -252,8 +256,9 @@ class YoutubeMusic(SuperYouTube): if r is None: return [] - renderer_list = r.json().get("contents", {}).get("tabbedSearchResultsRenderer", {}).get("tabs", [{}])[0].get("tabRenderer").get("content", {}).get("sectionListRenderer", {}).get("contents", []) - + renderer_list = r.json().get("contents", {}).get("tabbedSearchResultsRenderer", 
{}).get("tabs", [{}])[0].get( + "tabRenderer").get("content", {}).get("sectionListRenderer", {}).get("contents", []) + if DEBUG: for i, content in enumerate(renderer_list): dump_to_file(f"{i}-renderer.json", json.dumps(content), is_json=True, exit_after_dump=False) @@ -281,7 +286,7 @@ class YoutubeMusic(SuperYouTube): url=get_youtube_url(path="/youtubei/v1/browse", query=f"key={self.credentials.api_key}&prettyPrint=false"), json={ "browseId": browse_id, - "context": {**self.credentials.context, "adSignalsInfo":{"params":[]}} + "context": {**self.credentials.context, "adSignalsInfo": {"params": []}} } ) if r is None: @@ -290,7 +295,8 @@ class YoutubeMusic(SuperYouTube): if DEBUG: dump_to_file(f"{browse_id}.json", r.text, is_json=True, exit_after_dump=False) - renderer_list = r.json().get("contents", {}).get("singleColumnBrowseResultsRenderer", {}).get("tabs", [{}])[0].get("tabRenderer", {}).get("content", {}).get("sectionListRenderer", {}).get("contents", []) + renderer_list = r.json().get("contents", {}).get("singleColumnBrowseResultsRenderer", {}).get("tabs", [{}])[ + 0].get("tabRenderer", {}).get("content", {}).get("sectionListRenderer", {}).get("contents", []) if DEBUG: for i, content in enumerate(renderer_list): @@ -306,10 +312,10 @@ class YoutubeMusic(SuperYouTube): for renderer in renderer_list: results.extend(parse_renderer(renderer)) - artist.add_list_of_other_objects(results) + artist.add_list_of_other_objects(results) return artist - + def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: album = Album() @@ -323,7 +329,7 @@ class YoutubeMusic(SuperYouTube): url=get_youtube_url(path="/youtubei/v1/browse", query=f"key={self.credentials.api_key}&prettyPrint=false"), json={ "browseId": browse_id, - "context": {**self.credentials.context, "adSignalsInfo":{"params":[]}} + "context": {**self.credentials.context, "adSignalsInfo": {"params": []}} } ) if r is None: @@ -332,7 +338,8 @@ class YoutubeMusic(SuperYouTube): if DEBUG: 
dump_to_file(f"{browse_id}.json", r.text, is_json=True, exit_after_dump=False) - renderer_list = r.json().get("contents", {}).get("singleColumnBrowseResultsRenderer", {}).get("tabs", [{}])[0].get("tabRenderer", {}).get("content", {}).get("sectionListRenderer", {}).get("contents", []) + renderer_list = r.json().get("contents", {}).get("singleColumnBrowseResultsRenderer", {}).get("tabs", [{}])[ + 0].get("tabRenderer", {}).get("content", {}).get("sectionListRenderer", {}).get("contents", []) if DEBUG: for i, content in enumerate(renderer_list): @@ -348,10 +355,133 @@ class YoutubeMusic(SuperYouTube): for renderer in renderer_list: results.extend(parse_renderer(renderer)) - album.add_list_of_other_objects(results) + album.add_list_of_other_objects(results) return album + @staticmethod + def _parse_adaptive_formats(data: list) -> str: + best_url = None + audio_format = None + best_bitrate = 0 + + def decode_url(_format: dict): + """ + s=0%3Dw9hlenzIQkU5qD55flWqkO-wn8G6CJxI%3Dn9_OSUAUh3AiACkI5TNetQixuV6PJAxH-NFqGWGQCivQMkqprwyv8z2NAhIQRwsSdQfJAfJA + sp=sig + 
url=https://rr1---sn-cxaf0x-nugl.googlevideo.com/videoplayback%3Fexpire%3D1705426390%26ei%3DdmmmZZK1OYf41gL26KGwDg%26ip%3D129.143.170.58%26id%3Do-APgHuP61UnxvMxskECWmga1BRWYDFv91DMB7E6R_b_CG%26itag%3D18%26source%3Dyoutube%26requiressl%3Dyes%26xpc%3DEgVo2aDSNQ%253D%253D%26mh%3D_V%26mm%3D31%252C29%26mn%3Dsn-cxaf0x-nugl%252Csn-4g5edndd%26ms%3Dau%252Crdu%26mv%3Dm%26mvi%3D1%26pl%3D16%26pcm2%3Dyes%26gcr%3Dde%26initcwndbps%3D1737500%26spc%3DUWF9f6gk6WFPUFJZkjGIeb9q8NjPmmcsXzCp%26vprv%3D1%26svpuc%3D1%26mime%3Dvideo%252Fmp4%26ns%3DJnQgwQe-JazkZpURVB2rmlUQ%26cnr%3D14%26ratebypass%3Dyes%26dur%3D170.643%26lmt%3D1697280536047282%26mt%3D1705404526%26fvip%3D4%26fexp%3D24007246%26c%3DWEB_REMIX%26txp%3D2318224%26n%3DEq7jcRmeC89oLlbr%26sparams%3Dexpire%252Cei%252Cip%252Cid%252Citag%252Csource%252Crequiressl%252Cxpc%252Cpcm2%252Cgcr%252Cspc%252Cvprv%252Csvpuc%252Cmime%252Cns%252Ccnr%252Cratebypass%252Cdur%252Clmt%26lsparams%3Dmh%252Cmm%252Cmn%252Cms%252Cmv%252Cmvi%252Cpl%252Cinitcwndbps%26lsig%3DAAO5W4owRQIhAOJSldsMn2QA8b-rMr8mJoPr-9-8piIMe6J-800YB0DiAiBKLBHGfr-a6d87K0-WbsJzVf9f2DhYgv0vcntWvHmvGA%253D%253D" + :param _format: + :return: + """ + sc = parse_qs(_format["signatureCipher"]) + + fmt_url = sc["url"][0] + encrypted_sig = sc['s'][0] + + if not (sc and fmt_url and encrypted_sig): + return + + """ + if not player_url: + player_url = self._extract_player_url(webpage) + if not player_url: + return + + signature = self._decrypt_signature(sc['s'][0], video_id, player_url) + sp = try_get(sc, lambda x: x['sp'][0]) or 'signature' + fmt_url += '&' + sp + '=' + signature + """ + + for possible_format in data: + _url_list = parse_qs(possible_format["signatureCipher"])["url"] + if len(_url_list) <= 0: + continue + + url = _url_list[0] + if best_url is None: + best_url = url + + mime_type: str = possible_format["mimeType"] + if not mime_type.startswith("audio"): + continue + + bitrate = int(possible_format.get("bitrate", 0)) + + if bitrate >= main_settings["bitrate"]: + best_bitrate = 
bitrate + audio_format = possible_format + best_url = url + break + + if bitrate > best_bitrate: + best_bitrate = bitrate + audio_format = possible_format + best_url = url + + return best_url + def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: - print(source) - return Song() + """ + curl 'https://music.youtube.com/youtubei/v1/player?key=AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30&prettyPrint=false' + --compressed -X POST + -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0' + -H 'Accept: */*' + -H 'Accept-Language: en-US,en;q=0.5' + -H 'Accept-Encoding: gzip, deflate, br' + -H 'Content-Type: application/json' + -H 'Referer: https://music.youtube.com/' + -H 'X-Goog-Visitor-Id: CgtHdmkzbGhaMDltVSj4j5mtBjIKCgJERRIEEgAgOA%3D%3D' + -H 'X-Youtube-Bootstrap-Logged-In: false' + -H 'X-Youtube-Client-Name: 67' -H 'X-Youtube-Client-Version: 1.20240103.01.00' + -H 'Origin: https://music.youtube.com' + -H 'Sec-Fetch-Dest: empty' -H 'Sec-Fetch-Mode: cors' -H 'Sec-Fetch-Site: same-origin' -H 'Connection: keep-alive' -H 'Alt-Used: music.youtube.com' + -H 'Cookie: SOCS=CAISNQgREitib3FfaWRlbnRpdHlmcm9udGVuZHVpc2VydmVyXzIwMjQwMTA5LjA1X3AwGgJlbiACGgYIgI6XrQY; YSC=r46McyPx8dE; VISITOR_PRIVACY_METADATA=CgJERRIEEgAgOA%3D%3D; CONSENT=PENDING+663; VISITOR_INFO1_LIVE=Gvi3lhZ09mU; _gcl_au=1.1.396177275.1705396217; ST-1hw5vco=csn=MC4xNTI3OTkwMzQyOTc1MzQ2&itct=CNgDEMn0AhgDIhMItMS6_cfhgwMVDMtCBR1u5wb6' -H 'TE: trailers' + --data-raw '{ + "videoId":"QeQrfsqPMCs", + "context":{"client":{"hl":"en","gl":"DE","remoteHost":"129.143.170.58","deviceMake":"","deviceModel":"","visitorData":"CgtHdmkzbGhaMDltVSj4j5mtBjIKCgJERRIEEgAgOA%3D%3D","userAgent":"Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 
Firefox/121.0,gzip(gfe)","clientName":"WEB_REMIX","clientVersion":"1.20240103.01.00","osName":"X11","osVersion":"","originalUrl":"https://music.youtube.com/?cbrd=1","platform":"DESKTOP","clientFormFactor":"UNKNOWN_FORM_FACTOR","configInfo":{"appInstallData":"CPiPma0GEL2ZsAUQqJqwBRCmgbAFEP24_RIQjaKwBRDNlbAFENWIsAUQmaSwBRD6p7AFEL75rwUQmvCvBRDT4a8FEL2KsAUQrtT-EhC36v4SENnJrwUQnouwBRDJ968FEJP8rwUQuIuuBRDM364FEIiHsAUQ0I2wBRDnuq8FEPOhsAUQ2piwBRDMrv4SEIjjrwUQooGwBRDuorAFEM6osAUQ6-j-EhC3nbAFEKXC_hIQ9fmvBRDh8q8FEJmUsAUQt--vBRD8hbAFEKigsAUQrLevBRC_o7AFEOuTrgUQqfevBRDd6P4SEJj8_hIQ6YywBRC9tq4FEOupsAUQ5LP-EhDfhP8SEOrDrwUQqKGwBRC8-a8FEPKYsAU%3D"},"browserName":"Firefox","browserVersion":"121.0","acceptHeader":"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8","deviceExperimentId":"ChxOek15TkRZeU1EazNOalE0TXpVNU1EQXhOZz09EPiPma0GGPiPma0G","screenWidthPoints":780,"screenHeightPoints":638,"screenPixelDensity":2,"screenDensityFloat":2,"utcOffsetMinutes":60,"userInterfaceTheme":"USER_INTERFACE_THEME_DARK","timeZone":"Europe/Berlin","playerType":"UNIPLAYER","tvAppInfo":{"livingRoomAppMode":"LIVING_ROOM_APP_MODE_UNSPECIFIED"},"clientScreen":"WATCH_FULL_SCREEN"},"user":{"lockedSafetyMode":false},"request":{"useSsl":true,"internalExperimentFlags":[],"consistencyTokenJars":[]},"clientScreenNonce":"MC4xNTI3OTkwMzQyOTc1MzQ2","adSignalsInfo":{"params":[{"key":"dt","value":"1705396224619"},{"key":"flash","value":"0"},{"key":"frm","value":"0"},{"key":"u_tz","value":"60"},{"key":"u_his","value":"5"},{"key":"u_h","value":"800"},{"key":"u_w","value":"1280"},{"key":"u_ah","value":"769"},{"key":"u_aw","value":"1280"},{"key":"u_cd","value":"24"},{"key":"bc","value":"31"},{"key":"bih","value":"638"},{"key":"biw","value":"780"},{"key":"brdim","value":"0,31,0,31,1280,31,1280,769,780,638"},{"key":"vis","value":"1"},{"key":"wgl","value":"true"},{"key":"ca_type","value":"image"}]},"clickTracking":{"clickTrackingParams":"CNgDEMn0AhgDIhMItMS6_cfhgwMVDMtCBR1u5wb6"}},"playba
ckContext":{"contentPlaybackContext":{"html5Preference":"HTML5_PREF_WANTS","lactMilliseconds":"22","referer":"https://music.youtube.com/","signatureTimestamp":19732,"autoCaptionsDefaultOn":false,"mdxContext":{}}},"cpn":"Aqv99K7Z_3tj9ACA","playlistId":"RDAMVMQeQrfsqPMCs","captionParams":{},"serviceIntegrityDimensions":{"poToken":"MnQLhidwfIVPEAu-woG_SQU69mfPclEz7kVUmC1dNP8EQN7NNyVdF3KcVIuKRKrcXlwOXEQg3hc5qXSBbbQU_M7lxx9zgQMelv9iZwWfWlLyI9RoZXB1wipAYHWNzxu7rMqDwRn5M6WS4RRIeHcld9P_YZRYdg=="} + }' + :param source: + :param stop_at_level: + :return: + """ + song = Song(source_list=[ + source + ]) + + parsed_url = urlparse(source.url) + video_id = parse_qs(parsed_url.query)['v'] + if len(video_id) <= 0: + return song + browse_id = video_id[0] + + r = self.connection.post( + url=get_youtube_url(path="/youtubei/v1/player", query=f"key={self.credentials.api_key}&prettyPrint=false"), + json={ + "videoId": browse_id, + "context": {**self.credentials.context, "adSignalsInfo": {"params": []}} + } + ) + if r is None: + return song + + data = r.json() + + dump_to_file("yt_video_overview.json", data, exit_after_dump=False) + + available_formats = data.get("streamingData", {}).get("adaptiveFormats", []) + + if len(available_formats) > 0: + source.audio_url = self._parse_adaptive_formats(available_formats) + + return song + + def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: + if source.audio_url is None: + self.fetch_song(source) + + if source.audio_url is None: + self.LOGGER.warning(f"Couldn't fetch the audio source with the innertube api, falling back to invidious.") + return super().download_song_to_target(source, target) + + print(source.audio_url) + return self.download_connection.stream_into(source.audio_url, target, description=desc, raw_url=True) diff --git a/src/music_kraken/utils/debug_utils.py b/src/music_kraken/utils/debug_utils.py index 9541c53..457081b 100644 --- a/src/music_kraken/utils/debug_utils.py +++ 
b/src/music_kraken/utils/debug_utils.py @@ -11,6 +11,9 @@ def dump_to_file(file_name: str, payload: str, is_json: bool = False, exit_after if is_json: payload = json.dumps(json.loads(payload), indent=4) + if isinstance(payload, dict): + payload = json.dumps(payload, indent=4) + with path.open("w") as f: f.write(payload) diff --git a/src/music_kraken/utils/shared.py b/src/music_kraken/utils/shared.py index bfe483e..d1645f8 100644 --- a/src/music_kraken/utils/shared.py +++ b/src/music_kraken/utils/shared.py @@ -2,20 +2,20 @@ import random from .config import main_settings -DEBUG = False -DEBUG_LOGGIN = DEBUG and False -DEBUG_YOUTUBE_INITIALIZING = DEBUG and False +DEBUG = True +DEBUG_LOGGING = DEBUG and True +DEBUG_YOUTUBE_INITIALIZING = DEBUG and False DEBUG_PAGES = DEBUG and False if DEBUG: print("DEBUG ACTIVE") + def get_random_message() -> str: return random.choice(main_settings['happy_messages']) -HIGHEST_ID = 2**main_settings['id_bits'] - +HIGHEST_ID = 2 ** main_settings['id_bits'] HELP_MESSAGE = """to search: > s: {query or url} From ede8ce0e8944bd83cb5ba4538d3a46dfbe5c058d Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 17 Jan 2024 12:01:50 +0100 Subject: [PATCH 056/104] feat: added cache directory --- requirements.txt | 5 +- .../pages/youtube_music/youtube_music.py | 3 + src/music_kraken/utils/cache.py | 0 .../utils/config/config_files/main_config.py | 8 ++- src/music_kraken/utils/hooks.py | 29 ---------- .../utils/path_manager/locations.py | 57 +++++++++++++++++++ 6 files changed, 69 insertions(+), 33 deletions(-) create mode 100644 src/music_kraken/utils/cache.py delete mode 100644 src/music_kraken/utils/hooks.py diff --git a/requirements.txt b/requirements.txt index 0d644fb..4462589 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ pycountry~=22.3.5 python-dateutil~=2.8.2 pandoc~=2.3 SQLAlchemy~=2.0.7 -setuptools~=60.2.0 +setuptools~=68.2.0 tqdm~=4.65.0 ffmpeg-python~=0.2.0 platformdirs~=3.2.0 @@ -18,3 +18,6 @@ pyffmpeg~=2.4.2.18 
ffmpeg-progress-yield~=0.7.8 pathvalidate~=2.5.2 guppy3~=3.1.3 + +toml~=0.10.2 +typing_extensions~=4.7.1 \ No newline at end of file diff --git a/src/music_kraken/pages/youtube_music/youtube_music.py b/src/music_kraken/pages/youtube_music/youtube_music.py index a4c765a..249a3db 100644 --- a/src/music_kraken/pages/youtube_music/youtube_music.py +++ b/src/music_kraken/pages/youtube_music/youtube_music.py @@ -6,6 +6,9 @@ import json from dataclasses import dataclass import re +from youtube_dl.jsinterp import JSInterpreter +from youtube_dl.extractor.youtube import YoutubeIE + from ...utils.exception.config import SettingValueError from ...utils.config import main_settings, youtube_settings, logging_settings from ...utils.shared import DEBUG, DEBUG_YOUTUBE_INITIALIZING diff --git a/src/music_kraken/utils/cache.py b/src/music_kraken/utils/cache.py new file mode 100644 index 0000000..e69de29 diff --git a/src/music_kraken/utils/config/config_files/main_config.py b/src/music_kraken/utils/config/config_files/main_config.py index ba6ef91..6712a4c 100644 --- a/src/music_kraken/utils/config/config_files/main_config.py +++ b/src/music_kraken/utils/config/config_files/main_config.py @@ -12,7 +12,7 @@ from ..attributes.special_attributes import ( AudioFormatAttribute, ) -config = Config([ +config = Config(( Attribute(name="hasnt_yet_started", default_value=False, description="This will be set automatically, to look if it needs to run the scripts that run on start."), Attribute(name="bitrate", default_value=125, description="Streams the audio with given bitrate [kB/s]. Can't stream with a higher Bitrate, than the audio source provides."), AudioFormatAttribute(name="audio_format", default_value="mp3", description="""Music Kraken will stream the audio into this format. 
@@ -64,6 +64,8 @@ all the error messages are shown."""), PathAttribute(name="temp_directory", default_value=LOCATIONS.TEMP_DIRECTORY.resolve(), description="All temporary stuff is gonna be dumped in this directory."), PathAttribute(name="log_file", default_value=LOCATIONS.get_log_file("download_logs.log").resolve()), PathAttribute(name="ffmpeg_binary", default_value=LOCATIONS.FFMPEG_BIN.resolve(), description="Set the path to the ffmpeg binary."), + PathAttribute(name="cache_directory", default_value=LOCATIONS.CACHE_DIRECTORY.resolve(), + description="Set the path of the cache directory."), Attribute( name="not_a_genre_regex", description="These regular expressions tell music-kraken, which sub-folders of the music-directory\n" @@ -93,7 +95,7 @@ But anyways... Freedom of thought, so go ahead and change the messages."""), Attribute(name="id_bits", default_value=64, description="I really dunno why I even made this a setting.. Modifying this is a REALLY dumb idea."), Description("🏳️‍⚧️🏳️‍⚧️ Protect trans youth. 
🏳️‍⚧️🏳️‍⚧️\n"), -], LOCATIONS.get_config_file("main")) +), LOCATIONS.get_config_file("main")) class SettingsStructure(TypedDict): @@ -126,4 +128,4 @@ class SettingsStructure(TypedDict): log_file: Path not_a_genre_regex: List[str] ffmpeg_binary: Path - + cache_directory: Path diff --git a/src/music_kraken/utils/hooks.py b/src/music_kraken/utils/hooks.py deleted file mode 100644 index e3cd954..0000000 --- a/src/music_kraken/utils/hooks.py +++ /dev/null @@ -1,29 +0,0 @@ -from typing import List, Iterable, Dict, TypeVar, Generic, Iterator, Any, Type -from enum import Enum -from dataclasses import dataclass -from collections import defaultdict - - -class HookEventTypes(Enum): - pass - - -@dataclass -class Event: - target: Any - - -class Hooks: - def __init__(self, target) -> None: - self.target = target - - self._callbacks: Dict[HookEventTypes, List[callable]] = defaultdict(list) - - def add_event_listener(self, event_type: HookEventTypes, callback: callable): - self._callbacks[event_type].append(callback) - - def trigger_event(self, event_type: HookEventTypes, *args, **kwargs): - event: Event = Event(target=self.target) - - for callback in self._callbacks[event_type]: - callback(event, *args, **kwargs) diff --git a/src/music_kraken/utils/path_manager/locations.py b/src/music_kraken/utils/path_manager/locations.py index 66953d1..a3917bf 100644 --- a/src/music_kraken/utils/path_manager/locations.py +++ b/src/music_kraken/utils/path_manager/locations.py @@ -1,14 +1,63 @@ +import configparser from pathlib import Path import os +from os.path import expandvars +import logging +from sys import platform import tempfile +from typing import Optional + from pyffmpeg import FFmpeg + from .music_directory import get_music_directory from .config_directory import get_config_directory class Locations: + @staticmethod + def _get_env(key: str, default: Path, default_for_windows: bool = True) -> Optional[Path]: + res = os.environ.get(key.upper()) + if res is not None: + return res + + 
xdg_user_dirs_file = os.environ.get("XDG_CONFIG_HOME") or Path(Path.home(), ".config", "user-dirs.dirs") + xdg_user_dirs_default_file = Path("/etc/xdg/user-dirs.defaults") + + def get_dir_from_xdg_file(xdg_file_path: os.PathLike) -> Optional[Path]: + nonlocal key + + try: + with open(xdg_file_path, 'r') as f: + data = "[XDG_USER_DIRS]\n" + f.read() + config = configparser.ConfigParser(allow_no_value=True) + config.read_string(data) + xdg_config = config['XDG_USER_DIRS'] + + return Path(expandvars(xdg_config[key.lower()].strip('"'))) + + except (FileNotFoundError, KeyError) as e: + logging.warning( + f"Missing file or No entry found for \"{key}\" in: \"{xdg_file_path}\".\n" + ) + logging.debug(str(e)) + + res = get_dir_from_xdg_file(xdg_user_dirs_file) + if res is not None: + return res + + res = get_dir_from_xdg_file(xdg_user_dirs_default_file) + if res is not None: + return res + + logging.warning(f"couldn't find a {key}, falling back to: {default}") + + if not default_for_windows and platform == "linux": + return + + return default + def __init__(self, application_name: os.PathLike = "music-kraken"): self.FILE_ENCODING: str = "utf-8" @@ -21,6 +70,14 @@ class Locations: self.CONFIG_DIRECTORY.mkdir(exist_ok=True, parents=True) self.CONFIG_FILE = Path(self.CONFIG_DIRECTORY, f"{application_name}.conf") self.LEGACY_CONFIG_FILE = Path(self.CONFIG_DIRECTORY, f"{application_name}.conf") + + self.CACHE_DIRECTORY = self._get_env("XDG_CACHE_HOME", Path(Path.home(), ".cache")) + if self.CACHE_DIRECTORY is None: + logging.warning(f"Could not find a cache dir. 
Falling back to the temp dir: {self.TEMP_DIRECTORY}") + self.CACHE_DIRECTORY = self.TEMP_DIRECTORY + else: + self.CACHE_DIRECTORY = Path(self.CACHE_DIRECTORY, application_name) + self.CACHE_DIRECTORY.mkdir(parents=True, exist_ok=True) self.FFMPEG_BIN = Path(FFmpeg(enable_log=False).get_ffmpeg_bin()) From b0815fdac4cc0554f90176c6350ccd73b8e8ca59 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 17 Jan 2024 12:46:55 +0100 Subject: [PATCH 057/104] feat: implemented caching --- src/music_kraken/utils/cache.py | 109 ++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/src/music_kraken/utils/cache.py b/src/music_kraken/utils/cache.py index e69de29..dbf2c26 100644 --- a/src/music_kraken/utils/cache.py +++ b/src/music_kraken/utils/cache.py @@ -0,0 +1,109 @@ +import json +from pathlib import Path +from dataclasses import dataclass +from datetime import datetime, timedelta +from typing import List, Optional + +from .config import main_settings + + +@dataclass +class CacheAttribute: + module: str + name: str + + created: datetime + expires: datetime + + @property + def id(self): + return f"{self.module}_{self.name}" + + @property + def is_valid(self): + return datetime.now() < self.expires + + def __eq__(self, other): + return self.__dict__ == other.__dict__ + + +class Cache: + def __init__(self): + self._dir = main_settings["cache_directory"] + self.index = Path(self._dir, "index.json") + + if not self.index.is_file(): + with self.index.open("w") as i: + i.write(json.dumps([])) + + self.cached_attributes: List[CacheAttribute] = [] + self._id_to_attribute = {} + + self._time_fields = {"created", "expires"} + with self.index.open("r") as i: + for c in json.loads(i.read()): + for key in self._time_fields: + c[key] = datetime.fromisoformat(c[key]) + + self.cached_attributes.append(**c) + + def _init_module(self, module: str) -> Path: + """ + :param module: + :return: the module path + """ + r = Path(self._dir, module) + r.mkdir(exist_ok=True) + 
return r + + def _write_attribute(self, cached_attribute: CacheAttribute, write: bool = True) -> bool: + existing_attribute: Optional[CacheAttribute] = self._id_to_attribute.get(cached_attribute.id) + if existing_attribute is not None: + # the attribute exists + if existing_attribute == cached_attribute: + return True + + if existing_attribute.is_valid: + return False + + existing_attribute.__dict__ = cached_attribute.__dict__ + cached_attribute = existing_attribute + else: + self.cached_attributes.append(cached_attribute) + self._id_to_attribute[cached_attribute.id] = cached_attribute + + if write: + _json = [] + for c in self.cached_attributes: + d = c.__dict__ + for key in self._time_fields: + d[key] = d[key].isoformat() + + _json.append(d) + + with self.index.open("w") as f: + f.write(json.dumps(_json, indent=4)) + + return True + + def set(self, content: bytes, module: str, name: str, expires_in: int = 10): + """ + :param content: + :param module: + :param name: + :param expires_in: the unit is days + :return: + """ + + module_path = self._init_module(module) + + cache_attribute = CacheAttribute( + module=module, + name=name, + created=datetime.now(), + expires=datetime.now() + timedelta(days=expires_in), + ) + self._write_attribute(cache_attribute) + + with Path(module_path, name).open("wb") as content_file: + content_file.write(content) From 66f4ad3df5a2627855bf0704704e2eb6ee64d9b6 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 17 Jan 2024 12:54:02 +0100 Subject: [PATCH 058/104] feat: implemented get function --- src/music_kraken/utils/cache.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/music_kraken/utils/cache.py b/src/music_kraken/utils/cache.py index dbf2c26..b24b68f 100644 --- a/src/music_kraken/utils/cache.py +++ b/src/music_kraken/utils/cache.py @@ -3,6 +3,7 @@ from pathlib import Path from dataclasses import dataclass from datetime import datetime, timedelta from typing import List, Optional 
+from functools import lru_cache from .config import main_settings @@ -45,8 +46,11 @@ class Cache: for key in self._time_fields: c[key] = datetime.fromisoformat(c[key]) - self.cached_attributes.append(**c) + ca = CacheAttribute(**c) + self.cached_attributes.append(ca) + self._id_to_attribute[ca.id] = ca + @lru_cache() def _init_module(self, module: str) -> Path: """ :param module: @@ -67,7 +71,6 @@ class Cache: return False existing_attribute.__dict__ = cached_attribute.__dict__ - cached_attribute = existing_attribute else: self.cached_attributes.append(cached_attribute) self._id_to_attribute[cached_attribute.id] = cached_attribute @@ -107,3 +110,17 @@ class Cache: with Path(module_path, name).open("wb") as content_file: content_file.write(content) + + def get(self, module: str, name: str) -> Optional[bytes]: + path = Path(self._dir, module, name) + + if not path.is_file(): + return None + + # check if it is outdated + existing_attribute: CacheAttribute = self._id_to_attribute[f"{module}_{name}"] + if not existing_attribute.is_valid: + return + + with path.open("rb") as f: + return f.read() From 031f274d6911ca73f7b1aaba44ac6e8ff0ce52d9 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 17 Jan 2024 15:10:50 +0100 Subject: [PATCH 059/104] feat: implemented caching in the request method --- .../{utils => connection}/cache.py | 26 ++++-- src/music_kraken/connection/connection.py | 83 ++++++++++++------- src/music_kraken/pages/bandcamp.py | 1 - .../pages/youtube_music/youtube_music.py | 7 +- src/music_kraken/utils/shared.py | 2 +- 5 files changed, 77 insertions(+), 42 deletions(-) rename src/music_kraken/{utils => connection}/cache.py (82%) diff --git a/src/music_kraken/utils/cache.py b/src/music_kraken/connection/cache.py similarity index 82% rename from src/music_kraken/utils/cache.py rename to src/music_kraken/connection/cache.py index b24b68f..d393be5 100644 --- a/src/music_kraken/utils/cache.py +++ b/src/music_kraken/connection/cache.py @@ -4,8 +4,9 @@ from 
dataclasses import dataclass from datetime import datetime, timedelta from typing import List, Optional from functools import lru_cache +import logging -from .config import main_settings +from ..utils.config import main_settings @dataclass @@ -29,7 +30,10 @@ class CacheAttribute: class Cache: - def __init__(self): + def __init__(self, module: str, logger: logging.Logger): + self.module = module + self.logger: logging.Logger = logger + self._dir = main_settings["cache_directory"] self.index = Path(self._dir, "index.json") @@ -89,7 +93,7 @@ class Cache: return True - def set(self, content: bytes, module: str, name: str, expires_in: int = 10): + def set(self, content: bytes, name: str, expires_in: float = 10): """ :param content: :param module: @@ -97,28 +101,32 @@ class Cache: :param expires_in: the unit is days :return: """ + if name == "": + return - module_path = self._init_module(module) + module_path = self._init_module(self.module) cache_attribute = CacheAttribute( - module=module, + module=self.module, name=name, created=datetime.now(), expires=datetime.now() + timedelta(days=expires_in), ) self._write_attribute(cache_attribute) - with Path(module_path, name).open("wb") as content_file: + cache_path = Path(module_path, name) + with cache_path.open("wb") as content_file: + self.logger.debug(f"writing cache to {cache_path}") content_file.write(content) - def get(self, module: str, name: str) -> Optional[bytes]: - path = Path(self._dir, module, name) + def get(self, name: str) -> Optional[bytes]: + path = Path(self._dir, self.module, name) if not path.is_file(): return None # check if it is outdated - existing_attribute: CacheAttribute = self._id_to_attribute[f"{module}_{name}"] + existing_attribute: CacheAttribute = self._id_to_attribute[f"{self.module}_{name}"] if not existing_attribute.is_valid: return diff --git a/src/music_kraken/connection/connection.py b/src/music_kraken/connection/connection.py index 3f294e9..7949545 100644 --- 
a/src/music_kraken/connection/connection.py +++ b/src/music_kraken/connection/connection.py @@ -5,9 +5,12 @@ import logging import threading import requests +import responses +from responses import matchers from tqdm import tqdm from .rotating import RotatingProxy +from .cache import Cache from ..utils.config import main_settings from ..utils.support_classes.download_result import DownloadResult from ..objects import Target @@ -25,13 +28,18 @@ class Connection: accepted_response_codes: Set[int] = None, semantic_not_found: bool = True, sleep_after_404: float = 0.0, - heartbeat_interval = 0, + heartbeat_interval=0, + module: str = "general", + cache_expiring_duration: float = 10 ): if proxies is None: proxies = main_settings["proxies"] if header_values is None: header_values = dict() + self.cache: Cache = Cache(module=module, logger=logger) + self.cache_expiring_duration = cache_expiring_duration + self.HEADER_VALUES = header_values self.LOGGER = logger @@ -55,23 +63,24 @@ class Connection: @property def user_agent(self) -> str: - return self.session.headers.get("user-agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36") - + return self.session.headers.get("user-agent", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36") def start_heartbeat(self): if self.heartbeat_interval <= 0: self.LOGGER.warning(f"Can't start a heartbeat with {self.heartbeat_interval}s in between.") - self.heartbeat_thread = threading.Thread(target=self._heartbeat_loop, args=(self.heartbeat_interval, ), daemon=True) + self.heartbeat_thread = threading.Thread(target=self._heartbeat_loop, args=(self.heartbeat_interval,), + daemon=True) self.heartbeat_thread.start() def heartbeat_failed(self): self.LOGGER.warning(f"I just died... 
(The heartbeat failed)") - def heartbeat(self): # Your code to send heartbeat requests goes here - print("the hearth is beating, but it needs to be implemented ;-;\nFuck youuuu for setting heartbeat in the constructor to true, but not implementing the method Connection.hearbeat()") + print( + "the hearth is beating, but it needs to be implemented ;-;\nFuck youuuu for setting heartbeat in the constructor to true, but not implementing the method Connection.hearbeat()") def _heartbeat_loop(self, interval: float): def heartbeat_wrapper(): @@ -85,8 +94,6 @@ class Connection: heartbeat_wrapper() time.sleep(interval) - - def base_url(self, url: ParseResult = None): if url is None: url = self.HOST @@ -119,9 +126,12 @@ class Connection: return headers - def _request( + def save(self, r: requests.Response, name: str, **kwargs): + self.cache.set(r.content, name, expires_in=kwargs.get("expires_in", self.cache_expiring_duration)) + + def request( self, - request: Callable, + method: str, try_count: int, accepted_response_codes: set, url: str, @@ -131,8 +141,20 @@ class Connection: raw_url: bool = False, sleep_after_404: float = None, is_heartbeat: bool = False, + name: str = "", **kwargs ) -> Optional[requests.Response]: + if name != "": + cached = self.cache.get(name) + + with responses.RequestsMock() as resp: + resp.add( + method=method, + url=url, + body=cached, + ) + return requests.request(method=method, url=url, timeout=timeout, headers=headers, **kwargs) + if sleep_after_404 is None: sleep_after_404 = self.sleep_after_404 if try_count >= self.TRIES: @@ -158,9 +180,10 @@ class Connection: while self.session_is_occupied and not is_heartbeat: pass - r: requests.Response = request(request_url, timeout=timeout, headers=headers, **kwargs) + r: requests.Response = requests.request(method=method, url=url, timeout=timeout, headers=headers, **kwargs) if r.status_code in accepted_response_codes: + self.save(r, name, **kwargs) return r if self.SEMANTIC_NOT_FOUND and r.status_code == 
404: @@ -187,15 +210,16 @@ class Connection: if self.heartbeat_interval > 0 and self.heartbeat_thread is None: self.start_heartbeat() - return self._request( - request=request, - try_count=try_count+1, + return self.request( + method=method, + try_count=try_count + 1, accepted_response_codes=accepted_response_codes, url=url, timeout=timeout, headers=headers, sleep_after_404=sleep_after_404, is_heartbeat=is_heartbeat, + name=name, **kwargs ) @@ -213,8 +237,8 @@ class Connection: if accepted_response_codes is None: accepted_response_codes = self.ACCEPTED_RESPONSE_CODES - r = self._request( - request=self.session.get, + r = self.request( + method="GET", try_count=0, accepted_response_codes=accepted_response_codes, url=url, @@ -241,8 +265,8 @@ class Connection: raw_url: bool = False, **kwargs ) -> Optional[requests.Response]: - r = self._request( - request=self.session.post, + r = self.request( + method="POST", try_count=0, accepted_response_codes=accepted_response_codes or self.ACCEPTED_RESPONSE_CODES, url=url, @@ -282,9 +306,9 @@ class Connection: if accepted_response_codes is None: accepted_response_codes = self.ACCEPTED_RESPONSE_CODES - - r = self._request( - request=self.session.get, + + r = self.request( + method="GET", try_count=0, accepted_response_codes=accepted_response_codes, url=url, @@ -310,8 +334,9 @@ class Connection: https://en.wikipedia.org/wiki/Kilobyte > The internationally recommended unit symbol for the kilobyte is kB. 
""" - - with tqdm(total=total_size-target.size, unit='B', unit_scale=True, unit_divisor=1024, desc=description) as t: + + with tqdm(total=total_size - target.size, unit='B', unit_scale=True, unit_divisor=1024, + desc=description) as t: try: for chunk in r.iter_content(chunk_size=chunk_size): size = f.write(chunk) @@ -321,7 +346,8 @@ class Connection: except requests.exceptions.ConnectionError: if try_count >= self.TRIES: self.LOGGER.warning(f"Stream timed out at \"{url}\": to many retries, aborting.") - return DownloadResult(error_message=f"Stream timed out from {url}, reducing the chunksize might help.") + return DownloadResult( + error_message=f"Stream timed out from {url}, reducing the chunksize might help.") self.LOGGER.warning(f"Stream timed out at \"{url}\": ({try_count}-{self.TRIES})") retry = True @@ -329,15 +355,14 @@ class Connection: if total_size > progress: retry = True - if retry: self.LOGGER.warning(f"Retrying stream...") accepted_response_codes.add(206) return self.stream_into( - url = url, - target = target, - description = description, - try_count=try_count+1, + url=url, + target=target, + description=description, + try_count=try_count + 1, progress=progress, accepted_response_codes=accepted_response_codes, timeout=timeout, diff --git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py index 4a0d5da..1f6f050 100644 --- a/src/music_kraken/pages/bandcamp.py +++ b/src/music_kraken/pages/bandcamp.py @@ -356,6 +356,5 @@ class Bandcamp(Page): def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: if source.audio_url is None: - print(source) return DownloadResult(error_message="Couldn't find download link.") return self.connection.stream_into(url=source.audio_url, target=target, description=desc) diff --git a/src/music_kraken/pages/youtube_music/youtube_music.py b/src/music_kraken/pages/youtube_music/youtube_music.py index 249a3db..01c7d09 100644 --- 
a/src/music_kraken/pages/youtube_music/youtube_music.py +++ b/src/music_kraken/pages/youtube_music/youtube_music.py @@ -59,7 +59,8 @@ class YoutubeMusicConnection(Connection): heartbeat_interval=113.25, header_values={ "Accept-Language": accept_language - } + }, + module="youtube_music", ) # cookie consent for youtube @@ -161,8 +162,10 @@ class YoutubeMusic(SuperYouTube): # save cookies in settings youtube_settings["youtube_music_consent_cookies"] = cookie_dict + else: + self.connection.save(r, "index.html") - r = self.connection.get("https://music.youtube.com/") + r = self.connection.get("https://music.youtube.com/", name="index.html") if r is None: return diff --git a/src/music_kraken/utils/shared.py b/src/music_kraken/utils/shared.py index d1645f8..925a3a6 100644 --- a/src/music_kraken/utils/shared.py +++ b/src/music_kraken/utils/shared.py @@ -4,7 +4,7 @@ from .config import main_settings DEBUG = True DEBUG_LOGGING = DEBUG and True -DEBUG_YOUTUBE_INITIALIZING = DEBUG and False +DEBUG_YOUTUBE_INITIALIZING = DEBUG and True DEBUG_PAGES = DEBUG and False if DEBUG: From 2d4ba50b57f25040f67a552f06b447052dee1263 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Fri, 19 Jan 2024 18:45:12 +0100 Subject: [PATCH 060/104] feat: clean and clear methods for the cache --- .idea/vcs.xml | 2 - requirements.txt | 5 +- src/music_kraken/connection/cache.py | 79 ++++++++++++++++++++--- src/music_kraken/connection/connection.py | 13 ++-- src/music_kraken/pages/bandcamp.py | 3 +- 5 files changed, 80 insertions(+), 22 deletions(-) diff --git a/.idea/vcs.xml b/.idea/vcs.xml index 0823e82..35eb1dd 100644 --- a/.idea/vcs.xml +++ b/.idea/vcs.xml @@ -2,7 +2,5 @@ - - \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 4462589..31605f0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -requests~=2.28.1 +requests~=2.31.0 mutagen~=1.46.0 musicbrainzngs~=0.7.1 jellyfish~=0.9.0 @@ -20,4 +20,5 @@ pathvalidate~=2.5.2 guppy3~=3.1.3 toml~=0.10.2 
-typing_extensions~=4.7.1 \ No newline at end of file +typing_extensions~=4.7.1 +responses~=0.24.1 \ No newline at end of file diff --git a/src/music_kraken/connection/cache.py b/src/music_kraken/connection/cache.py index d393be5..1f6b780 100644 --- a/src/music_kraken/connection/cache.py +++ b/src/music_kraken/connection/cache.py @@ -64,6 +64,18 @@ class Cache: r.mkdir(exist_ok=True) return r + def _write_index(self, indent: int = 4): + _json = [] + for c in self.cached_attributes: + d = c.__dict__ + for key in self._time_fields: + d[key] = d[key].isoformat() + + _json.append(d) + + with self.index.open("w") as f: + f.write(json.dumps(_json, indent=indent)) + def _write_attribute(self, cached_attribute: CacheAttribute, write: bool = True) -> bool: existing_attribute: Optional[CacheAttribute] = self._id_to_attribute.get(cached_attribute.id) if existing_attribute is not None: @@ -80,16 +92,7 @@ class Cache: self._id_to_attribute[cached_attribute.id] = cached_attribute if write: - _json = [] - for c in self.cached_attributes: - d = c.__dict__ - for key in self._time_fields: - d[key] = d[key].isoformat() - - _json.append(d) - - with self.index.open("w") as f: - f.write(json.dumps(_json, indent=4)) + self._write_index() return True @@ -132,3 +135,59 @@ class Cache: with path.open("rb") as f: return f.read() + + def clean(self): + keep = set() + + for ca in self.cached_attributes.copy(): + file = Path(self._dir, ca.module, ca.name) + + if not ca.is_valid: + self.logger.debug(f"deleting cache {ca.id}") + file.unlink() + self.cached_attributes.remove(ca) + del self._id_to_attribute[ca.id] + + else: + keep.add(file) + + # iterate through every module (folder) + for module_path in self._dir.iterdir(): + if not module_path.is_dir(): + continue + + # delete all files not in keep + for path in module_path.iterdir(): + if path not in keep: + self.logger.info(f"Deleting cache {path}") + path.unlink() + + # delete all empty directories + for path in module_path.iterdir(): + if 
path.is_dir() and not list(path.iterdir()): + self.logger.debug(f"Deleting cache directory {path}") + path.rmdir() + + self._write_index() + + def clear(self): + """ + delete every file in the cache directory + :return: + """ + + for path in self._dir.iterdir(): + if path.is_dir(): + for file in path.iterdir(): + file.unlink() + path.rmdir() + else: + path.unlink() + + self.cached_attributes.clear() + self._id_to_attribute.clear() + + self._write_index() + + def __repr__(self): + return f"" diff --git a/src/music_kraken/connection/connection.py b/src/music_kraken/connection/connection.py index 7949545..e32ad8a 100644 --- a/src/music_kraken/connection/connection.py +++ b/src/music_kraken/connection/connection.py @@ -1,19 +1,18 @@ -import time -from typing import List, Dict, Callable, Optional, Set -from urllib.parse import urlparse, urlunsplit, ParseResult import logging - import threading +import time +from typing import List, Dict, Optional, Set +from urllib.parse import urlparse, urlunsplit, ParseResult + import requests import responses -from responses import matchers from tqdm import tqdm -from .rotating import RotatingProxy from .cache import Cache +from .rotating import RotatingProxy +from ..objects import Target from ..utils.config import main_settings from ..utils.support_classes.download_result import DownloadResult -from ..objects import Target class Connection: diff --git a/src/music_kraken/pages/bandcamp.py b/src/music_kraken/pages/bandcamp.py index 1f6f050..6f65d6c 100644 --- a/src/music_kraken/pages/bandcamp.py +++ b/src/music_kraken/pages/bandcamp.py @@ -53,7 +53,8 @@ class Bandcamp(Page): def __init__(self, *args, **kwargs): self.connection: Connection = Connection( host="https://bandcamp.com/", - logger=self.LOGGER + logger=self.LOGGER, + module="bandcamp", ) super().__init__(*args, **kwargs) From fba9c31c505210d22204a217382573459cb6c8b8 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 22 Jan 2024 09:36:14 +0100 Subject: [PATCH 061/104] feat: 
clean and clear methods for the cache --- src/music_kraken/__init__.py | 3 ++- src/music_kraken/__main__.py | 18 ++++++++++++++++++ src/music_kraken/cli/options/cache.py | 21 +++++++++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 src/music_kraken/cli/options/cache.py diff --git a/src/music_kraken/__init__.py b/src/music_kraken/__init__.py index b3ee566..57106fc 100644 --- a/src/music_kraken/__init__.py +++ b/src/music_kraken/__init__.py @@ -4,12 +4,13 @@ import sys from .utils.shared import DEBUG, DEBUG_LOGGING from .utils.config import logging_settings, main_settings, read_config + read_config() from . import cli - if DEBUG: import sys + sys.setrecursionlimit(100) diff --git a/src/music_kraken/__main__.py b/src/music_kraken/__main__.py index 0425d5d..9da441d 100644 --- a/src/music_kraken/__main__.py +++ b/src/music_kraken/__main__.py @@ -79,6 +79,18 @@ def cli(): action="store_true" ) + parser.add_argument( + "--clear-cache", + help="Deletes the cache.", + action="store_true" + ) + + parser.add_argument( + "--clean-cache", + help="Deletes the outdated cache. (all expired cached files, and not indexed files)", + action="store_true" + ) + arguments = parser.parse_args() if arguments.verbose or arguments.test: @@ -112,6 +124,12 @@ def cli(): if arguments.frontend: cli.set_frontend(silent=False) + if arguments.clear_cache: + cli.clear_cache() + + if arguments.clean_cache: + cli.clean_cache() + # getting the genre genre: str = arguments.genre if arguments.test: diff --git a/src/music_kraken/cli/options/cache.py b/src/music_kraken/cli/options/cache.py new file mode 100644 index 0000000..103696b --- /dev/null +++ b/src/music_kraken/cli/options/cache.py @@ -0,0 +1,21 @@ +from logging import getLogger + +from ...connection.cache import Cache + + +def clear_cache(): + """ + Deletes the cache. + :return: + """ + + Cache("main", getLogger("cache")).clear() + + +def clean_cache(): + """ + Deletes the outdated cache. 
(all expired cached files, and not indexed files) + :return: + """ + + Cache("main", getLogger("cache")).clean() From 311faabeab8ed1bf886db784ba300bfd81816d59 Mon Sep 17 00:00:00 2001 From: Hellow <74311245+HeIIow2@users.noreply.github.com> Date: Mon, 22 Jan 2024 18:36:16 +0100 Subject: [PATCH 062/104] feat: implemented decryption --- src/music_kraken/pages/abstract.py | 28 + .../pages/youtube_music/youtube_music.py | 177 +- .../pages/youtube_music/yt_utils/__init__.py | 0 .../pages/youtube_music/yt_utils/compat.py | 3308 +++++++++ .../pages/youtube_music/yt_utils/jsinterp.py | 1054 +++ .../pages/youtube_music/yt_utils/socks.py | 273 + .../pages/youtube_music/yt_utils/utils.py | 6513 +++++++++++++++++ src/music_kraken/utils/config/config.py | 2 +- .../config/config_files/youtube_config.py | 12 +- 9 files changed, 11317 insertions(+), 50 deletions(-) create mode 100644 src/music_kraken/pages/youtube_music/yt_utils/__init__.py create mode 100644 src/music_kraken/pages/youtube_music/yt_utils/compat.py create mode 100644 src/music_kraken/pages/youtube_music/yt_utils/jsinterp.py create mode 100644 src/music_kraken/pages/youtube_music/yt_utils/socks.py create mode 100644 src/music_kraken/pages/youtube_music/yt_utils/utils.py diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index c253fc0..0e41927 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -1,5 +1,6 @@ import logging import random +import re from copy import copy from pathlib import Path from typing import Optional, Union, Type, Dict, Set, List, Tuple @@ -145,6 +146,33 @@ class Page: # set this to true, if all song details can also be fetched by fetching album details NO_ADDITIONAL_DATA_FROM_SONG = False + def _search_regex(self, pattern, string, default=None, fatal=True, flags=0, group=None): + """ + Perform a regex search on the given string, using a single or a list of + patterns returning the first matching group. 
+ In case of failure return a default value or raise a WARNING or a + RegexNotFoundError, depending on fatal, specifying the field name. + """ + + if isinstance(pattern, str): + mobj = re.search(pattern, string, flags) + else: + for p in pattern: + mobj = re.search(p, string, flags) + if mobj: + break + + if mobj: + if group is None: + # return the first matching group + return next(g for g in mobj.groups() if g is not None) + elif isinstance(group, (list, tuple)): + return tuple(mobj.group(g) for g in group) + else: + return mobj.group(group) + + return default + def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: return None diff --git a/src/music_kraken/pages/youtube_music/youtube_music.py b/src/music_kraken/pages/youtube_music/youtube_music.py index a4c765a..f157da5 100644 --- a/src/music_kraken/pages/youtube_music/youtube_music.py +++ b/src/music_kraken/pages/youtube_music/youtube_music.py @@ -5,12 +5,16 @@ import random import json from dataclasses import dataclass import re +from functools import lru_cache from ...utils.exception.config import SettingValueError from ...utils.config import main_settings, youtube_settings, logging_settings from ...utils.shared import DEBUG, DEBUG_YOUTUBE_INITIALIZING from ...utils.functions import get_current_millis +from .yt_utils.jsinterp import JSInterpreter + + if DEBUG: from ...utils.debug_utils import dump_to_file @@ -94,6 +98,32 @@ class YouTubeMusicCredentials: # the context in requests context: dict + player_url: str + + + + @property + def player_id(self): + @lru_cache(128) + def _extract_player_info(player_url): + _PLAYER_INFO_RE = ( + r'/s/player/(?P[a-zA-Z0-9_-]{8,})/player', + r'/(?P[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$', + r'\b(?Pvfl[a-zA-Z0-9_-]+)\b.*?\.js$', + ) + + for player_re in _PLAYER_INFO_RE: + id_m = re.search(player_re, player_url) + if id_m: + break + else: + return + + 
return id_m.group('id') + + return _extract_player_info(self.player_url) + + class YoutubeMusic(SuperYouTube): # CHANGE @@ -106,7 +136,8 @@ class YoutubeMusic(SuperYouTube): self.credentials: YouTubeMusicCredentials = YouTubeMusicCredentials( api_key=youtube_settings["youtube_music_api_key"], ctoken="", - context=youtube_settings["youtube_music_innertube_context"] + context=youtube_settings["youtube_music_innertube_context"], + player_url=youtube_settings["player_url"], ) self.start_millis = get_current_millis() @@ -114,7 +145,7 @@ class YoutubeMusic(SuperYouTube): if self.credentials.api_key == "" or DEBUG_YOUTUBE_INITIALIZING: self._fetch_from_main_page() - super().__init__(*args, **kwargs) + SuperYouTube.__init__(self,*args, **kwargs) def _fetch_from_main_page(self): """ @@ -212,6 +243,41 @@ class YoutubeMusic(SuperYouTube): if not found_context: self.LOGGER.warning(f"Couldn't find a context for {type(self).__name__}.") + # player url + """ + Thanks to youtube-dl <33 + """ + player_pattern = [ + r'(?<="jsUrl":")(.*?)(?=")', + r'(?<="PLAYER_JS_URL":")(.*?)(?=")' + ] + found_player_url = False + + for pattern in player_pattern: + for player_string in re.findall(pattern, content, re.M): + try: + youtube_settings["player_url"] = "https://music.youtube.com" + player_string + found_player_url = True + except json.decoder.JSONDecodeError: + continue + + self.credentials.player_url = youtube_settings["player_url"] + break + + if found_player_url: + break + + if not found_player_url: + self.LOGGER.warning(f"Couldn't find an url for the video player.") + + # ytcfg + youtube_settings["ytcfg"] = json.loads(self._search_regex( + r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', + content, + default='{}' + )) or {} + + def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: return super().get_source_type(source) @@ -359,66 +425,85 @@ class YoutubeMusic(SuperYouTube): return album - @staticmethod - def _parse_adaptive_formats(data: list) -> str: - best_url = None - 
audio_format = None + @lru_cache() + def _extract_signature_function(self, player_url): + r = self.connection.get(player_url) + if r is None: + return lambda x: None + + code = r.text + + funcname = self._search_regex(( + r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', + r'\bm=(?P[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)', + r'\bc&&\(c=(?P[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)', + r'(?:\b|[^a-zA-Z0-9$])(?P[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?', + r'(?P[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', + # Obsolete patterns + r'("|\')signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', + r'\.sig\|\|(?P[a-zA-Z0-9$]+)\(', + r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P[a-zA-Z0-9$]+)\(', + r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(', + r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(', + r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\(' + ), + code, group='sig') + + jsi = JSInterpreter(code) + initial_function = jsi.extract_function(funcname) + return lambda s: initial_function([s]) + + def _decrypt_signature(self, s): + signing_func = self._extract_signature_function(player_url=youtube_settings["player_url"]) + print(signing_func) + return signing_func(s) + + def _parse_adaptive_formats(self, data: list, video_id) -> dict: + best_format = None best_bitrate = 0 - def decode_url(_format: dict): - """ - s=0%3Dw9hlenzIQkU5qD55flWqkO-wn8G6CJxI%3Dn9_OSUAUh3AiACkI5TNetQixuV6PJAxH-NFqGWGQCivQMkqprwyv8z2NAhIQRwsSdQfJAfJA - sp=sig - 
url=https://rr1---sn-cxaf0x-nugl.googlevideo.com/videoplayback%3Fexpire%3D1705426390%26ei%3DdmmmZZK1OYf41gL26KGwDg%26ip%3D129.143.170.58%26id%3Do-APgHuP61UnxvMxskECWmga1BRWYDFv91DMB7E6R_b_CG%26itag%3D18%26source%3Dyoutube%26requiressl%3Dyes%26xpc%3DEgVo2aDSNQ%253D%253D%26mh%3D_V%26mm%3D31%252C29%26mn%3Dsn-cxaf0x-nugl%252Csn-4g5edndd%26ms%3Dau%252Crdu%26mv%3Dm%26mvi%3D1%26pl%3D16%26pcm2%3Dyes%26gcr%3Dde%26initcwndbps%3D1737500%26spc%3DUWF9f6gk6WFPUFJZkjGIeb9q8NjPmmcsXzCp%26vprv%3D1%26svpuc%3D1%26mime%3Dvideo%252Fmp4%26ns%3DJnQgwQe-JazkZpURVB2rmlUQ%26cnr%3D14%26ratebypass%3Dyes%26dur%3D170.643%26lmt%3D1697280536047282%26mt%3D1705404526%26fvip%3D4%26fexp%3D24007246%26c%3DWEB_REMIX%26txp%3D2318224%26n%3DEq7jcRmeC89oLlbr%26sparams%3Dexpire%252Cei%252Cip%252Cid%252Citag%252Csource%252Crequiressl%252Cxpc%252Cpcm2%252Cgcr%252Cspc%252Cvprv%252Csvpuc%252Cmime%252Cns%252Ccnr%252Cratebypass%252Cdur%252Clmt%26lsparams%3Dmh%252Cmm%252Cmn%252Cms%252Cmv%252Cmvi%252Cpl%252Cinitcwndbps%26lsig%3DAAO5W4owRQIhAOJSldsMn2QA8b-rMr8mJoPr-9-8piIMe6J-800YB0DiAiBKLBHGfr-a6d87K0-WbsJzVf9f2DhYgv0vcntWvHmvGA%253D%253D" - :param _format: - :return: - """ - sc = parse_qs(_format["signatureCipher"]) + def parse_format(fmt: dict): + fmt_url = fmt.get('url') - fmt_url = sc["url"][0] - encrypted_sig = sc['s'][0] + if not fmt_url: + sc = parse_qs(possible_format["signatureCipher"]) + print(sc["s"][0]) + signature = self._decrypt_signature(sc['s'][0]) + print(signature) - if not (sc and fmt_url and encrypted_sig): - return + sp = sc.get("sp", ["sig"])[0] + fmt_url = sc.get("url", [None])[0] - """ - if not player_url: - player_url = self._extract_player_url(webpage) - if not player_url: - return - - signature = self._decrypt_signature(sc['s'][0], video_id, player_url) - sp = try_get(sc, lambda x: x['sp'][0]) or 'signature' - fmt_url += '&' + sp + '=' + signature - """ + fmt_url += '&' + sp + '=' + signature - for possible_format in data: - _url_list = parse_qs(possible_format["signatureCipher"])["url"] - 
if len(_url_list) <= 0: + return { + "bitrate": fmt.get("bitrate"), + "url": fmt_url + } + + for possible_format in sorted(data, key=lambda x: x.get("bitrate", 0)): + if best_bitrate <= 0: + # no format has been found yet + best_format = possible_format + + if possible_format.get('targetDurationSec') or possible_format.get('drmFamilies'): continue - url = _url_list[0] - if best_url is None: - best_url = url - mime_type: str = possible_format["mimeType"] if not mime_type.startswith("audio"): continue bitrate = int(possible_format.get("bitrate", 0)) - if bitrate >= main_settings["bitrate"]: - best_bitrate = bitrate - audio_format = possible_format - best_url = url - break - if bitrate > best_bitrate: best_bitrate = bitrate - audio_format = possible_format - best_url = url + best_format = possible_format - return best_url + if bitrate >= main_settings["bitrate"]: + break + + return parse_format(best_format) def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: """ @@ -471,7 +556,7 @@ class YoutubeMusic(SuperYouTube): available_formats = data.get("streamingData", {}).get("adaptiveFormats", []) if len(available_formats) > 0: - source.audio_url = self._parse_adaptive_formats(available_formats) + source.audio_url = self._parse_adaptive_formats(available_formats, video_id=browse_id).get("url") return song diff --git a/src/music_kraken/pages/youtube_music/yt_utils/__init__.py b/src/music_kraken/pages/youtube_music/yt_utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/music_kraken/pages/youtube_music/yt_utils/compat.py b/src/music_kraken/pages/youtube_music/yt_utils/compat.py new file mode 100644 index 0000000..3c526a7 --- /dev/null +++ b/src/music_kraken/pages/youtube_music/yt_utils/compat.py @@ -0,0 +1,3308 @@ +# coding: utf-8 +from __future__ import unicode_literals +from __future__ import division + +import base64 +import binascii +import collections +import ctypes +import datetime +import email +import getpass +import io 
+import itertools +import optparse +import os +import platform +import re +import shlex +import shutil +import socket +import struct +import subprocess +import sys +import types +import xml.etree.ElementTree + +# naming convention +# 'compat_' + Python3_name.replace('.', '_') +# other aliases exist for convenience and/or legacy + +# deal with critical unicode/str things first +try: + # Python 2 + compat_str, compat_basestring, compat_chr = ( + unicode, basestring, unichr + ) +except NameError: + compat_str, compat_basestring, compat_chr = ( + str, (str, bytes), chr + ) + +# casefold +try: + compat_str.casefold + compat_casefold = lambda s: s.casefold() +except AttributeError: + from .casefold import casefold as compat_casefold + +try: + import collections.abc as compat_collections_abc +except ImportError: + import collections as compat_collections_abc + +try: + import urllib.request as compat_urllib_request +except ImportError: # Python 2 + import urllib2 as compat_urllib_request + +# Also fix up lack of method arg in old Pythons +try: + _req = compat_urllib_request.Request + _req('http://127.0.0.1', method='GET') +except TypeError: + class _request(object): + def __new__(cls, url, *args, **kwargs): + method = kwargs.pop('method', None) + r = _req(url, *args, **kwargs) + if method: + r.get_method = types.MethodType(lambda _: method, r) + return r + + compat_urllib_request.Request = _request + + +try: + import urllib.error as compat_urllib_error +except ImportError: # Python 2 + import urllib2 as compat_urllib_error + +try: + import urllib.parse as compat_urllib_parse +except ImportError: # Python 2 + import urllib as compat_urllib_parse + import urlparse as _urlparse + for a in dir(_urlparse): + if not hasattr(compat_urllib_parse, a): + setattr(compat_urllib_parse, a, getattr(_urlparse, a)) + del _urlparse + +# unfavoured aliases +compat_urlparse = compat_urllib_parse +compat_urllib_parse_urlparse = compat_urllib_parse.urlparse + +try: + import urllib.response as 
compat_urllib_response +except ImportError: # Python 2 + import urllib as compat_urllib_response + +try: + compat_urllib_response.addinfourl.status +except AttributeError: + # .getcode() is deprecated in Py 3. + compat_urllib_response.addinfourl.status = property(lambda self: self.getcode()) + +try: + import http.cookiejar as compat_cookiejar +except ImportError: # Python 2 + import cookielib as compat_cookiejar +compat_http_cookiejar = compat_cookiejar + +if sys.version_info[0] == 2: + class compat_cookiejar_Cookie(compat_cookiejar.Cookie): + def __init__(self, version, name, value, *args, **kwargs): + if isinstance(name, compat_str): + name = name.encode() + if isinstance(value, compat_str): + value = value.encode() + compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs) +else: + compat_cookiejar_Cookie = compat_cookiejar.Cookie +compat_http_cookiejar_Cookie = compat_cookiejar_Cookie + +try: + import http.cookies as compat_cookies +except ImportError: # Python 2 + import Cookie as compat_cookies +compat_http_cookies = compat_cookies + +if sys.version_info[0] == 2 or sys.version_info < (3, 3): + class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie): + def load(self, rawdata): + must_have_value = 0 + if not isinstance(rawdata, dict): + if sys.version_info[:2] != (2, 7) or sys.platform.startswith('java'): + # attribute must have value for parsing + rawdata, must_have_value = re.subn( + r'(?i)(;\s*)(secure|httponly)(\s*(?:;|$))', r'\1\2=\2\3', rawdata) + if sys.version_info[0] == 2: + if isinstance(rawdata, compat_str): + rawdata = str(rawdata) + super(compat_cookies_SimpleCookie, self).load(rawdata) + if must_have_value > 0: + for morsel in self.values(): + for attr in ('secure', 'httponly'): + if morsel.get(attr): + morsel[attr] = True +else: + compat_cookies_SimpleCookie = compat_cookies.SimpleCookie +compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie + +try: + import html.entities as compat_html_entities +except 
ImportError: # Python 2 + import htmlentitydefs as compat_html_entities + +try: # Python >= 3.3 + compat_html_entities_html5 = compat_html_entities.html5 +except AttributeError: + # Copied from CPython 3.5.1 html/entities.py + compat_html_entities_html5 = { + 'Aacute': '\xc1', + 'aacute': '\xe1', + 'Aacute;': '\xc1', + 'aacute;': '\xe1', + 'Abreve;': '\u0102', + 'abreve;': '\u0103', + 'ac;': '\u223e', + 'acd;': '\u223f', + 'acE;': '\u223e\u0333', + 'Acirc': '\xc2', + 'acirc': '\xe2', + 'Acirc;': '\xc2', + 'acirc;': '\xe2', + 'acute': '\xb4', + 'acute;': '\xb4', + 'Acy;': '\u0410', + 'acy;': '\u0430', + 'AElig': '\xc6', + 'aelig': '\xe6', + 'AElig;': '\xc6', + 'aelig;': '\xe6', + 'af;': '\u2061', + 'Afr;': '\U0001d504', + 'afr;': '\U0001d51e', + 'Agrave': '\xc0', + 'agrave': '\xe0', + 'Agrave;': '\xc0', + 'agrave;': '\xe0', + 'alefsym;': '\u2135', + 'aleph;': '\u2135', + 'Alpha;': '\u0391', + 'alpha;': '\u03b1', + 'Amacr;': '\u0100', + 'amacr;': '\u0101', + 'amalg;': '\u2a3f', + 'AMP': '&', + 'amp': '&', + 'AMP;': '&', + 'amp;': '&', + 'And;': '\u2a53', + 'and;': '\u2227', + 'andand;': '\u2a55', + 'andd;': '\u2a5c', + 'andslope;': '\u2a58', + 'andv;': '\u2a5a', + 'ang;': '\u2220', + 'ange;': '\u29a4', + 'angle;': '\u2220', + 'angmsd;': '\u2221', + 'angmsdaa;': '\u29a8', + 'angmsdab;': '\u29a9', + 'angmsdac;': '\u29aa', + 'angmsdad;': '\u29ab', + 'angmsdae;': '\u29ac', + 'angmsdaf;': '\u29ad', + 'angmsdag;': '\u29ae', + 'angmsdah;': '\u29af', + 'angrt;': '\u221f', + 'angrtvb;': '\u22be', + 'angrtvbd;': '\u299d', + 'angsph;': '\u2222', + 'angst;': '\xc5', + 'angzarr;': '\u237c', + 'Aogon;': '\u0104', + 'aogon;': '\u0105', + 'Aopf;': '\U0001d538', + 'aopf;': '\U0001d552', + 'ap;': '\u2248', + 'apacir;': '\u2a6f', + 'apE;': '\u2a70', + 'ape;': '\u224a', + 'apid;': '\u224b', + 'apos;': "'", + 'ApplyFunction;': '\u2061', + 'approx;': '\u2248', + 'approxeq;': '\u224a', + 'Aring': '\xc5', + 'aring': '\xe5', + 'Aring;': '\xc5', + 'aring;': '\xe5', + 'Ascr;': '\U0001d49c', + 
'ascr;': '\U0001d4b6', + 'Assign;': '\u2254', + 'ast;': '*', + 'asymp;': '\u2248', + 'asympeq;': '\u224d', + 'Atilde': '\xc3', + 'atilde': '\xe3', + 'Atilde;': '\xc3', + 'atilde;': '\xe3', + 'Auml': '\xc4', + 'auml': '\xe4', + 'Auml;': '\xc4', + 'auml;': '\xe4', + 'awconint;': '\u2233', + 'awint;': '\u2a11', + 'backcong;': '\u224c', + 'backepsilon;': '\u03f6', + 'backprime;': '\u2035', + 'backsim;': '\u223d', + 'backsimeq;': '\u22cd', + 'Backslash;': '\u2216', + 'Barv;': '\u2ae7', + 'barvee;': '\u22bd', + 'Barwed;': '\u2306', + 'barwed;': '\u2305', + 'barwedge;': '\u2305', + 'bbrk;': '\u23b5', + 'bbrktbrk;': '\u23b6', + 'bcong;': '\u224c', + 'Bcy;': '\u0411', + 'bcy;': '\u0431', + 'bdquo;': '\u201e', + 'becaus;': '\u2235', + 'Because;': '\u2235', + 'because;': '\u2235', + 'bemptyv;': '\u29b0', + 'bepsi;': '\u03f6', + 'bernou;': '\u212c', + 'Bernoullis;': '\u212c', + 'Beta;': '\u0392', + 'beta;': '\u03b2', + 'beth;': '\u2136', + 'between;': '\u226c', + 'Bfr;': '\U0001d505', + 'bfr;': '\U0001d51f', + 'bigcap;': '\u22c2', + 'bigcirc;': '\u25ef', + 'bigcup;': '\u22c3', + 'bigodot;': '\u2a00', + 'bigoplus;': '\u2a01', + 'bigotimes;': '\u2a02', + 'bigsqcup;': '\u2a06', + 'bigstar;': '\u2605', + 'bigtriangledown;': '\u25bd', + 'bigtriangleup;': '\u25b3', + 'biguplus;': '\u2a04', + 'bigvee;': '\u22c1', + 'bigwedge;': '\u22c0', + 'bkarow;': '\u290d', + 'blacklozenge;': '\u29eb', + 'blacksquare;': '\u25aa', + 'blacktriangle;': '\u25b4', + 'blacktriangledown;': '\u25be', + 'blacktriangleleft;': '\u25c2', + 'blacktriangleright;': '\u25b8', + 'blank;': '\u2423', + 'blk12;': '\u2592', + 'blk14;': '\u2591', + 'blk34;': '\u2593', + 'block;': '\u2588', + 'bne;': '=\u20e5', + 'bnequiv;': '\u2261\u20e5', + 'bNot;': '\u2aed', + 'bnot;': '\u2310', + 'Bopf;': '\U0001d539', + 'bopf;': '\U0001d553', + 'bot;': '\u22a5', + 'bottom;': '\u22a5', + 'bowtie;': '\u22c8', + 'boxbox;': '\u29c9', + 'boxDL;': '\u2557', + 'boxDl;': '\u2556', + 'boxdL;': '\u2555', + 'boxdl;': '\u2510', + 'boxDR;': 
'\u2554', + 'boxDr;': '\u2553', + 'boxdR;': '\u2552', + 'boxdr;': '\u250c', + 'boxH;': '\u2550', + 'boxh;': '\u2500', + 'boxHD;': '\u2566', + 'boxHd;': '\u2564', + 'boxhD;': '\u2565', + 'boxhd;': '\u252c', + 'boxHU;': '\u2569', + 'boxHu;': '\u2567', + 'boxhU;': '\u2568', + 'boxhu;': '\u2534', + 'boxminus;': '\u229f', + 'boxplus;': '\u229e', + 'boxtimes;': '\u22a0', + 'boxUL;': '\u255d', + 'boxUl;': '\u255c', + 'boxuL;': '\u255b', + 'boxul;': '\u2518', + 'boxUR;': '\u255a', + 'boxUr;': '\u2559', + 'boxuR;': '\u2558', + 'boxur;': '\u2514', + 'boxV;': '\u2551', + 'boxv;': '\u2502', + 'boxVH;': '\u256c', + 'boxVh;': '\u256b', + 'boxvH;': '\u256a', + 'boxvh;': '\u253c', + 'boxVL;': '\u2563', + 'boxVl;': '\u2562', + 'boxvL;': '\u2561', + 'boxvl;': '\u2524', + 'boxVR;': '\u2560', + 'boxVr;': '\u255f', + 'boxvR;': '\u255e', + 'boxvr;': '\u251c', + 'bprime;': '\u2035', + 'Breve;': '\u02d8', + 'breve;': '\u02d8', + 'brvbar': '\xa6', + 'brvbar;': '\xa6', + 'Bscr;': '\u212c', + 'bscr;': '\U0001d4b7', + 'bsemi;': '\u204f', + 'bsim;': '\u223d', + 'bsime;': '\u22cd', + 'bsol;': '\\', + 'bsolb;': '\u29c5', + 'bsolhsub;': '\u27c8', + 'bull;': '\u2022', + 'bullet;': '\u2022', + 'bump;': '\u224e', + 'bumpE;': '\u2aae', + 'bumpe;': '\u224f', + 'Bumpeq;': '\u224e', + 'bumpeq;': '\u224f', + 'Cacute;': '\u0106', + 'cacute;': '\u0107', + 'Cap;': '\u22d2', + 'cap;': '\u2229', + 'capand;': '\u2a44', + 'capbrcup;': '\u2a49', + 'capcap;': '\u2a4b', + 'capcup;': '\u2a47', + 'capdot;': '\u2a40', + 'CapitalDifferentialD;': '\u2145', + 'caps;': '\u2229\ufe00', + 'caret;': '\u2041', + 'caron;': '\u02c7', + 'Cayleys;': '\u212d', + 'ccaps;': '\u2a4d', + 'Ccaron;': '\u010c', + 'ccaron;': '\u010d', + 'Ccedil': '\xc7', + 'ccedil': '\xe7', + 'Ccedil;': '\xc7', + 'ccedil;': '\xe7', + 'Ccirc;': '\u0108', + 'ccirc;': '\u0109', + 'Cconint;': '\u2230', + 'ccups;': '\u2a4c', + 'ccupssm;': '\u2a50', + 'Cdot;': '\u010a', + 'cdot;': '\u010b', + 'cedil': '\xb8', + 'cedil;': '\xb8', + 'Cedilla;': '\xb8', + 
'cemptyv;': '\u29b2', + 'cent': '\xa2', + 'cent;': '\xa2', + 'CenterDot;': '\xb7', + 'centerdot;': '\xb7', + 'Cfr;': '\u212d', + 'cfr;': '\U0001d520', + 'CHcy;': '\u0427', + 'chcy;': '\u0447', + 'check;': '\u2713', + 'checkmark;': '\u2713', + 'Chi;': '\u03a7', + 'chi;': '\u03c7', + 'cir;': '\u25cb', + 'circ;': '\u02c6', + 'circeq;': '\u2257', + 'circlearrowleft;': '\u21ba', + 'circlearrowright;': '\u21bb', + 'circledast;': '\u229b', + 'circledcirc;': '\u229a', + 'circleddash;': '\u229d', + 'CircleDot;': '\u2299', + 'circledR;': '\xae', + 'circledS;': '\u24c8', + 'CircleMinus;': '\u2296', + 'CirclePlus;': '\u2295', + 'CircleTimes;': '\u2297', + 'cirE;': '\u29c3', + 'cire;': '\u2257', + 'cirfnint;': '\u2a10', + 'cirmid;': '\u2aef', + 'cirscir;': '\u29c2', + 'ClockwiseContourIntegral;': '\u2232', + 'CloseCurlyDoubleQuote;': '\u201d', + 'CloseCurlyQuote;': '\u2019', + 'clubs;': '\u2663', + 'clubsuit;': '\u2663', + 'Colon;': '\u2237', + 'colon;': ':', + 'Colone;': '\u2a74', + 'colone;': '\u2254', + 'coloneq;': '\u2254', + 'comma;': ',', + 'commat;': '@', + 'comp;': '\u2201', + 'compfn;': '\u2218', + 'complement;': '\u2201', + 'complexes;': '\u2102', + 'cong;': '\u2245', + 'congdot;': '\u2a6d', + 'Congruent;': '\u2261', + 'Conint;': '\u222f', + 'conint;': '\u222e', + 'ContourIntegral;': '\u222e', + 'Copf;': '\u2102', + 'copf;': '\U0001d554', + 'coprod;': '\u2210', + 'Coproduct;': '\u2210', + 'COPY': '\xa9', + 'copy': '\xa9', + 'COPY;': '\xa9', + 'copy;': '\xa9', + 'copysr;': '\u2117', + 'CounterClockwiseContourIntegral;': '\u2233', + 'crarr;': '\u21b5', + 'Cross;': '\u2a2f', + 'cross;': '\u2717', + 'Cscr;': '\U0001d49e', + 'cscr;': '\U0001d4b8', + 'csub;': '\u2acf', + 'csube;': '\u2ad1', + 'csup;': '\u2ad0', + 'csupe;': '\u2ad2', + 'ctdot;': '\u22ef', + 'cudarrl;': '\u2938', + 'cudarrr;': '\u2935', + 'cuepr;': '\u22de', + 'cuesc;': '\u22df', + 'cularr;': '\u21b6', + 'cularrp;': '\u293d', + 'Cup;': '\u22d3', + 'cup;': '\u222a', + 'cupbrcap;': '\u2a48', + 'CupCap;': 
'\u224d', + 'cupcap;': '\u2a46', + 'cupcup;': '\u2a4a', + 'cupdot;': '\u228d', + 'cupor;': '\u2a45', + 'cups;': '\u222a\ufe00', + 'curarr;': '\u21b7', + 'curarrm;': '\u293c', + 'curlyeqprec;': '\u22de', + 'curlyeqsucc;': '\u22df', + 'curlyvee;': '\u22ce', + 'curlywedge;': '\u22cf', + 'curren': '\xa4', + 'curren;': '\xa4', + 'curvearrowleft;': '\u21b6', + 'curvearrowright;': '\u21b7', + 'cuvee;': '\u22ce', + 'cuwed;': '\u22cf', + 'cwconint;': '\u2232', + 'cwint;': '\u2231', + 'cylcty;': '\u232d', + 'Dagger;': '\u2021', + 'dagger;': '\u2020', + 'daleth;': '\u2138', + 'Darr;': '\u21a1', + 'dArr;': '\u21d3', + 'darr;': '\u2193', + 'dash;': '\u2010', + 'Dashv;': '\u2ae4', + 'dashv;': '\u22a3', + 'dbkarow;': '\u290f', + 'dblac;': '\u02dd', + 'Dcaron;': '\u010e', + 'dcaron;': '\u010f', + 'Dcy;': '\u0414', + 'dcy;': '\u0434', + 'DD;': '\u2145', + 'dd;': '\u2146', + 'ddagger;': '\u2021', + 'ddarr;': '\u21ca', + 'DDotrahd;': '\u2911', + 'ddotseq;': '\u2a77', + 'deg': '\xb0', + 'deg;': '\xb0', + 'Del;': '\u2207', + 'Delta;': '\u0394', + 'delta;': '\u03b4', + 'demptyv;': '\u29b1', + 'dfisht;': '\u297f', + 'Dfr;': '\U0001d507', + 'dfr;': '\U0001d521', + 'dHar;': '\u2965', + 'dharl;': '\u21c3', + 'dharr;': '\u21c2', + 'DiacriticalAcute;': '\xb4', + 'DiacriticalDot;': '\u02d9', + 'DiacriticalDoubleAcute;': '\u02dd', + 'DiacriticalGrave;': '`', + 'DiacriticalTilde;': '\u02dc', + 'diam;': '\u22c4', + 'Diamond;': '\u22c4', + 'diamond;': '\u22c4', + 'diamondsuit;': '\u2666', + 'diams;': '\u2666', + 'die;': '\xa8', + 'DifferentialD;': '\u2146', + 'digamma;': '\u03dd', + 'disin;': '\u22f2', + 'div;': '\xf7', + 'divide': '\xf7', + 'divide;': '\xf7', + 'divideontimes;': '\u22c7', + 'divonx;': '\u22c7', + 'DJcy;': '\u0402', + 'djcy;': '\u0452', + 'dlcorn;': '\u231e', + 'dlcrop;': '\u230d', + 'dollar;': '$', + 'Dopf;': '\U0001d53b', + 'dopf;': '\U0001d555', + 'Dot;': '\xa8', + 'dot;': '\u02d9', + 'DotDot;': '\u20dc', + 'doteq;': '\u2250', + 'doteqdot;': '\u2251', + 'DotEqual;': '\u2250', + 
'dotminus;': '\u2238', + 'dotplus;': '\u2214', + 'dotsquare;': '\u22a1', + 'doublebarwedge;': '\u2306', + 'DoubleContourIntegral;': '\u222f', + 'DoubleDot;': '\xa8', + 'DoubleDownArrow;': '\u21d3', + 'DoubleLeftArrow;': '\u21d0', + 'DoubleLeftRightArrow;': '\u21d4', + 'DoubleLeftTee;': '\u2ae4', + 'DoubleLongLeftArrow;': '\u27f8', + 'DoubleLongLeftRightArrow;': '\u27fa', + 'DoubleLongRightArrow;': '\u27f9', + 'DoubleRightArrow;': '\u21d2', + 'DoubleRightTee;': '\u22a8', + 'DoubleUpArrow;': '\u21d1', + 'DoubleUpDownArrow;': '\u21d5', + 'DoubleVerticalBar;': '\u2225', + 'DownArrow;': '\u2193', + 'Downarrow;': '\u21d3', + 'downarrow;': '\u2193', + 'DownArrowBar;': '\u2913', + 'DownArrowUpArrow;': '\u21f5', + 'DownBreve;': '\u0311', + 'downdownarrows;': '\u21ca', + 'downharpoonleft;': '\u21c3', + 'downharpoonright;': '\u21c2', + 'DownLeftRightVector;': '\u2950', + 'DownLeftTeeVector;': '\u295e', + 'DownLeftVector;': '\u21bd', + 'DownLeftVectorBar;': '\u2956', + 'DownRightTeeVector;': '\u295f', + 'DownRightVector;': '\u21c1', + 'DownRightVectorBar;': '\u2957', + 'DownTee;': '\u22a4', + 'DownTeeArrow;': '\u21a7', + 'drbkarow;': '\u2910', + 'drcorn;': '\u231f', + 'drcrop;': '\u230c', + 'Dscr;': '\U0001d49f', + 'dscr;': '\U0001d4b9', + 'DScy;': '\u0405', + 'dscy;': '\u0455', + 'dsol;': '\u29f6', + 'Dstrok;': '\u0110', + 'dstrok;': '\u0111', + 'dtdot;': '\u22f1', + 'dtri;': '\u25bf', + 'dtrif;': '\u25be', + 'duarr;': '\u21f5', + 'duhar;': '\u296f', + 'dwangle;': '\u29a6', + 'DZcy;': '\u040f', + 'dzcy;': '\u045f', + 'dzigrarr;': '\u27ff', + 'Eacute': '\xc9', + 'eacute': '\xe9', + 'Eacute;': '\xc9', + 'eacute;': '\xe9', + 'easter;': '\u2a6e', + 'Ecaron;': '\u011a', + 'ecaron;': '\u011b', + 'ecir;': '\u2256', + 'Ecirc': '\xca', + 'ecirc': '\xea', + 'Ecirc;': '\xca', + 'ecirc;': '\xea', + 'ecolon;': '\u2255', + 'Ecy;': '\u042d', + 'ecy;': '\u044d', + 'eDDot;': '\u2a77', + 'Edot;': '\u0116', + 'eDot;': '\u2251', + 'edot;': '\u0117', + 'ee;': '\u2147', + 'efDot;': '\u2252', + 
'Efr;': '\U0001d508', + 'efr;': '\U0001d522', + 'eg;': '\u2a9a', + 'Egrave': '\xc8', + 'egrave': '\xe8', + 'Egrave;': '\xc8', + 'egrave;': '\xe8', + 'egs;': '\u2a96', + 'egsdot;': '\u2a98', + 'el;': '\u2a99', + 'Element;': '\u2208', + 'elinters;': '\u23e7', + 'ell;': '\u2113', + 'els;': '\u2a95', + 'elsdot;': '\u2a97', + 'Emacr;': '\u0112', + 'emacr;': '\u0113', + 'empty;': '\u2205', + 'emptyset;': '\u2205', + 'EmptySmallSquare;': '\u25fb', + 'emptyv;': '\u2205', + 'EmptyVerySmallSquare;': '\u25ab', + 'emsp13;': '\u2004', + 'emsp14;': '\u2005', + 'emsp;': '\u2003', + 'ENG;': '\u014a', + 'eng;': '\u014b', + 'ensp;': '\u2002', + 'Eogon;': '\u0118', + 'eogon;': '\u0119', + 'Eopf;': '\U0001d53c', + 'eopf;': '\U0001d556', + 'epar;': '\u22d5', + 'eparsl;': '\u29e3', + 'eplus;': '\u2a71', + 'epsi;': '\u03b5', + 'Epsilon;': '\u0395', + 'epsilon;': '\u03b5', + 'epsiv;': '\u03f5', + 'eqcirc;': '\u2256', + 'eqcolon;': '\u2255', + 'eqsim;': '\u2242', + 'eqslantgtr;': '\u2a96', + 'eqslantless;': '\u2a95', + 'Equal;': '\u2a75', + 'equals;': '=', + 'EqualTilde;': '\u2242', + 'equest;': '\u225f', + 'Equilibrium;': '\u21cc', + 'equiv;': '\u2261', + 'equivDD;': '\u2a78', + 'eqvparsl;': '\u29e5', + 'erarr;': '\u2971', + 'erDot;': '\u2253', + 'Escr;': '\u2130', + 'escr;': '\u212f', + 'esdot;': '\u2250', + 'Esim;': '\u2a73', + 'esim;': '\u2242', + 'Eta;': '\u0397', + 'eta;': '\u03b7', + 'ETH': '\xd0', + 'eth': '\xf0', + 'ETH;': '\xd0', + 'eth;': '\xf0', + 'Euml': '\xcb', + 'euml': '\xeb', + 'Euml;': '\xcb', + 'euml;': '\xeb', + 'euro;': '\u20ac', + 'excl;': '!', + 'exist;': '\u2203', + 'Exists;': '\u2203', + 'expectation;': '\u2130', + 'ExponentialE;': '\u2147', + 'exponentiale;': '\u2147', + 'fallingdotseq;': '\u2252', + 'Fcy;': '\u0424', + 'fcy;': '\u0444', + 'female;': '\u2640', + 'ffilig;': '\ufb03', + 'fflig;': '\ufb00', + 'ffllig;': '\ufb04', + 'Ffr;': '\U0001d509', + 'ffr;': '\U0001d523', + 'filig;': '\ufb01', + 'FilledSmallSquare;': '\u25fc', + 'FilledVerySmallSquare;': 
'\u25aa', + 'fjlig;': 'fj', + 'flat;': '\u266d', + 'fllig;': '\ufb02', + 'fltns;': '\u25b1', + 'fnof;': '\u0192', + 'Fopf;': '\U0001d53d', + 'fopf;': '\U0001d557', + 'ForAll;': '\u2200', + 'forall;': '\u2200', + 'fork;': '\u22d4', + 'forkv;': '\u2ad9', + 'Fouriertrf;': '\u2131', + 'fpartint;': '\u2a0d', + 'frac12': '\xbd', + 'frac12;': '\xbd', + 'frac13;': '\u2153', + 'frac14': '\xbc', + 'frac14;': '\xbc', + 'frac15;': '\u2155', + 'frac16;': '\u2159', + 'frac18;': '\u215b', + 'frac23;': '\u2154', + 'frac25;': '\u2156', + 'frac34': '\xbe', + 'frac34;': '\xbe', + 'frac35;': '\u2157', + 'frac38;': '\u215c', + 'frac45;': '\u2158', + 'frac56;': '\u215a', + 'frac58;': '\u215d', + 'frac78;': '\u215e', + 'frasl;': '\u2044', + 'frown;': '\u2322', + 'Fscr;': '\u2131', + 'fscr;': '\U0001d4bb', + 'gacute;': '\u01f5', + 'Gamma;': '\u0393', + 'gamma;': '\u03b3', + 'Gammad;': '\u03dc', + 'gammad;': '\u03dd', + 'gap;': '\u2a86', + 'Gbreve;': '\u011e', + 'gbreve;': '\u011f', + 'Gcedil;': '\u0122', + 'Gcirc;': '\u011c', + 'gcirc;': '\u011d', + 'Gcy;': '\u0413', + 'gcy;': '\u0433', + 'Gdot;': '\u0120', + 'gdot;': '\u0121', + 'gE;': '\u2267', + 'ge;': '\u2265', + 'gEl;': '\u2a8c', + 'gel;': '\u22db', + 'geq;': '\u2265', + 'geqq;': '\u2267', + 'geqslant;': '\u2a7e', + 'ges;': '\u2a7e', + 'gescc;': '\u2aa9', + 'gesdot;': '\u2a80', + 'gesdoto;': '\u2a82', + 'gesdotol;': '\u2a84', + 'gesl;': '\u22db\ufe00', + 'gesles;': '\u2a94', + 'Gfr;': '\U0001d50a', + 'gfr;': '\U0001d524', + 'Gg;': '\u22d9', + 'gg;': '\u226b', + 'ggg;': '\u22d9', + 'gimel;': '\u2137', + 'GJcy;': '\u0403', + 'gjcy;': '\u0453', + 'gl;': '\u2277', + 'gla;': '\u2aa5', + 'glE;': '\u2a92', + 'glj;': '\u2aa4', + 'gnap;': '\u2a8a', + 'gnapprox;': '\u2a8a', + 'gnE;': '\u2269', + 'gne;': '\u2a88', + 'gneq;': '\u2a88', + 'gneqq;': '\u2269', + 'gnsim;': '\u22e7', + 'Gopf;': '\U0001d53e', + 'gopf;': '\U0001d558', + 'grave;': '`', + 'GreaterEqual;': '\u2265', + 'GreaterEqualLess;': '\u22db', + 'GreaterFullEqual;': '\u2267', + 
'GreaterGreater;': '\u2aa2', + 'GreaterLess;': '\u2277', + 'GreaterSlantEqual;': '\u2a7e', + 'GreaterTilde;': '\u2273', + 'Gscr;': '\U0001d4a2', + 'gscr;': '\u210a', + 'gsim;': '\u2273', + 'gsime;': '\u2a8e', + 'gsiml;': '\u2a90', + 'GT': '>', + 'gt': '>', + 'GT;': '>', + 'Gt;': '\u226b', + 'gt;': '>', + 'gtcc;': '\u2aa7', + 'gtcir;': '\u2a7a', + 'gtdot;': '\u22d7', + 'gtlPar;': '\u2995', + 'gtquest;': '\u2a7c', + 'gtrapprox;': '\u2a86', + 'gtrarr;': '\u2978', + 'gtrdot;': '\u22d7', + 'gtreqless;': '\u22db', + 'gtreqqless;': '\u2a8c', + 'gtrless;': '\u2277', + 'gtrsim;': '\u2273', + 'gvertneqq;': '\u2269\ufe00', + 'gvnE;': '\u2269\ufe00', + 'Hacek;': '\u02c7', + 'hairsp;': '\u200a', + 'half;': '\xbd', + 'hamilt;': '\u210b', + 'HARDcy;': '\u042a', + 'hardcy;': '\u044a', + 'hArr;': '\u21d4', + 'harr;': '\u2194', + 'harrcir;': '\u2948', + 'harrw;': '\u21ad', + 'Hat;': '^', + 'hbar;': '\u210f', + 'Hcirc;': '\u0124', + 'hcirc;': '\u0125', + 'hearts;': '\u2665', + 'heartsuit;': '\u2665', + 'hellip;': '\u2026', + 'hercon;': '\u22b9', + 'Hfr;': '\u210c', + 'hfr;': '\U0001d525', + 'HilbertSpace;': '\u210b', + 'hksearow;': '\u2925', + 'hkswarow;': '\u2926', + 'hoarr;': '\u21ff', + 'homtht;': '\u223b', + 'hookleftarrow;': '\u21a9', + 'hookrightarrow;': '\u21aa', + 'Hopf;': '\u210d', + 'hopf;': '\U0001d559', + 'horbar;': '\u2015', + 'HorizontalLine;': '\u2500', + 'Hscr;': '\u210b', + 'hscr;': '\U0001d4bd', + 'hslash;': '\u210f', + 'Hstrok;': '\u0126', + 'hstrok;': '\u0127', + 'HumpDownHump;': '\u224e', + 'HumpEqual;': '\u224f', + 'hybull;': '\u2043', + 'hyphen;': '\u2010', + 'Iacute': '\xcd', + 'iacute': '\xed', + 'Iacute;': '\xcd', + 'iacute;': '\xed', + 'ic;': '\u2063', + 'Icirc': '\xce', + 'icirc': '\xee', + 'Icirc;': '\xce', + 'icirc;': '\xee', + 'Icy;': '\u0418', + 'icy;': '\u0438', + 'Idot;': '\u0130', + 'IEcy;': '\u0415', + 'iecy;': '\u0435', + 'iexcl': '\xa1', + 'iexcl;': '\xa1', + 'iff;': '\u21d4', + 'Ifr;': '\u2111', + 'ifr;': '\U0001d526', + 'Igrave': '\xcc', + 
'igrave': '\xec', + 'Igrave;': '\xcc', + 'igrave;': '\xec', + 'ii;': '\u2148', + 'iiiint;': '\u2a0c', + 'iiint;': '\u222d', + 'iinfin;': '\u29dc', + 'iiota;': '\u2129', + 'IJlig;': '\u0132', + 'ijlig;': '\u0133', + 'Im;': '\u2111', + 'Imacr;': '\u012a', + 'imacr;': '\u012b', + 'image;': '\u2111', + 'ImaginaryI;': '\u2148', + 'imagline;': '\u2110', + 'imagpart;': '\u2111', + 'imath;': '\u0131', + 'imof;': '\u22b7', + 'imped;': '\u01b5', + 'Implies;': '\u21d2', + 'in;': '\u2208', + 'incare;': '\u2105', + 'infin;': '\u221e', + 'infintie;': '\u29dd', + 'inodot;': '\u0131', + 'Int;': '\u222c', + 'int;': '\u222b', + 'intcal;': '\u22ba', + 'integers;': '\u2124', + 'Integral;': '\u222b', + 'intercal;': '\u22ba', + 'Intersection;': '\u22c2', + 'intlarhk;': '\u2a17', + 'intprod;': '\u2a3c', + 'InvisibleComma;': '\u2063', + 'InvisibleTimes;': '\u2062', + 'IOcy;': '\u0401', + 'iocy;': '\u0451', + 'Iogon;': '\u012e', + 'iogon;': '\u012f', + 'Iopf;': '\U0001d540', + 'iopf;': '\U0001d55a', + 'Iota;': '\u0399', + 'iota;': '\u03b9', + 'iprod;': '\u2a3c', + 'iquest': '\xbf', + 'iquest;': '\xbf', + 'Iscr;': '\u2110', + 'iscr;': '\U0001d4be', + 'isin;': '\u2208', + 'isindot;': '\u22f5', + 'isinE;': '\u22f9', + 'isins;': '\u22f4', + 'isinsv;': '\u22f3', + 'isinv;': '\u2208', + 'it;': '\u2062', + 'Itilde;': '\u0128', + 'itilde;': '\u0129', + 'Iukcy;': '\u0406', + 'iukcy;': '\u0456', + 'Iuml': '\xcf', + 'iuml': '\xef', + 'Iuml;': '\xcf', + 'iuml;': '\xef', + 'Jcirc;': '\u0134', + 'jcirc;': '\u0135', + 'Jcy;': '\u0419', + 'jcy;': '\u0439', + 'Jfr;': '\U0001d50d', + 'jfr;': '\U0001d527', + 'jmath;': '\u0237', + 'Jopf;': '\U0001d541', + 'jopf;': '\U0001d55b', + 'Jscr;': '\U0001d4a5', + 'jscr;': '\U0001d4bf', + 'Jsercy;': '\u0408', + 'jsercy;': '\u0458', + 'Jukcy;': '\u0404', + 'jukcy;': '\u0454', + 'Kappa;': '\u039a', + 'kappa;': '\u03ba', + 'kappav;': '\u03f0', + 'Kcedil;': '\u0136', + 'kcedil;': '\u0137', + 'Kcy;': '\u041a', + 'kcy;': '\u043a', + 'Kfr;': '\U0001d50e', + 'kfr;': 
'\U0001d528', + 'kgreen;': '\u0138', + 'KHcy;': '\u0425', + 'khcy;': '\u0445', + 'KJcy;': '\u040c', + 'kjcy;': '\u045c', + 'Kopf;': '\U0001d542', + 'kopf;': '\U0001d55c', + 'Kscr;': '\U0001d4a6', + 'kscr;': '\U0001d4c0', + 'lAarr;': '\u21da', + 'Lacute;': '\u0139', + 'lacute;': '\u013a', + 'laemptyv;': '\u29b4', + 'lagran;': '\u2112', + 'Lambda;': '\u039b', + 'lambda;': '\u03bb', + 'Lang;': '\u27ea', + 'lang;': '\u27e8', + 'langd;': '\u2991', + 'langle;': '\u27e8', + 'lap;': '\u2a85', + 'Laplacetrf;': '\u2112', + 'laquo': '\xab', + 'laquo;': '\xab', + 'Larr;': '\u219e', + 'lArr;': '\u21d0', + 'larr;': '\u2190', + 'larrb;': '\u21e4', + 'larrbfs;': '\u291f', + 'larrfs;': '\u291d', + 'larrhk;': '\u21a9', + 'larrlp;': '\u21ab', + 'larrpl;': '\u2939', + 'larrsim;': '\u2973', + 'larrtl;': '\u21a2', + 'lat;': '\u2aab', + 'lAtail;': '\u291b', + 'latail;': '\u2919', + 'late;': '\u2aad', + 'lates;': '\u2aad\ufe00', + 'lBarr;': '\u290e', + 'lbarr;': '\u290c', + 'lbbrk;': '\u2772', + 'lbrace;': '{', + 'lbrack;': '[', + 'lbrke;': '\u298b', + 'lbrksld;': '\u298f', + 'lbrkslu;': '\u298d', + 'Lcaron;': '\u013d', + 'lcaron;': '\u013e', + 'Lcedil;': '\u013b', + 'lcedil;': '\u013c', + 'lceil;': '\u2308', + 'lcub;': '{', + 'Lcy;': '\u041b', + 'lcy;': '\u043b', + 'ldca;': '\u2936', + 'ldquo;': '\u201c', + 'ldquor;': '\u201e', + 'ldrdhar;': '\u2967', + 'ldrushar;': '\u294b', + 'ldsh;': '\u21b2', + 'lE;': '\u2266', + 'le;': '\u2264', + 'LeftAngleBracket;': '\u27e8', + 'LeftArrow;': '\u2190', + 'Leftarrow;': '\u21d0', + 'leftarrow;': '\u2190', + 'LeftArrowBar;': '\u21e4', + 'LeftArrowRightArrow;': '\u21c6', + 'leftarrowtail;': '\u21a2', + 'LeftCeiling;': '\u2308', + 'LeftDoubleBracket;': '\u27e6', + 'LeftDownTeeVector;': '\u2961', + 'LeftDownVector;': '\u21c3', + 'LeftDownVectorBar;': '\u2959', + 'LeftFloor;': '\u230a', + 'leftharpoondown;': '\u21bd', + 'leftharpoonup;': '\u21bc', + 'leftleftarrows;': '\u21c7', + 'LeftRightArrow;': '\u2194', + 'Leftrightarrow;': '\u21d4', + 
'leftrightarrow;': '\u2194', + 'leftrightarrows;': '\u21c6', + 'leftrightharpoons;': '\u21cb', + 'leftrightsquigarrow;': '\u21ad', + 'LeftRightVector;': '\u294e', + 'LeftTee;': '\u22a3', + 'LeftTeeArrow;': '\u21a4', + 'LeftTeeVector;': '\u295a', + 'leftthreetimes;': '\u22cb', + 'LeftTriangle;': '\u22b2', + 'LeftTriangleBar;': '\u29cf', + 'LeftTriangleEqual;': '\u22b4', + 'LeftUpDownVector;': '\u2951', + 'LeftUpTeeVector;': '\u2960', + 'LeftUpVector;': '\u21bf', + 'LeftUpVectorBar;': '\u2958', + 'LeftVector;': '\u21bc', + 'LeftVectorBar;': '\u2952', + 'lEg;': '\u2a8b', + 'leg;': '\u22da', + 'leq;': '\u2264', + 'leqq;': '\u2266', + 'leqslant;': '\u2a7d', + 'les;': '\u2a7d', + 'lescc;': '\u2aa8', + 'lesdot;': '\u2a7f', + 'lesdoto;': '\u2a81', + 'lesdotor;': '\u2a83', + 'lesg;': '\u22da\ufe00', + 'lesges;': '\u2a93', + 'lessapprox;': '\u2a85', + 'lessdot;': '\u22d6', + 'lesseqgtr;': '\u22da', + 'lesseqqgtr;': '\u2a8b', + 'LessEqualGreater;': '\u22da', + 'LessFullEqual;': '\u2266', + 'LessGreater;': '\u2276', + 'lessgtr;': '\u2276', + 'LessLess;': '\u2aa1', + 'lesssim;': '\u2272', + 'LessSlantEqual;': '\u2a7d', + 'LessTilde;': '\u2272', + 'lfisht;': '\u297c', + 'lfloor;': '\u230a', + 'Lfr;': '\U0001d50f', + 'lfr;': '\U0001d529', + 'lg;': '\u2276', + 'lgE;': '\u2a91', + 'lHar;': '\u2962', + 'lhard;': '\u21bd', + 'lharu;': '\u21bc', + 'lharul;': '\u296a', + 'lhblk;': '\u2584', + 'LJcy;': '\u0409', + 'ljcy;': '\u0459', + 'Ll;': '\u22d8', + 'll;': '\u226a', + 'llarr;': '\u21c7', + 'llcorner;': '\u231e', + 'Lleftarrow;': '\u21da', + 'llhard;': '\u296b', + 'lltri;': '\u25fa', + 'Lmidot;': '\u013f', + 'lmidot;': '\u0140', + 'lmoust;': '\u23b0', + 'lmoustache;': '\u23b0', + 'lnap;': '\u2a89', + 'lnapprox;': '\u2a89', + 'lnE;': '\u2268', + 'lne;': '\u2a87', + 'lneq;': '\u2a87', + 'lneqq;': '\u2268', + 'lnsim;': '\u22e6', + 'loang;': '\u27ec', + 'loarr;': '\u21fd', + 'lobrk;': '\u27e6', + 'LongLeftArrow;': '\u27f5', + 'Longleftarrow;': '\u27f8', + 'longleftarrow;': '\u27f5', + 
'LongLeftRightArrow;': '\u27f7', + 'Longleftrightarrow;': '\u27fa', + 'longleftrightarrow;': '\u27f7', + 'longmapsto;': '\u27fc', + 'LongRightArrow;': '\u27f6', + 'Longrightarrow;': '\u27f9', + 'longrightarrow;': '\u27f6', + 'looparrowleft;': '\u21ab', + 'looparrowright;': '\u21ac', + 'lopar;': '\u2985', + 'Lopf;': '\U0001d543', + 'lopf;': '\U0001d55d', + 'loplus;': '\u2a2d', + 'lotimes;': '\u2a34', + 'lowast;': '\u2217', + 'lowbar;': '_', + 'LowerLeftArrow;': '\u2199', + 'LowerRightArrow;': '\u2198', + 'loz;': '\u25ca', + 'lozenge;': '\u25ca', + 'lozf;': '\u29eb', + 'lpar;': '(', + 'lparlt;': '\u2993', + 'lrarr;': '\u21c6', + 'lrcorner;': '\u231f', + 'lrhar;': '\u21cb', + 'lrhard;': '\u296d', + 'lrm;': '\u200e', + 'lrtri;': '\u22bf', + 'lsaquo;': '\u2039', + 'Lscr;': '\u2112', + 'lscr;': '\U0001d4c1', + 'Lsh;': '\u21b0', + 'lsh;': '\u21b0', + 'lsim;': '\u2272', + 'lsime;': '\u2a8d', + 'lsimg;': '\u2a8f', + 'lsqb;': '[', + 'lsquo;': '\u2018', + 'lsquor;': '\u201a', + 'Lstrok;': '\u0141', + 'lstrok;': '\u0142', + 'LT': '<', + 'lt': '<', + 'LT;': '<', + 'Lt;': '\u226a', + 'lt;': '<', + 'ltcc;': '\u2aa6', + 'ltcir;': '\u2a79', + 'ltdot;': '\u22d6', + 'lthree;': '\u22cb', + 'ltimes;': '\u22c9', + 'ltlarr;': '\u2976', + 'ltquest;': '\u2a7b', + 'ltri;': '\u25c3', + 'ltrie;': '\u22b4', + 'ltrif;': '\u25c2', + 'ltrPar;': '\u2996', + 'lurdshar;': '\u294a', + 'luruhar;': '\u2966', + 'lvertneqq;': '\u2268\ufe00', + 'lvnE;': '\u2268\ufe00', + 'macr': '\xaf', + 'macr;': '\xaf', + 'male;': '\u2642', + 'malt;': '\u2720', + 'maltese;': '\u2720', + 'Map;': '\u2905', + 'map;': '\u21a6', + 'mapsto;': '\u21a6', + 'mapstodown;': '\u21a7', + 'mapstoleft;': '\u21a4', + 'mapstoup;': '\u21a5', + 'marker;': '\u25ae', + 'mcomma;': '\u2a29', + 'Mcy;': '\u041c', + 'mcy;': '\u043c', + 'mdash;': '\u2014', + 'mDDot;': '\u223a', + 'measuredangle;': '\u2221', + 'MediumSpace;': '\u205f', + 'Mellintrf;': '\u2133', + 'Mfr;': '\U0001d510', + 'mfr;': '\U0001d52a', + 'mho;': '\u2127', + 'micro': '\xb5', 
+ 'micro;': '\xb5', + 'mid;': '\u2223', + 'midast;': '*', + 'midcir;': '\u2af0', + 'middot': '\xb7', + 'middot;': '\xb7', + 'minus;': '\u2212', + 'minusb;': '\u229f', + 'minusd;': '\u2238', + 'minusdu;': '\u2a2a', + 'MinusPlus;': '\u2213', + 'mlcp;': '\u2adb', + 'mldr;': '\u2026', + 'mnplus;': '\u2213', + 'models;': '\u22a7', + 'Mopf;': '\U0001d544', + 'mopf;': '\U0001d55e', + 'mp;': '\u2213', + 'Mscr;': '\u2133', + 'mscr;': '\U0001d4c2', + 'mstpos;': '\u223e', + 'Mu;': '\u039c', + 'mu;': '\u03bc', + 'multimap;': '\u22b8', + 'mumap;': '\u22b8', + 'nabla;': '\u2207', + 'Nacute;': '\u0143', + 'nacute;': '\u0144', + 'nang;': '\u2220\u20d2', + 'nap;': '\u2249', + 'napE;': '\u2a70\u0338', + 'napid;': '\u224b\u0338', + 'napos;': '\u0149', + 'napprox;': '\u2249', + 'natur;': '\u266e', + 'natural;': '\u266e', + 'naturals;': '\u2115', + 'nbsp': '\xa0', + 'nbsp;': '\xa0', + 'nbump;': '\u224e\u0338', + 'nbumpe;': '\u224f\u0338', + 'ncap;': '\u2a43', + 'Ncaron;': '\u0147', + 'ncaron;': '\u0148', + 'Ncedil;': '\u0145', + 'ncedil;': '\u0146', + 'ncong;': '\u2247', + 'ncongdot;': '\u2a6d\u0338', + 'ncup;': '\u2a42', + 'Ncy;': '\u041d', + 'ncy;': '\u043d', + 'ndash;': '\u2013', + 'ne;': '\u2260', + 'nearhk;': '\u2924', + 'neArr;': '\u21d7', + 'nearr;': '\u2197', + 'nearrow;': '\u2197', + 'nedot;': '\u2250\u0338', + 'NegativeMediumSpace;': '\u200b', + 'NegativeThickSpace;': '\u200b', + 'NegativeThinSpace;': '\u200b', + 'NegativeVeryThinSpace;': '\u200b', + 'nequiv;': '\u2262', + 'nesear;': '\u2928', + 'nesim;': '\u2242\u0338', + 'NestedGreaterGreater;': '\u226b', + 'NestedLessLess;': '\u226a', + 'NewLine;': '\n', + 'nexist;': '\u2204', + 'nexists;': '\u2204', + 'Nfr;': '\U0001d511', + 'nfr;': '\U0001d52b', + 'ngE;': '\u2267\u0338', + 'nge;': '\u2271', + 'ngeq;': '\u2271', + 'ngeqq;': '\u2267\u0338', + 'ngeqslant;': '\u2a7e\u0338', + 'nges;': '\u2a7e\u0338', + 'nGg;': '\u22d9\u0338', + 'ngsim;': '\u2275', + 'nGt;': '\u226b\u20d2', + 'ngt;': '\u226f', + 'ngtr;': '\u226f', + 'nGtv;': 
'\u226b\u0338', + 'nhArr;': '\u21ce', + 'nharr;': '\u21ae', + 'nhpar;': '\u2af2', + 'ni;': '\u220b', + 'nis;': '\u22fc', + 'nisd;': '\u22fa', + 'niv;': '\u220b', + 'NJcy;': '\u040a', + 'njcy;': '\u045a', + 'nlArr;': '\u21cd', + 'nlarr;': '\u219a', + 'nldr;': '\u2025', + 'nlE;': '\u2266\u0338', + 'nle;': '\u2270', + 'nLeftarrow;': '\u21cd', + 'nleftarrow;': '\u219a', + 'nLeftrightarrow;': '\u21ce', + 'nleftrightarrow;': '\u21ae', + 'nleq;': '\u2270', + 'nleqq;': '\u2266\u0338', + 'nleqslant;': '\u2a7d\u0338', + 'nles;': '\u2a7d\u0338', + 'nless;': '\u226e', + 'nLl;': '\u22d8\u0338', + 'nlsim;': '\u2274', + 'nLt;': '\u226a\u20d2', + 'nlt;': '\u226e', + 'nltri;': '\u22ea', + 'nltrie;': '\u22ec', + 'nLtv;': '\u226a\u0338', + 'nmid;': '\u2224', + 'NoBreak;': '\u2060', + 'NonBreakingSpace;': '\xa0', + 'Nopf;': '\u2115', + 'nopf;': '\U0001d55f', + 'not': '\xac', + 'Not;': '\u2aec', + 'not;': '\xac', + 'NotCongruent;': '\u2262', + 'NotCupCap;': '\u226d', + 'NotDoubleVerticalBar;': '\u2226', + 'NotElement;': '\u2209', + 'NotEqual;': '\u2260', + 'NotEqualTilde;': '\u2242\u0338', + 'NotExists;': '\u2204', + 'NotGreater;': '\u226f', + 'NotGreaterEqual;': '\u2271', + 'NotGreaterFullEqual;': '\u2267\u0338', + 'NotGreaterGreater;': '\u226b\u0338', + 'NotGreaterLess;': '\u2279', + 'NotGreaterSlantEqual;': '\u2a7e\u0338', + 'NotGreaterTilde;': '\u2275', + 'NotHumpDownHump;': '\u224e\u0338', + 'NotHumpEqual;': '\u224f\u0338', + 'notin;': '\u2209', + 'notindot;': '\u22f5\u0338', + 'notinE;': '\u22f9\u0338', + 'notinva;': '\u2209', + 'notinvb;': '\u22f7', + 'notinvc;': '\u22f6', + 'NotLeftTriangle;': '\u22ea', + 'NotLeftTriangleBar;': '\u29cf\u0338', + 'NotLeftTriangleEqual;': '\u22ec', + 'NotLess;': '\u226e', + 'NotLessEqual;': '\u2270', + 'NotLessGreater;': '\u2278', + 'NotLessLess;': '\u226a\u0338', + 'NotLessSlantEqual;': '\u2a7d\u0338', + 'NotLessTilde;': '\u2274', + 'NotNestedGreaterGreater;': '\u2aa2\u0338', + 'NotNestedLessLess;': '\u2aa1\u0338', + 'notni;': '\u220c', + 
'notniva;': '\u220c', + 'notnivb;': '\u22fe', + 'notnivc;': '\u22fd', + 'NotPrecedes;': '\u2280', + 'NotPrecedesEqual;': '\u2aaf\u0338', + 'NotPrecedesSlantEqual;': '\u22e0', + 'NotReverseElement;': '\u220c', + 'NotRightTriangle;': '\u22eb', + 'NotRightTriangleBar;': '\u29d0\u0338', + 'NotRightTriangleEqual;': '\u22ed', + 'NotSquareSubset;': '\u228f\u0338', + 'NotSquareSubsetEqual;': '\u22e2', + 'NotSquareSuperset;': '\u2290\u0338', + 'NotSquareSupersetEqual;': '\u22e3', + 'NotSubset;': '\u2282\u20d2', + 'NotSubsetEqual;': '\u2288', + 'NotSucceeds;': '\u2281', + 'NotSucceedsEqual;': '\u2ab0\u0338', + 'NotSucceedsSlantEqual;': '\u22e1', + 'NotSucceedsTilde;': '\u227f\u0338', + 'NotSuperset;': '\u2283\u20d2', + 'NotSupersetEqual;': '\u2289', + 'NotTilde;': '\u2241', + 'NotTildeEqual;': '\u2244', + 'NotTildeFullEqual;': '\u2247', + 'NotTildeTilde;': '\u2249', + 'NotVerticalBar;': '\u2224', + 'npar;': '\u2226', + 'nparallel;': '\u2226', + 'nparsl;': '\u2afd\u20e5', + 'npart;': '\u2202\u0338', + 'npolint;': '\u2a14', + 'npr;': '\u2280', + 'nprcue;': '\u22e0', + 'npre;': '\u2aaf\u0338', + 'nprec;': '\u2280', + 'npreceq;': '\u2aaf\u0338', + 'nrArr;': '\u21cf', + 'nrarr;': '\u219b', + 'nrarrc;': '\u2933\u0338', + 'nrarrw;': '\u219d\u0338', + 'nRightarrow;': '\u21cf', + 'nrightarrow;': '\u219b', + 'nrtri;': '\u22eb', + 'nrtrie;': '\u22ed', + 'nsc;': '\u2281', + 'nsccue;': '\u22e1', + 'nsce;': '\u2ab0\u0338', + 'Nscr;': '\U0001d4a9', + 'nscr;': '\U0001d4c3', + 'nshortmid;': '\u2224', + 'nshortparallel;': '\u2226', + 'nsim;': '\u2241', + 'nsime;': '\u2244', + 'nsimeq;': '\u2244', + 'nsmid;': '\u2224', + 'nspar;': '\u2226', + 'nsqsube;': '\u22e2', + 'nsqsupe;': '\u22e3', + 'nsub;': '\u2284', + 'nsubE;': '\u2ac5\u0338', + 'nsube;': '\u2288', + 'nsubset;': '\u2282\u20d2', + 'nsubseteq;': '\u2288', + 'nsubseteqq;': '\u2ac5\u0338', + 'nsucc;': '\u2281', + 'nsucceq;': '\u2ab0\u0338', + 'nsup;': '\u2285', + 'nsupE;': '\u2ac6\u0338', + 'nsupe;': '\u2289', + 'nsupset;': 
'\u2283\u20d2', + 'nsupseteq;': '\u2289', + 'nsupseteqq;': '\u2ac6\u0338', + 'ntgl;': '\u2279', + 'Ntilde': '\xd1', + 'ntilde': '\xf1', + 'Ntilde;': '\xd1', + 'ntilde;': '\xf1', + 'ntlg;': '\u2278', + 'ntriangleleft;': '\u22ea', + 'ntrianglelefteq;': '\u22ec', + 'ntriangleright;': '\u22eb', + 'ntrianglerighteq;': '\u22ed', + 'Nu;': '\u039d', + 'nu;': '\u03bd', + 'num;': '#', + 'numero;': '\u2116', + 'numsp;': '\u2007', + 'nvap;': '\u224d\u20d2', + 'nVDash;': '\u22af', + 'nVdash;': '\u22ae', + 'nvDash;': '\u22ad', + 'nvdash;': '\u22ac', + 'nvge;': '\u2265\u20d2', + 'nvgt;': '>\u20d2', + 'nvHarr;': '\u2904', + 'nvinfin;': '\u29de', + 'nvlArr;': '\u2902', + 'nvle;': '\u2264\u20d2', + 'nvlt;': '<\u20d2', + 'nvltrie;': '\u22b4\u20d2', + 'nvrArr;': '\u2903', + 'nvrtrie;': '\u22b5\u20d2', + 'nvsim;': '\u223c\u20d2', + 'nwarhk;': '\u2923', + 'nwArr;': '\u21d6', + 'nwarr;': '\u2196', + 'nwarrow;': '\u2196', + 'nwnear;': '\u2927', + 'Oacute': '\xd3', + 'oacute': '\xf3', + 'Oacute;': '\xd3', + 'oacute;': '\xf3', + 'oast;': '\u229b', + 'ocir;': '\u229a', + 'Ocirc': '\xd4', + 'ocirc': '\xf4', + 'Ocirc;': '\xd4', + 'ocirc;': '\xf4', + 'Ocy;': '\u041e', + 'ocy;': '\u043e', + 'odash;': '\u229d', + 'Odblac;': '\u0150', + 'odblac;': '\u0151', + 'odiv;': '\u2a38', + 'odot;': '\u2299', + 'odsold;': '\u29bc', + 'OElig;': '\u0152', + 'oelig;': '\u0153', + 'ofcir;': '\u29bf', + 'Ofr;': '\U0001d512', + 'ofr;': '\U0001d52c', + 'ogon;': '\u02db', + 'Ograve': '\xd2', + 'ograve': '\xf2', + 'Ograve;': '\xd2', + 'ograve;': '\xf2', + 'ogt;': '\u29c1', + 'ohbar;': '\u29b5', + 'ohm;': '\u03a9', + 'oint;': '\u222e', + 'olarr;': '\u21ba', + 'olcir;': '\u29be', + 'olcross;': '\u29bb', + 'oline;': '\u203e', + 'olt;': '\u29c0', + 'Omacr;': '\u014c', + 'omacr;': '\u014d', + 'Omega;': '\u03a9', + 'omega;': '\u03c9', + 'Omicron;': '\u039f', + 'omicron;': '\u03bf', + 'omid;': '\u29b6', + 'ominus;': '\u2296', + 'Oopf;': '\U0001d546', + 'oopf;': '\U0001d560', + 'opar;': '\u29b7', + 'OpenCurlyDoubleQuote;': 
'\u201c', + 'OpenCurlyQuote;': '\u2018', + 'operp;': '\u29b9', + 'oplus;': '\u2295', + 'Or;': '\u2a54', + 'or;': '\u2228', + 'orarr;': '\u21bb', + 'ord;': '\u2a5d', + 'order;': '\u2134', + 'orderof;': '\u2134', + 'ordf': '\xaa', + 'ordf;': '\xaa', + 'ordm': '\xba', + 'ordm;': '\xba', + 'origof;': '\u22b6', + 'oror;': '\u2a56', + 'orslope;': '\u2a57', + 'orv;': '\u2a5b', + 'oS;': '\u24c8', + 'Oscr;': '\U0001d4aa', + 'oscr;': '\u2134', + 'Oslash': '\xd8', + 'oslash': '\xf8', + 'Oslash;': '\xd8', + 'oslash;': '\xf8', + 'osol;': '\u2298', + 'Otilde': '\xd5', + 'otilde': '\xf5', + 'Otilde;': '\xd5', + 'otilde;': '\xf5', + 'Otimes;': '\u2a37', + 'otimes;': '\u2297', + 'otimesas;': '\u2a36', + 'Ouml': '\xd6', + 'ouml': '\xf6', + 'Ouml;': '\xd6', + 'ouml;': '\xf6', + 'ovbar;': '\u233d', + 'OverBar;': '\u203e', + 'OverBrace;': '\u23de', + 'OverBracket;': '\u23b4', + 'OverParenthesis;': '\u23dc', + 'par;': '\u2225', + 'para': '\xb6', + 'para;': '\xb6', + 'parallel;': '\u2225', + 'parsim;': '\u2af3', + 'parsl;': '\u2afd', + 'part;': '\u2202', + 'PartialD;': '\u2202', + 'Pcy;': '\u041f', + 'pcy;': '\u043f', + 'percnt;': '%', + 'period;': '.', + 'permil;': '\u2030', + 'perp;': '\u22a5', + 'pertenk;': '\u2031', + 'Pfr;': '\U0001d513', + 'pfr;': '\U0001d52d', + 'Phi;': '\u03a6', + 'phi;': '\u03c6', + 'phiv;': '\u03d5', + 'phmmat;': '\u2133', + 'phone;': '\u260e', + 'Pi;': '\u03a0', + 'pi;': '\u03c0', + 'pitchfork;': '\u22d4', + 'piv;': '\u03d6', + 'planck;': '\u210f', + 'planckh;': '\u210e', + 'plankv;': '\u210f', + 'plus;': '+', + 'plusacir;': '\u2a23', + 'plusb;': '\u229e', + 'pluscir;': '\u2a22', + 'plusdo;': '\u2214', + 'plusdu;': '\u2a25', + 'pluse;': '\u2a72', + 'PlusMinus;': '\xb1', + 'plusmn': '\xb1', + 'plusmn;': '\xb1', + 'plussim;': '\u2a26', + 'plustwo;': '\u2a27', + 'pm;': '\xb1', + 'Poincareplane;': '\u210c', + 'pointint;': '\u2a15', + 'Popf;': '\u2119', + 'popf;': '\U0001d561', + 'pound': '\xa3', + 'pound;': '\xa3', + 'Pr;': '\u2abb', + 'pr;': '\u227a', + 'prap;': 
'\u2ab7', + 'prcue;': '\u227c', + 'prE;': '\u2ab3', + 'pre;': '\u2aaf', + 'prec;': '\u227a', + 'precapprox;': '\u2ab7', + 'preccurlyeq;': '\u227c', + 'Precedes;': '\u227a', + 'PrecedesEqual;': '\u2aaf', + 'PrecedesSlantEqual;': '\u227c', + 'PrecedesTilde;': '\u227e', + 'preceq;': '\u2aaf', + 'precnapprox;': '\u2ab9', + 'precneqq;': '\u2ab5', + 'precnsim;': '\u22e8', + 'precsim;': '\u227e', + 'Prime;': '\u2033', + 'prime;': '\u2032', + 'primes;': '\u2119', + 'prnap;': '\u2ab9', + 'prnE;': '\u2ab5', + 'prnsim;': '\u22e8', + 'prod;': '\u220f', + 'Product;': '\u220f', + 'profalar;': '\u232e', + 'profline;': '\u2312', + 'profsurf;': '\u2313', + 'prop;': '\u221d', + 'Proportion;': '\u2237', + 'Proportional;': '\u221d', + 'propto;': '\u221d', + 'prsim;': '\u227e', + 'prurel;': '\u22b0', + 'Pscr;': '\U0001d4ab', + 'pscr;': '\U0001d4c5', + 'Psi;': '\u03a8', + 'psi;': '\u03c8', + 'puncsp;': '\u2008', + 'Qfr;': '\U0001d514', + 'qfr;': '\U0001d52e', + 'qint;': '\u2a0c', + 'Qopf;': '\u211a', + 'qopf;': '\U0001d562', + 'qprime;': '\u2057', + 'Qscr;': '\U0001d4ac', + 'qscr;': '\U0001d4c6', + 'quaternions;': '\u210d', + 'quatint;': '\u2a16', + 'quest;': '?', + 'questeq;': '\u225f', + 'QUOT': '"', + 'quot': '"', + 'QUOT;': '"', + 'quot;': '"', + 'rAarr;': '\u21db', + 'race;': '\u223d\u0331', + 'Racute;': '\u0154', + 'racute;': '\u0155', + 'radic;': '\u221a', + 'raemptyv;': '\u29b3', + 'Rang;': '\u27eb', + 'rang;': '\u27e9', + 'rangd;': '\u2992', + 'range;': '\u29a5', + 'rangle;': '\u27e9', + 'raquo': '\xbb', + 'raquo;': '\xbb', + 'Rarr;': '\u21a0', + 'rArr;': '\u21d2', + 'rarr;': '\u2192', + 'rarrap;': '\u2975', + 'rarrb;': '\u21e5', + 'rarrbfs;': '\u2920', + 'rarrc;': '\u2933', + 'rarrfs;': '\u291e', + 'rarrhk;': '\u21aa', + 'rarrlp;': '\u21ac', + 'rarrpl;': '\u2945', + 'rarrsim;': '\u2974', + 'Rarrtl;': '\u2916', + 'rarrtl;': '\u21a3', + 'rarrw;': '\u219d', + 'rAtail;': '\u291c', + 'ratail;': '\u291a', + 'ratio;': '\u2236', + 'rationals;': '\u211a', + 'RBarr;': '\u2910', + 
'rBarr;': '\u290f', + 'rbarr;': '\u290d', + 'rbbrk;': '\u2773', + 'rbrace;': '}', + 'rbrack;': ']', + 'rbrke;': '\u298c', + 'rbrksld;': '\u298e', + 'rbrkslu;': '\u2990', + 'Rcaron;': '\u0158', + 'rcaron;': '\u0159', + 'Rcedil;': '\u0156', + 'rcedil;': '\u0157', + 'rceil;': '\u2309', + 'rcub;': '}', + 'Rcy;': '\u0420', + 'rcy;': '\u0440', + 'rdca;': '\u2937', + 'rdldhar;': '\u2969', + 'rdquo;': '\u201d', + 'rdquor;': '\u201d', + 'rdsh;': '\u21b3', + 'Re;': '\u211c', + 'real;': '\u211c', + 'realine;': '\u211b', + 'realpart;': '\u211c', + 'reals;': '\u211d', + 'rect;': '\u25ad', + 'REG': '\xae', + 'reg': '\xae', + 'REG;': '\xae', + 'reg;': '\xae', + 'ReverseElement;': '\u220b', + 'ReverseEquilibrium;': '\u21cb', + 'ReverseUpEquilibrium;': '\u296f', + 'rfisht;': '\u297d', + 'rfloor;': '\u230b', + 'Rfr;': '\u211c', + 'rfr;': '\U0001d52f', + 'rHar;': '\u2964', + 'rhard;': '\u21c1', + 'rharu;': '\u21c0', + 'rharul;': '\u296c', + 'Rho;': '\u03a1', + 'rho;': '\u03c1', + 'rhov;': '\u03f1', + 'RightAngleBracket;': '\u27e9', + 'RightArrow;': '\u2192', + 'Rightarrow;': '\u21d2', + 'rightarrow;': '\u2192', + 'RightArrowBar;': '\u21e5', + 'RightArrowLeftArrow;': '\u21c4', + 'rightarrowtail;': '\u21a3', + 'RightCeiling;': '\u2309', + 'RightDoubleBracket;': '\u27e7', + 'RightDownTeeVector;': '\u295d', + 'RightDownVector;': '\u21c2', + 'RightDownVectorBar;': '\u2955', + 'RightFloor;': '\u230b', + 'rightharpoondown;': '\u21c1', + 'rightharpoonup;': '\u21c0', + 'rightleftarrows;': '\u21c4', + 'rightleftharpoons;': '\u21cc', + 'rightrightarrows;': '\u21c9', + 'rightsquigarrow;': '\u219d', + 'RightTee;': '\u22a2', + 'RightTeeArrow;': '\u21a6', + 'RightTeeVector;': '\u295b', + 'rightthreetimes;': '\u22cc', + 'RightTriangle;': '\u22b3', + 'RightTriangleBar;': '\u29d0', + 'RightTriangleEqual;': '\u22b5', + 'RightUpDownVector;': '\u294f', + 'RightUpTeeVector;': '\u295c', + 'RightUpVector;': '\u21be', + 'RightUpVectorBar;': '\u2954', + 'RightVector;': '\u21c0', + 'RightVectorBar;': '\u2953', 
+ 'ring;': '\u02da', + 'risingdotseq;': '\u2253', + 'rlarr;': '\u21c4', + 'rlhar;': '\u21cc', + 'rlm;': '\u200f', + 'rmoust;': '\u23b1', + 'rmoustache;': '\u23b1', + 'rnmid;': '\u2aee', + 'roang;': '\u27ed', + 'roarr;': '\u21fe', + 'robrk;': '\u27e7', + 'ropar;': '\u2986', + 'Ropf;': '\u211d', + 'ropf;': '\U0001d563', + 'roplus;': '\u2a2e', + 'rotimes;': '\u2a35', + 'RoundImplies;': '\u2970', + 'rpar;': ')', + 'rpargt;': '\u2994', + 'rppolint;': '\u2a12', + 'rrarr;': '\u21c9', + 'Rrightarrow;': '\u21db', + 'rsaquo;': '\u203a', + 'Rscr;': '\u211b', + 'rscr;': '\U0001d4c7', + 'Rsh;': '\u21b1', + 'rsh;': '\u21b1', + 'rsqb;': ']', + 'rsquo;': '\u2019', + 'rsquor;': '\u2019', + 'rthree;': '\u22cc', + 'rtimes;': '\u22ca', + 'rtri;': '\u25b9', + 'rtrie;': '\u22b5', + 'rtrif;': '\u25b8', + 'rtriltri;': '\u29ce', + 'RuleDelayed;': '\u29f4', + 'ruluhar;': '\u2968', + 'rx;': '\u211e', + 'Sacute;': '\u015a', + 'sacute;': '\u015b', + 'sbquo;': '\u201a', + 'Sc;': '\u2abc', + 'sc;': '\u227b', + 'scap;': '\u2ab8', + 'Scaron;': '\u0160', + 'scaron;': '\u0161', + 'sccue;': '\u227d', + 'scE;': '\u2ab4', + 'sce;': '\u2ab0', + 'Scedil;': '\u015e', + 'scedil;': '\u015f', + 'Scirc;': '\u015c', + 'scirc;': '\u015d', + 'scnap;': '\u2aba', + 'scnE;': '\u2ab6', + 'scnsim;': '\u22e9', + 'scpolint;': '\u2a13', + 'scsim;': '\u227f', + 'Scy;': '\u0421', + 'scy;': '\u0441', + 'sdot;': '\u22c5', + 'sdotb;': '\u22a1', + 'sdote;': '\u2a66', + 'searhk;': '\u2925', + 'seArr;': '\u21d8', + 'searr;': '\u2198', + 'searrow;': '\u2198', + 'sect': '\xa7', + 'sect;': '\xa7', + 'semi;': ';', + 'seswar;': '\u2929', + 'setminus;': '\u2216', + 'setmn;': '\u2216', + 'sext;': '\u2736', + 'Sfr;': '\U0001d516', + 'sfr;': '\U0001d530', + 'sfrown;': '\u2322', + 'sharp;': '\u266f', + 'SHCHcy;': '\u0429', + 'shchcy;': '\u0449', + 'SHcy;': '\u0428', + 'shcy;': '\u0448', + 'ShortDownArrow;': '\u2193', + 'ShortLeftArrow;': '\u2190', + 'shortmid;': '\u2223', + 'shortparallel;': '\u2225', + 'ShortRightArrow;': '\u2192', + 
'ShortUpArrow;': '\u2191', + 'shy': '\xad', + 'shy;': '\xad', + 'Sigma;': '\u03a3', + 'sigma;': '\u03c3', + 'sigmaf;': '\u03c2', + 'sigmav;': '\u03c2', + 'sim;': '\u223c', + 'simdot;': '\u2a6a', + 'sime;': '\u2243', + 'simeq;': '\u2243', + 'simg;': '\u2a9e', + 'simgE;': '\u2aa0', + 'siml;': '\u2a9d', + 'simlE;': '\u2a9f', + 'simne;': '\u2246', + 'simplus;': '\u2a24', + 'simrarr;': '\u2972', + 'slarr;': '\u2190', + 'SmallCircle;': '\u2218', + 'smallsetminus;': '\u2216', + 'smashp;': '\u2a33', + 'smeparsl;': '\u29e4', + 'smid;': '\u2223', + 'smile;': '\u2323', + 'smt;': '\u2aaa', + 'smte;': '\u2aac', + 'smtes;': '\u2aac\ufe00', + 'SOFTcy;': '\u042c', + 'softcy;': '\u044c', + 'sol;': '/', + 'solb;': '\u29c4', + 'solbar;': '\u233f', + 'Sopf;': '\U0001d54a', + 'sopf;': '\U0001d564', + 'spades;': '\u2660', + 'spadesuit;': '\u2660', + 'spar;': '\u2225', + 'sqcap;': '\u2293', + 'sqcaps;': '\u2293\ufe00', + 'sqcup;': '\u2294', + 'sqcups;': '\u2294\ufe00', + 'Sqrt;': '\u221a', + 'sqsub;': '\u228f', + 'sqsube;': '\u2291', + 'sqsubset;': '\u228f', + 'sqsubseteq;': '\u2291', + 'sqsup;': '\u2290', + 'sqsupe;': '\u2292', + 'sqsupset;': '\u2290', + 'sqsupseteq;': '\u2292', + 'squ;': '\u25a1', + 'Square;': '\u25a1', + 'square;': '\u25a1', + 'SquareIntersection;': '\u2293', + 'SquareSubset;': '\u228f', + 'SquareSubsetEqual;': '\u2291', + 'SquareSuperset;': '\u2290', + 'SquareSupersetEqual;': '\u2292', + 'SquareUnion;': '\u2294', + 'squarf;': '\u25aa', + 'squf;': '\u25aa', + 'srarr;': '\u2192', + 'Sscr;': '\U0001d4ae', + 'sscr;': '\U0001d4c8', + 'ssetmn;': '\u2216', + 'ssmile;': '\u2323', + 'sstarf;': '\u22c6', + 'Star;': '\u22c6', + 'star;': '\u2606', + 'starf;': '\u2605', + 'straightepsilon;': '\u03f5', + 'straightphi;': '\u03d5', + 'strns;': '\xaf', + 'Sub;': '\u22d0', + 'sub;': '\u2282', + 'subdot;': '\u2abd', + 'subE;': '\u2ac5', + 'sube;': '\u2286', + 'subedot;': '\u2ac3', + 'submult;': '\u2ac1', + 'subnE;': '\u2acb', + 'subne;': '\u228a', + 'subplus;': '\u2abf', + 'subrarr;': 
'\u2979', + 'Subset;': '\u22d0', + 'subset;': '\u2282', + 'subseteq;': '\u2286', + 'subseteqq;': '\u2ac5', + 'SubsetEqual;': '\u2286', + 'subsetneq;': '\u228a', + 'subsetneqq;': '\u2acb', + 'subsim;': '\u2ac7', + 'subsub;': '\u2ad5', + 'subsup;': '\u2ad3', + 'succ;': '\u227b', + 'succapprox;': '\u2ab8', + 'succcurlyeq;': '\u227d', + 'Succeeds;': '\u227b', + 'SucceedsEqual;': '\u2ab0', + 'SucceedsSlantEqual;': '\u227d', + 'SucceedsTilde;': '\u227f', + 'succeq;': '\u2ab0', + 'succnapprox;': '\u2aba', + 'succneqq;': '\u2ab6', + 'succnsim;': '\u22e9', + 'succsim;': '\u227f', + 'SuchThat;': '\u220b', + 'Sum;': '\u2211', + 'sum;': '\u2211', + 'sung;': '\u266a', + 'sup1': '\xb9', + 'sup1;': '\xb9', + 'sup2': '\xb2', + 'sup2;': '\xb2', + 'sup3': '\xb3', + 'sup3;': '\xb3', + 'Sup;': '\u22d1', + 'sup;': '\u2283', + 'supdot;': '\u2abe', + 'supdsub;': '\u2ad8', + 'supE;': '\u2ac6', + 'supe;': '\u2287', + 'supedot;': '\u2ac4', + 'Superset;': '\u2283', + 'SupersetEqual;': '\u2287', + 'suphsol;': '\u27c9', + 'suphsub;': '\u2ad7', + 'suplarr;': '\u297b', + 'supmult;': '\u2ac2', + 'supnE;': '\u2acc', + 'supne;': '\u228b', + 'supplus;': '\u2ac0', + 'Supset;': '\u22d1', + 'supset;': '\u2283', + 'supseteq;': '\u2287', + 'supseteqq;': '\u2ac6', + 'supsetneq;': '\u228b', + 'supsetneqq;': '\u2acc', + 'supsim;': '\u2ac8', + 'supsub;': '\u2ad4', + 'supsup;': '\u2ad6', + 'swarhk;': '\u2926', + 'swArr;': '\u21d9', + 'swarr;': '\u2199', + 'swarrow;': '\u2199', + 'swnwar;': '\u292a', + 'szlig': '\xdf', + 'szlig;': '\xdf', + 'Tab;': '\t', + 'target;': '\u2316', + 'Tau;': '\u03a4', + 'tau;': '\u03c4', + 'tbrk;': '\u23b4', + 'Tcaron;': '\u0164', + 'tcaron;': '\u0165', + 'Tcedil;': '\u0162', + 'tcedil;': '\u0163', + 'Tcy;': '\u0422', + 'tcy;': '\u0442', + 'tdot;': '\u20db', + 'telrec;': '\u2315', + 'Tfr;': '\U0001d517', + 'tfr;': '\U0001d531', + 'there4;': '\u2234', + 'Therefore;': '\u2234', + 'therefore;': '\u2234', + 'Theta;': '\u0398', + 'theta;': '\u03b8', + 'thetasym;': '\u03d1', + 'thetav;': 
'\u03d1', + 'thickapprox;': '\u2248', + 'thicksim;': '\u223c', + 'ThickSpace;': '\u205f\u200a', + 'thinsp;': '\u2009', + 'ThinSpace;': '\u2009', + 'thkap;': '\u2248', + 'thksim;': '\u223c', + 'THORN': '\xde', + 'thorn': '\xfe', + 'THORN;': '\xde', + 'thorn;': '\xfe', + 'Tilde;': '\u223c', + 'tilde;': '\u02dc', + 'TildeEqual;': '\u2243', + 'TildeFullEqual;': '\u2245', + 'TildeTilde;': '\u2248', + 'times': '\xd7', + 'times;': '\xd7', + 'timesb;': '\u22a0', + 'timesbar;': '\u2a31', + 'timesd;': '\u2a30', + 'tint;': '\u222d', + 'toea;': '\u2928', + 'top;': '\u22a4', + 'topbot;': '\u2336', + 'topcir;': '\u2af1', + 'Topf;': '\U0001d54b', + 'topf;': '\U0001d565', + 'topfork;': '\u2ada', + 'tosa;': '\u2929', + 'tprime;': '\u2034', + 'TRADE;': '\u2122', + 'trade;': '\u2122', + 'triangle;': '\u25b5', + 'triangledown;': '\u25bf', + 'triangleleft;': '\u25c3', + 'trianglelefteq;': '\u22b4', + 'triangleq;': '\u225c', + 'triangleright;': '\u25b9', + 'trianglerighteq;': '\u22b5', + 'tridot;': '\u25ec', + 'trie;': '\u225c', + 'triminus;': '\u2a3a', + 'TripleDot;': '\u20db', + 'triplus;': '\u2a39', + 'trisb;': '\u29cd', + 'tritime;': '\u2a3b', + 'trpezium;': '\u23e2', + 'Tscr;': '\U0001d4af', + 'tscr;': '\U0001d4c9', + 'TScy;': '\u0426', + 'tscy;': '\u0446', + 'TSHcy;': '\u040b', + 'tshcy;': '\u045b', + 'Tstrok;': '\u0166', + 'tstrok;': '\u0167', + 'twixt;': '\u226c', + 'twoheadleftarrow;': '\u219e', + 'twoheadrightarrow;': '\u21a0', + 'Uacute': '\xda', + 'uacute': '\xfa', + 'Uacute;': '\xda', + 'uacute;': '\xfa', + 'Uarr;': '\u219f', + 'uArr;': '\u21d1', + 'uarr;': '\u2191', + 'Uarrocir;': '\u2949', + 'Ubrcy;': '\u040e', + 'ubrcy;': '\u045e', + 'Ubreve;': '\u016c', + 'ubreve;': '\u016d', + 'Ucirc': '\xdb', + 'ucirc': '\xfb', + 'Ucirc;': '\xdb', + 'ucirc;': '\xfb', + 'Ucy;': '\u0423', + 'ucy;': '\u0443', + 'udarr;': '\u21c5', + 'Udblac;': '\u0170', + 'udblac;': '\u0171', + 'udhar;': '\u296e', + 'ufisht;': '\u297e', + 'Ufr;': '\U0001d518', + 'ufr;': '\U0001d532', + 'Ugrave': '\xd9', 
+ 'ugrave': '\xf9', + 'Ugrave;': '\xd9', + 'ugrave;': '\xf9', + 'uHar;': '\u2963', + 'uharl;': '\u21bf', + 'uharr;': '\u21be', + 'uhblk;': '\u2580', + 'ulcorn;': '\u231c', + 'ulcorner;': '\u231c', + 'ulcrop;': '\u230f', + 'ultri;': '\u25f8', + 'Umacr;': '\u016a', + 'umacr;': '\u016b', + 'uml': '\xa8', + 'uml;': '\xa8', + 'UnderBar;': '_', + 'UnderBrace;': '\u23df', + 'UnderBracket;': '\u23b5', + 'UnderParenthesis;': '\u23dd', + 'Union;': '\u22c3', + 'UnionPlus;': '\u228e', + 'Uogon;': '\u0172', + 'uogon;': '\u0173', + 'Uopf;': '\U0001d54c', + 'uopf;': '\U0001d566', + 'UpArrow;': '\u2191', + 'Uparrow;': '\u21d1', + 'uparrow;': '\u2191', + 'UpArrowBar;': '\u2912', + 'UpArrowDownArrow;': '\u21c5', + 'UpDownArrow;': '\u2195', + 'Updownarrow;': '\u21d5', + 'updownarrow;': '\u2195', + 'UpEquilibrium;': '\u296e', + 'upharpoonleft;': '\u21bf', + 'upharpoonright;': '\u21be', + 'uplus;': '\u228e', + 'UpperLeftArrow;': '\u2196', + 'UpperRightArrow;': '\u2197', + 'Upsi;': '\u03d2', + 'upsi;': '\u03c5', + 'upsih;': '\u03d2', + 'Upsilon;': '\u03a5', + 'upsilon;': '\u03c5', + 'UpTee;': '\u22a5', + 'UpTeeArrow;': '\u21a5', + 'upuparrows;': '\u21c8', + 'urcorn;': '\u231d', + 'urcorner;': '\u231d', + 'urcrop;': '\u230e', + 'Uring;': '\u016e', + 'uring;': '\u016f', + 'urtri;': '\u25f9', + 'Uscr;': '\U0001d4b0', + 'uscr;': '\U0001d4ca', + 'utdot;': '\u22f0', + 'Utilde;': '\u0168', + 'utilde;': '\u0169', + 'utri;': '\u25b5', + 'utrif;': '\u25b4', + 'uuarr;': '\u21c8', + 'Uuml': '\xdc', + 'uuml': '\xfc', + 'Uuml;': '\xdc', + 'uuml;': '\xfc', + 'uwangle;': '\u29a7', + 'vangrt;': '\u299c', + 'varepsilon;': '\u03f5', + 'varkappa;': '\u03f0', + 'varnothing;': '\u2205', + 'varphi;': '\u03d5', + 'varpi;': '\u03d6', + 'varpropto;': '\u221d', + 'vArr;': '\u21d5', + 'varr;': '\u2195', + 'varrho;': '\u03f1', + 'varsigma;': '\u03c2', + 'varsubsetneq;': '\u228a\ufe00', + 'varsubsetneqq;': '\u2acb\ufe00', + 'varsupsetneq;': '\u228b\ufe00', + 'varsupsetneqq;': '\u2acc\ufe00', + 'vartheta;': '\u03d1', 
+ 'vartriangleleft;': '\u22b2', + 'vartriangleright;': '\u22b3', + 'Vbar;': '\u2aeb', + 'vBar;': '\u2ae8', + 'vBarv;': '\u2ae9', + 'Vcy;': '\u0412', + 'vcy;': '\u0432', + 'VDash;': '\u22ab', + 'Vdash;': '\u22a9', + 'vDash;': '\u22a8', + 'vdash;': '\u22a2', + 'Vdashl;': '\u2ae6', + 'Vee;': '\u22c1', + 'vee;': '\u2228', + 'veebar;': '\u22bb', + 'veeeq;': '\u225a', + 'vellip;': '\u22ee', + 'Verbar;': '\u2016', + 'verbar;': '|', + 'Vert;': '\u2016', + 'vert;': '|', + 'VerticalBar;': '\u2223', + 'VerticalLine;': '|', + 'VerticalSeparator;': '\u2758', + 'VerticalTilde;': '\u2240', + 'VeryThinSpace;': '\u200a', + 'Vfr;': '\U0001d519', + 'vfr;': '\U0001d533', + 'vltri;': '\u22b2', + 'vnsub;': '\u2282\u20d2', + 'vnsup;': '\u2283\u20d2', + 'Vopf;': '\U0001d54d', + 'vopf;': '\U0001d567', + 'vprop;': '\u221d', + 'vrtri;': '\u22b3', + 'Vscr;': '\U0001d4b1', + 'vscr;': '\U0001d4cb', + 'vsubnE;': '\u2acb\ufe00', + 'vsubne;': '\u228a\ufe00', + 'vsupnE;': '\u2acc\ufe00', + 'vsupne;': '\u228b\ufe00', + 'Vvdash;': '\u22aa', + 'vzigzag;': '\u299a', + 'Wcirc;': '\u0174', + 'wcirc;': '\u0175', + 'wedbar;': '\u2a5f', + 'Wedge;': '\u22c0', + 'wedge;': '\u2227', + 'wedgeq;': '\u2259', + 'weierp;': '\u2118', + 'Wfr;': '\U0001d51a', + 'wfr;': '\U0001d534', + 'Wopf;': '\U0001d54e', + 'wopf;': '\U0001d568', + 'wp;': '\u2118', + 'wr;': '\u2240', + 'wreath;': '\u2240', + 'Wscr;': '\U0001d4b2', + 'wscr;': '\U0001d4cc', + 'xcap;': '\u22c2', + 'xcirc;': '\u25ef', + 'xcup;': '\u22c3', + 'xdtri;': '\u25bd', + 'Xfr;': '\U0001d51b', + 'xfr;': '\U0001d535', + 'xhArr;': '\u27fa', + 'xharr;': '\u27f7', + 'Xi;': '\u039e', + 'xi;': '\u03be', + 'xlArr;': '\u27f8', + 'xlarr;': '\u27f5', + 'xmap;': '\u27fc', + 'xnis;': '\u22fb', + 'xodot;': '\u2a00', + 'Xopf;': '\U0001d54f', + 'xopf;': '\U0001d569', + 'xoplus;': '\u2a01', + 'xotime;': '\u2a02', + 'xrArr;': '\u27f9', + 'xrarr;': '\u27f6', + 'Xscr;': '\U0001d4b3', + 'xscr;': '\U0001d4cd', + 'xsqcup;': '\u2a06', + 'xuplus;': '\u2a04', + 'xutri;': '\u25b3', + 
'xvee;': '\u22c1', + 'xwedge;': '\u22c0', + 'Yacute': '\xdd', + 'yacute': '\xfd', + 'Yacute;': '\xdd', + 'yacute;': '\xfd', + 'YAcy;': '\u042f', + 'yacy;': '\u044f', + 'Ycirc;': '\u0176', + 'ycirc;': '\u0177', + 'Ycy;': '\u042b', + 'ycy;': '\u044b', + 'yen': '\xa5', + 'yen;': '\xa5', + 'Yfr;': '\U0001d51c', + 'yfr;': '\U0001d536', + 'YIcy;': '\u0407', + 'yicy;': '\u0457', + 'Yopf;': '\U0001d550', + 'yopf;': '\U0001d56a', + 'Yscr;': '\U0001d4b4', + 'yscr;': '\U0001d4ce', + 'YUcy;': '\u042e', + 'yucy;': '\u044e', + 'yuml': '\xff', + 'Yuml;': '\u0178', + 'yuml;': '\xff', + 'Zacute;': '\u0179', + 'zacute;': '\u017a', + 'Zcaron;': '\u017d', + 'zcaron;': '\u017e', + 'Zcy;': '\u0417', + 'zcy;': '\u0437', + 'Zdot;': '\u017b', + 'zdot;': '\u017c', + 'zeetrf;': '\u2128', + 'ZeroWidthSpace;': '\u200b', + 'Zeta;': '\u0396', + 'zeta;': '\u03b6', + 'Zfr;': '\u2128', + 'zfr;': '\U0001d537', + 'ZHcy;': '\u0416', + 'zhcy;': '\u0436', + 'zigrarr;': '\u21dd', + 'Zopf;': '\u2124', + 'zopf;': '\U0001d56b', + 'Zscr;': '\U0001d4b5', + 'zscr;': '\U0001d4cf', + 'zwj;': '\u200d', + 'zwnj;': '\u200c', + } + +try: + import http.client as compat_http_client +except ImportError: # Python 2 + import httplib as compat_http_client +try: + compat_http_client.HTTPResponse.getcode +except AttributeError: + # Py < 3.1 + compat_http_client.HTTPResponse.getcode = lambda self: self.status + +try: + from urllib.error import HTTPError as compat_HTTPError +except ImportError: # Python 2 + from urllib2 import HTTPError as compat_HTTPError +compat_urllib_HTTPError = compat_HTTPError + +try: + from urllib.request import urlretrieve as compat_urlretrieve +except ImportError: # Python 2 + from urllib import urlretrieve as compat_urlretrieve +compat_urllib_request_urlretrieve = compat_urlretrieve + +try: + from html.parser import HTMLParser as compat_HTMLParser +except ImportError: # Python 2 + from HTMLParser import HTMLParser as compat_HTMLParser +compat_html_parser_HTMLParser = compat_HTMLParser + +try: # 
Python 2 + from HTMLParser import HTMLParseError as compat_HTMLParseError +except ImportError: # Python <3.4 + try: + from html.parser import HTMLParseError as compat_HTMLParseError + except ImportError: # Python >3.4 + + # HTMLParseError has been deprecated in Python 3.3 and removed in + # Python 3.5. Introducing dummy exception for Python >3.5 for compatible + # and uniform cross-version exception handling + class compat_HTMLParseError(Exception): + pass +compat_html_parser_HTMLParseError = compat_HTMLParseError + +try: + from subprocess import DEVNULL + compat_subprocess_get_DEVNULL = lambda: DEVNULL +except ImportError: + compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w') + +try: + import http.server as compat_http_server +except ImportError: + import BaseHTTPServer as compat_http_server + +try: + from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes + from urllib.parse import unquote as compat_urllib_parse_unquote + from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus + from urllib.parse import urlencode as compat_urllib_parse_urlencode + from urllib.parse import parse_qs as compat_parse_qs +except ImportError: # Python 2 + _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire') + else re.compile(r'([\x00-\x7f]+)')) + + # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus + # implementations from cpython 3.4.3's stdlib. Python 2's version + # is apparently broken (see https://github.com/ytdl-org/youtube-dl/pull/6244) + + def compat_urllib_parse_unquote_to_bytes(string): + """unquote_to_bytes('abc%20def') -> b'abc def'.""" + # Note: strings are encoded as UTF-8. This is only an issue if it contains + # unescaped non-ASCII characters, which URIs should not. + if not string: + # Is it a string-like object? 
+ string.split + return b'' + if isinstance(string, compat_str): + string = string.encode('utf-8') + bits = string.split(b'%') + if len(bits) == 1: + return string + res = [bits[0]] + append = res.append + for item in bits[1:]: + try: + append(compat_urllib_parse._hextochr[item[:2]]) + append(item[2:]) + except KeyError: + append(b'%') + append(item) + return b''.join(res) + + def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'): + """Replace %xx escapes by their single-character equivalent. The optional + encoding and errors parameters specify how to decode percent-encoded + sequences into Unicode characters, as accepted by the bytes.decode() + method. + By default, percent-encoded sequences are decoded with UTF-8, and invalid + sequences are replaced by a placeholder character. + + unquote('abc%20def') -> 'abc def'. + """ + if '%' not in string: + string.split + return string + if encoding is None: + encoding = 'utf-8' + if errors is None: + errors = 'replace' + bits = _asciire.split(string) + res = [bits[0]] + append = res.append + for i in range(1, len(bits), 2): + append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors)) + append(bits[i + 1]) + return ''.join(res) + + def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'): + """Like unquote(), but also replace plus signs by spaces, as required for + unquoting HTML form values. + + unquote_plus('%7e/abc+def') -> '~/abc def' + """ + string = string.replace('+', ' ') + return compat_urllib_parse_unquote(string, encoding, errors) + + # Python 2 will choke in urlencode on mixture of byte and unicode strings. + # Possible solutions are to either port it from python 3 with all + # the friends or manually ensure input query contains only byte strings. + # We will stick with latter thus recursively encoding the whole query. 
+ def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'): + def encode_elem(e): + if isinstance(e, dict): + e = encode_dict(e) + elif isinstance(e, (list, tuple,)): + list_e = encode_list(e) + e = tuple(list_e) if isinstance(e, tuple) else list_e + elif isinstance(e, compat_str): + e = e.encode(encoding) + return e + + def encode_dict(d): + return dict((encode_elem(k), encode_elem(v)) for k, v in d.items()) + + def encode_list(l): + return [encode_elem(e) for e in l] + + return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq) + + # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib. + # Python 2's version is apparently totally broken + def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False, + encoding='utf-8', errors='replace'): + qs, _coerce_result = qs, compat_str + pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] + r = [] + for name_value in pairs: + if not name_value and not strict_parsing: + continue + nv = name_value.split('=', 1) + if len(nv) != 2: + if strict_parsing: + raise ValueError('bad query field: %r' % (name_value,)) + # Handle case of a control-name with no equal sign + if keep_blank_values: + nv.append('') + else: + continue + if len(nv[1]) or keep_blank_values: + name = nv[0].replace('+', ' ') + name = compat_urllib_parse_unquote( + name, encoding=encoding, errors=errors) + name = _coerce_result(name) + value = nv[1].replace('+', ' ') + value = compat_urllib_parse_unquote( + value, encoding=encoding, errors=errors) + value = _coerce_result(value) + r.append((name, value)) + return r + + def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False, + encoding='utf-8', errors='replace'): + parsed_result = {} + pairs = _parse_qsl(qs, keep_blank_values, strict_parsing, + encoding=encoding, errors=errors) + for name, value in pairs: + if name in parsed_result: + parsed_result[name].append(value) + else: + parsed_result[name] = [value] + return parsed_result + + 
setattr(compat_urllib_parse, '_urlencode', + getattr(compat_urllib_parse, 'urlencode')) + for name, fix in ( + ('unquote_to_bytes', compat_urllib_parse_unquote_to_bytes), + ('parse_unquote', compat_urllib_parse_unquote), + ('unquote_plus', compat_urllib_parse_unquote_plus), + ('urlencode', compat_urllib_parse_urlencode), + ('parse_qs', compat_parse_qs)): + setattr(compat_urllib_parse, name, fix) + +compat_urllib_parse_parse_qs = compat_parse_qs + +try: + from urllib.request import DataHandler as compat_urllib_request_DataHandler +except ImportError: # Python < 3.4 + # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py + class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler): + def data_open(self, req): + # data URLs as specified in RFC 2397. + # + # ignores POSTed data + # + # syntax: + # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data + # mediatype := [ type "/" subtype ] *( ";" parameter ) + # data := *urlchar + # parameter := attribute "=" value + url = req.get_full_url() + + scheme, data = url.split(':', 1) + mediatype, data = data.split(',', 1) + + # even base64 encoded data URLs might be quoted so unquote in any case: + data = compat_urllib_parse_unquote_to_bytes(data) + if mediatype.endswith(';base64'): + data = binascii.a2b_base64(data) + mediatype = mediatype[:-7] + + if not mediatype: + mediatype = 'text/plain;charset=US-ASCII' + + headers = email.message_from_string( + 'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data))) + + return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url) + +try: + from xml.etree.ElementTree import ParseError as compat_xml_parse_error +except ImportError: # Python 2.6 + from xml.parsers.expat import ExpatError as compat_xml_parse_error +compat_xml_etree_ElementTree_ParseError = compat_xml_parse_error + +etree = xml.etree.ElementTree + + +class _TreeBuilder(etree.TreeBuilder): + def doctype(self, name, pubid, system): + pass + + +try: + # 
xml.etree.ElementTree.Element is a method in Python <=2.6 and + # the following will crash with: + # TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types + isinstance(None, etree.Element) + from xml.etree.ElementTree import Element as compat_etree_Element +except TypeError: # Python <=2.6 + from xml.etree.ElementTree import _ElementInterface as compat_etree_Element +compat_xml_etree_ElementTree_Element = compat_etree_Element + +if sys.version_info[0] >= 3: + def compat_etree_fromstring(text): + return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) +else: + # python 2.x tries to encode unicode strings with ascii (see the + # XMLParser._fixtext method) + try: + _etree_iter = etree.Element.iter + except AttributeError: # Python <=2.6 + def _etree_iter(root): + for el in root.findall('*'): + yield el + for sub in _etree_iter(el): + yield sub + + # on 2.6 XML doesn't have a parser argument, function copied from CPython + # 2.7 source + def _XML(text, parser=None): + if not parser: + parser = etree.XMLParser(target=_TreeBuilder()) + parser.feed(text) + return parser.close() + + def _element_factory(*args, **kwargs): + el = etree.Element(*args, **kwargs) + for k, v in el.items(): + if isinstance(v, bytes): + el.set(k, v.decode('utf-8')) + return el + + def compat_etree_fromstring(text): + doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory))) + for el in _etree_iter(doc): + if el.text is not None and isinstance(el.text, bytes): + el.text = el.text.decode('utf-8') + return doc + +if hasattr(etree, 'register_namespace'): + compat_etree_register_namespace = etree.register_namespace +else: + def compat_etree_register_namespace(prefix, uri): + """Register a namespace prefix. + The registry is global, and any existing mapping for either the + given prefix or the namespace URI will be removed. + *prefix* is the namespace prefix, *uri* is a namespace uri. 
Tags and + attributes in this namespace will be serialized with prefix if possible. + ValueError is raised if prefix is reserved or is invalid. + """ + if re.match(r"ns\d+$", prefix): + raise ValueError("Prefix format reserved for internal use") + for k, v in list(etree._namespace_map.items()): + if k == uri or v == prefix: + del etree._namespace_map[k] + etree._namespace_map[uri] = prefix +compat_xml_etree_register_namespace = compat_etree_register_namespace + +if sys.version_info < (2, 7): + # Here comes the crazy part: In 2.6, if the xpath is a unicode, + # .//node does not match if a node is a direct child of . ! + def compat_xpath(xpath): + if isinstance(xpath, compat_str): + xpath = xpath.encode('ascii') + return xpath +else: + compat_xpath = lambda xpath: xpath + + +compat_os_name = os._name if os.name == 'java' else os.name + + +if compat_os_name == 'nt': + def compat_shlex_quote(s): + return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"') +else: + try: + from shlex import quote as compat_shlex_quote + except ImportError: # Python < 3.3 + def compat_shlex_quote(s): + if re.match(r'^[-_\w./]+$', s): + return s + else: + return "'" + s.replace("'", "'\"'\"'") + "'" + + +try: + args = shlex.split('中文') + assert (isinstance(args, list) + and isinstance(args[0], compat_str) + and args[0] == '中文') + compat_shlex_split = shlex.split +except (AssertionError, UnicodeEncodeError): + # Working around shlex issue with unicode strings on some python 2 + # versions (see http://bugs.python.org/issue1548891) + def compat_shlex_split(s, comments=False, posix=True): + if isinstance(s, compat_str): + s = s.encode('utf-8') + return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix))) + + +def compat_ord(c): + if type(c) is int: + return c + else: + return ord(c) + + +if sys.version_info >= (3, 0): + compat_getenv = os.getenv + compat_expanduser = os.path.expanduser + + def compat_setenv(key, value, env=os.environ): + env[key] = value 
+else: + # Environment variables should be decoded with filesystem encoding. + # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918) + + def compat_getenv(key, default=None): + from .utils import get_filesystem_encoding + env = os.getenv(key, default) + if env: + env = env.decode(get_filesystem_encoding()) + return env + + def compat_setenv(key, value, env=os.environ): + def encode(v): + from .utils import get_filesystem_encoding + return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v + env[encode(key)] = encode(value) + + # HACK: The default implementations of os.path.expanduser from cpython do not decode + # environment variables with filesystem encoding. We will work around this by + # providing adjusted implementations. + # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib + # for different platforms with correct environment variables decoding. + + if compat_os_name == 'posix': + def compat_expanduser(path): + """Expand ~ and ~user constructions. If user or $HOME is unknown, + do nothing.""" + if not path.startswith('~'): + return path + i = path.find('/', 1) + if i < 0: + i = len(path) + if i == 1: + if 'HOME' not in os.environ: + import pwd + userhome = pwd.getpwuid(os.getuid()).pw_dir + else: + userhome = compat_getenv('HOME') + else: + import pwd + try: + pwent = pwd.getpwnam(path[1:i]) + except KeyError: + return path + userhome = pwent.pw_dir + userhome = userhome.rstrip('/') + return (userhome + path[i:]) or '/' + elif compat_os_name in ('nt', 'ce'): + def compat_expanduser(path): + """Expand ~ and ~user constructs. 
+ + If user or $HOME is unknown, do nothing.""" + if path[:1] != '~': + return path + i, n = 1, len(path) + while i < n and path[i] not in '/\\': + i = i + 1 + + if 'HOME' in os.environ: + userhome = compat_getenv('HOME') + elif 'USERPROFILE' in os.environ: + userhome = compat_getenv('USERPROFILE') + elif 'HOMEPATH' not in os.environ: + return path + else: + try: + drive = compat_getenv('HOMEDRIVE') + except KeyError: + drive = '' + userhome = os.path.join(drive, compat_getenv('HOMEPATH')) + + if i != 1: # ~user + userhome = os.path.join(os.path.dirname(userhome), path[1:i]) + + return userhome + path[i:] + else: + compat_expanduser = os.path.expanduser + +compat_os_path_expanduser = compat_expanduser + + +if compat_os_name == 'nt' and sys.version_info < (3, 8): + # os.path.realpath on Windows does not follow symbolic links + # prior to Python 3.8 (see https://bugs.python.org/issue9949) + def compat_realpath(path): + while os.path.islink(path): + path = os.path.abspath(os.readlink(path)) + return path +else: + compat_realpath = os.path.realpath + +compat_os_path_realpath = compat_realpath + + +if sys.version_info < (3, 0): + def compat_print(s): + from .utils import preferredencoding + print(s.encode(preferredencoding(), 'xmlcharrefreplace')) +else: + def compat_print(s): + assert isinstance(s, compat_str) + print(s) + + +if sys.version_info < (3, 0) and sys.platform == 'win32': + def compat_getpass(prompt, *args, **kwargs): + if isinstance(prompt, compat_str): + from .utils import preferredencoding + prompt = prompt.encode(preferredencoding()) + return getpass.getpass(prompt, *args, **kwargs) +else: + compat_getpass = getpass.getpass + +compat_getpass_getpass = compat_getpass + + +try: + compat_input = raw_input +except NameError: # Python 3 + compat_input = input + + +# Python < 2.6.5 require kwargs to be bytes +try: + def _testfunc(x): + pass + _testfunc(**{'x': 0}) +except TypeError: + def compat_kwargs(kwargs): + return dict((bytes(k), v) for k, v in 
kwargs.items()) +else: + compat_kwargs = lambda kwargs: kwargs + + +try: + compat_numeric_types = (int, float, long, complex) +except NameError: # Python 3 + compat_numeric_types = (int, float, complex) + + +try: + compat_integer_types = (int, long) +except NameError: # Python 3 + compat_integer_types = (int, ) + + +if sys.version_info < (2, 7): + def compat_socket_create_connection(address, timeout, source_address=None): + host, port = address + err = None + for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + sock = None + try: + sock = socket.socket(af, socktype, proto) + sock.settimeout(timeout) + if source_address: + sock.bind(source_address) + sock.connect(sa) + return sock + except socket.error as _: + err = _ + if sock is not None: + sock.close() + if err is not None: + raise err + else: + raise socket.error('getaddrinfo returns an empty list') +else: + compat_socket_create_connection = socket.create_connection + + +# Fix https://github.com/ytdl-org/youtube-dl/issues/4223 +# See http://bugs.python.org/issue9161 for what is broken +def workaround_optparse_bug9161(): + op = optparse.OptionParser() + og = optparse.OptionGroup(op, 'foo') + try: + og.add_option('-t') + except TypeError: + real_add_option = optparse.OptionGroup.add_option + + def _compat_add_option(self, *args, **kwargs): + enc = lambda v: ( + v.encode('ascii', 'replace') if isinstance(v, compat_str) + else v) + bargs = [enc(a) for a in args] + bkwargs = dict( + (k, enc(v)) for k, v in kwargs.items()) + return real_add_option(self, *bargs, **bkwargs) + optparse.OptionGroup.add_option = _compat_add_option + + +if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3 + compat_get_terminal_size = shutil.get_terminal_size +else: + _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines']) + + def compat_get_terminal_size(fallback=(80, 24)): + from .utils import process_communicate_or_kill + columns = 
compat_getenv('COLUMNS') + if columns: + columns = int(columns) + else: + columns = None + lines = compat_getenv('LINES') + if lines: + lines = int(lines) + else: + lines = None + + if columns is None or lines is None or columns <= 0 or lines <= 0: + try: + sp = subprocess.Popen( + ['stty', 'size'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = process_communicate_or_kill(sp) + _lines, _columns = map(int, out.split()) + except Exception: + _columns, _lines = _terminal_size(*fallback) + + if columns is None or columns <= 0: + columns = _columns + if lines is None or lines <= 0: + lines = _lines + return _terminal_size(columns, lines) + + +try: + itertools.count(start=0, step=1) + compat_itertools_count = itertools.count +except TypeError: # Python 2.6 + def compat_itertools_count(start=0, step=1): + while True: + yield start + start += step + + +if sys.version_info >= (3, 0): + from tokenize import tokenize as compat_tokenize_tokenize +else: + from tokenize import generate_tokens as compat_tokenize_tokenize + + +try: + struct.pack('!I', 0) +except TypeError: + # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument + # See https://bugs.python.org/issue19099 + def compat_struct_pack(spec, *args): + if isinstance(spec, compat_str): + spec = spec.encode('ascii') + return struct.pack(spec, *args) + + def compat_struct_unpack(spec, *args): + if isinstance(spec, compat_str): + spec = spec.encode('ascii') + return struct.unpack(spec, *args) + + class compat_Struct(struct.Struct): + def __init__(self, fmt): + if isinstance(fmt, compat_str): + fmt = fmt.encode('ascii') + super(compat_Struct, self).__init__(fmt) +else: + compat_struct_pack = struct.pack + compat_struct_unpack = struct.unpack + if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8): + class compat_Struct(struct.Struct): + def unpack(self, string): + if not isinstance(string, buffer): # noqa: F821 + string = buffer(string) # noqa: F821 + return 
super(compat_Struct, self).unpack(string) + else: + compat_Struct = struct.Struct + + +# compat_map/filter() returning an iterator, supposedly the +# same versioning as for zip below +try: + from future_builtins import map as compat_map +except ImportError: + try: + from itertools import imap as compat_map + except ImportError: + compat_map = map + +try: + from future_builtins import filter as compat_filter +except ImportError: + try: + from itertools import ifilter as compat_filter + except ImportError: + compat_filter = filter + +try: + from future_builtins import zip as compat_zip +except ImportError: # not 2.6+ or is 3.x + try: + from itertools import izip as compat_zip # < 2.5 or 3.x + except ImportError: + compat_zip = zip + + +# method renamed between Py2/3 +try: + from itertools import zip_longest as compat_itertools_zip_longest +except ImportError: + from itertools import izip_longest as compat_itertools_zip_longest + + +# new class in collections +try: + from collections import ChainMap as compat_collections_chain_map + # Py3.3's ChainMap is deficient + if sys.version_info < (3, 4): + raise ImportError +except ImportError: + # Py <= 3.3 + class compat_collections_chain_map(compat_collections_abc.MutableMapping): + + maps = [{}] + + def __init__(self, *maps): + self.maps = list(maps) or [{}] + + def __getitem__(self, k): + for m in self.maps: + if k in m: + return m[k] + raise KeyError(k) + + def __setitem__(self, k, v): + self.maps[0].__setitem__(k, v) + return + + def __contains__(self, k): + return any((k in m) for m in self.maps) + + def __delitem(self, k): + if k in self.maps[0]: + del self.maps[0][k] + return + raise KeyError(k) + + def __delitem__(self, k): + self.__delitem(k) + + def __iter__(self): + return itertools.chain(*reversed(self.maps)) + + def __len__(self): + return len(iter(self)) + + # to match Py3, don't del directly + def pop(self, k, *args): + if self.__contains__(k): + off = self.__getitem__(k) + self.__delitem(k) + return off + 
elif len(args) > 0: + return args[0] + raise KeyError(k) + + def new_child(self, m=None, **kwargs): + m = m or {} + m.update(kwargs) + return compat_collections_chain_map(m, *self.maps) + + @property + def parents(self): + return compat_collections_chain_map(*(self.maps[1:])) + + +# Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?) +compat_re_Pattern = type(re.compile('')) +# and on the type of a match +compat_re_Match = type(re.match('a', 'a')) + + +if sys.version_info < (3, 3): + def compat_b64decode(s, *args, **kwargs): + if isinstance(s, compat_str): + s = s.encode('ascii') + return base64.b64decode(s, *args, **kwargs) +else: + compat_b64decode = base64.b64decode + +compat_base64_b64decode = compat_b64decode + + +if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0): + # PyPy2 prior to version 5.4.0 expects byte strings as Windows function + # names, see the original PyPy issue [1] and the youtube-dl one [2]. + # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name + # 2. 
https://github.com/ytdl-org/youtube-dl/pull/4392 + def compat_ctypes_WINFUNCTYPE(*args, **kwargs): + real = ctypes.WINFUNCTYPE(*args, **kwargs) + + def resf(tpl, *args, **kwargs): + funcname, dll = tpl + return real((str(funcname), dll), *args, **kwargs) + + return resf +else: + def compat_ctypes_WINFUNCTYPE(*args, **kwargs): + return ctypes.WINFUNCTYPE(*args, **kwargs) + + +if sys.version_info < (3, 0): + # open(file, mode='r', buffering=- 1, encoding=None, errors=None, newline=None, closefd=True) not: opener=None + def compat_open(file_, *args, **kwargs): + if len(args) > 6 or 'opener' in kwargs: + raise ValueError('open: unsupported argument "opener"') + return io.open(file_, *args, **kwargs) +else: + compat_open = open + + +# compat_register_utf8 +def compat_register_utf8(): + if sys.platform == 'win32': + # https://github.com/ytdl-org/youtube-dl/issues/820 + from codecs import register, lookup + register( + lambda name: lookup('utf-8') if name == 'cp65001' else None) + + +# compat_datetime_timedelta_total_seconds +try: + compat_datetime_timedelta_total_seconds = datetime.timedelta.total_seconds +except AttributeError: + # Py 2.6 + def compat_datetime_timedelta_total_seconds(td): + return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6 + +# optional decompression packages +# PyPi brotli package implements 'br' Content-Encoding +try: + import brotli as compat_brotli +except ImportError: + compat_brotli = None +# PyPi ncompress package implements 'compress' Content-Encoding +try: + import ncompress as compat_ncompress +except ImportError: + compat_ncompress = None + + +legacy = [ + 'compat_HTMLParseError', + 'compat_HTMLParser', + 'compat_HTTPError', + 'compat_b64decode', + 'compat_cookiejar', + 'compat_cookiejar_Cookie', + 'compat_cookies', + 'compat_cookies_SimpleCookie', + 'compat_etree_Element', + 'compat_etree_register_namespace', + 'compat_expanduser', + 'compat_getpass', + 'compat_parse_qs', + 'compat_realpath', + 
'compat_urllib_parse_parse_qs', + 'compat_urllib_parse_unquote', + 'compat_urllib_parse_unquote_plus', + 'compat_urllib_parse_unquote_to_bytes', + 'compat_urllib_parse_urlencode', + 'compat_urllib_parse_urlparse', + 'compat_urlparse', + 'compat_urlretrieve', + 'compat_xml_parse_error', +] + + +__all__ = [ + 'compat_html_parser_HTMLParseError', + 'compat_html_parser_HTMLParser', + 'compat_Struct', + 'compat_base64_b64decode', + 'compat_basestring', + 'compat_brotli', + 'compat_casefold', + 'compat_chr', + 'compat_collections_abc', + 'compat_collections_chain_map', + 'compat_datetime_timedelta_total_seconds', + 'compat_http_cookiejar', + 'compat_http_cookiejar_Cookie', + 'compat_http_cookies', + 'compat_http_cookies_SimpleCookie', + 'compat_ctypes_WINFUNCTYPE', + 'compat_etree_fromstring', + 'compat_filter', + 'compat_get_terminal_size', + 'compat_getenv', + 'compat_getpass_getpass', + 'compat_html_entities', + 'compat_html_entities_html5', + 'compat_http_client', + 'compat_http_server', + 'compat_input', + 'compat_integer_types', + 'compat_itertools_count', + 'compat_itertools_zip_longest', + 'compat_kwargs', + 'compat_map', + 'compat_ncompress', + 'compat_numeric_types', + 'compat_open', + 'compat_ord', + 'compat_os_name', + 'compat_os_path_expanduser', + 'compat_os_path_realpath', + 'compat_print', + 'compat_re_Match', + 'compat_re_Pattern', + 'compat_register_utf8', + 'compat_setenv', + 'compat_shlex_quote', + 'compat_shlex_split', + 'compat_socket_create_connection', + 'compat_str', + 'compat_struct_pack', + 'compat_struct_unpack', + 'compat_subprocess_get_DEVNULL', + 'compat_tokenize_tokenize', + 'compat_urllib_error', + 'compat_urllib_parse', + 'compat_urllib_request', + 'compat_urllib_request_DataHandler', + 'compat_urllib_response', + 'compat_urllib_request_urlretrieve', + 'compat_urllib_HTTPError', + 'compat_xml_etree_ElementTree_Element', + 'compat_xml_etree_ElementTree_ParseError', + 'compat_xml_etree_register_namespace', + 'compat_xpath', + 'compat_zip', 
+ 'workaround_optparse_bug9161', +] diff --git a/src/music_kraken/pages/youtube_music/yt_utils/jsinterp.py b/src/music_kraken/pages/youtube_music/yt_utils/jsinterp.py new file mode 100644 index 0000000..86d9022 --- /dev/null +++ b/src/music_kraken/pages/youtube_music/yt_utils/jsinterp.py @@ -0,0 +1,1054 @@ +from __future__ import unicode_literals + +import itertools +import json +import operator +import re + +from functools import update_wrapper + +from .utils import ( + error_to_compat_str, + ExtractorError, + js_to_json, + remove_quotes, + unified_timestamp, + variadic, +) +from .compat import ( + compat_basestring, + compat_chr, + compat_collections_chain_map as ChainMap, + compat_itertools_zip_longest as zip_longest, + compat_str, +) + + +# name JS functions +class function_with_repr(object): + # from yt_dlp/utils.py, but in this module + # repr_ is always set + def __init__(self, func, repr_): + update_wrapper(self, func) + self.func, self.__repr = func, repr_ + + def __call__(self, *args, **kwargs): + return self.func(*args, **kwargs) + + def __repr__(self): + return self.__repr + + +# name JS operators +def wraps_op(op): + + def update_and_rename_wrapper(w): + f = update_wrapper(w, op) + # fn names are str in both Py 2/3 + f.__name__ = str('JS_') + f.__name__ + return f + + return update_and_rename_wrapper + + +# NB In principle NaN cannot be checked by membership. +# Here all NaN values are actually this one, so _NaN is _NaN, +# although _NaN != _NaN. 
+ +_NaN = float('nan') + + +def _js_bit_op(op): + + def zeroise(x): + return 0 if x in (None, JS_Undefined, _NaN) else x + + @wraps_op(op) + def wrapped(a, b): + return op(zeroise(a), zeroise(b)) & 0xffffffff + + return wrapped + + +def _js_arith_op(op): + + @wraps_op(op) + def wrapped(a, b): + if JS_Undefined in (a, b): + return _NaN + return op(a or 0, b or 0) + + return wrapped + + +def _js_div(a, b): + if JS_Undefined in (a, b) or not (a or b): + return _NaN + return operator.truediv(a or 0, b) if b else float('inf') + + +def _js_mod(a, b): + if JS_Undefined in (a, b) or not b: + return _NaN + return (a or 0) % b + + +def _js_exp(a, b): + if not b: + return 1 # even 0 ** 0 !! + elif JS_Undefined in (a, b): + return _NaN + return (a or 0) ** b + + +def _js_eq_op(op): + + @wraps_op(op) + def wrapped(a, b): + if set((a, b)) <= set((None, JS_Undefined)): + return op(a, a) + return op(a, b) + + return wrapped + + +def _js_comp_op(op): + + @wraps_op(op) + def wrapped(a, b): + if JS_Undefined in (a, b): + return False + if isinstance(a, compat_basestring): + b = compat_str(b or 0) + elif isinstance(b, compat_basestring): + a = compat_str(a or 0) + return op(a or 0, b or 0) + + return wrapped + + +def _js_ternary(cndn, if_true=True, if_false=False): + """Simulate JS's ternary operator (cndn?if_true:if_false)""" + if cndn in (False, None, 0, '', JS_Undefined, _NaN): + return if_false + return if_true + + +# (op, definition) in order of binding priority, tightest first +# avoid dict to maintain order +# definition None => Defined in JSInterpreter._operator +_OPERATORS = ( + ('>>', _js_bit_op(operator.rshift)), + ('<<', _js_bit_op(operator.lshift)), + ('+', _js_arith_op(operator.add)), + ('-', _js_arith_op(operator.sub)), + ('*', _js_arith_op(operator.mul)), + ('%', _js_mod), + ('/', _js_div), + ('**', _js_exp), +) + +_COMP_OPERATORS = ( + ('===', operator.is_), + ('!==', operator.is_not), + ('==', _js_eq_op(operator.eq)), + ('!=', _js_eq_op(operator.ne)), + ('<=', 
_js_comp_op(operator.le)), + ('>=', _js_comp_op(operator.ge)), + ('<', _js_comp_op(operator.lt)), + ('>', _js_comp_op(operator.gt)), +) + +_LOG_OPERATORS = ( + ('|', _js_bit_op(operator.or_)), + ('^', _js_bit_op(operator.xor)), + ('&', _js_bit_op(operator.and_)), +) + +_SC_OPERATORS = ( + ('?', None), + ('??', None), + ('||', None), + ('&&', None), +) + +_OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS)) + +_NAME_RE = r'[a-zA-Z_$][\w$]*' +_MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]'))) +_QUOTES = '\'"/' + + +class JS_Undefined(object): + pass + + +class JS_Break(ExtractorError): + def __init__(self): + ExtractorError.__init__(self, 'Invalid break') + + +class JS_Continue(ExtractorError): + def __init__(self): + ExtractorError.__init__(self, 'Invalid continue') + + +class JS_Throw(ExtractorError): + def __init__(self, e): + self.error = e + ExtractorError.__init__(self, 'Uncaught exception ' + error_to_compat_str(e)) + + +class LocalNameSpace(ChainMap): + def __getitem__(self, key): + try: + return super(LocalNameSpace, self).__getitem__(key) + except KeyError: + return JS_Undefined + + def __setitem__(self, key, value): + for scope in self.maps: + if key in scope: + scope[key] = value + return + self.maps[0][key] = value + + def __delitem__(self, key): + raise NotImplementedError('Deleting is not supported') + + def __repr__(self): + return 'LocalNameSpace%s' % (self.maps, ) + + +class JSInterpreter(object): + __named_object_counter = 0 + + _OBJ_NAME = '__youtube_dl_jsinterp_obj' + + OP_CHARS = None + + def __init__(self, code, objects=None): + self.code, self._functions = code, {} + self._objects = {} if objects is None else objects + if type(self).OP_CHARS is None: + type(self).OP_CHARS = self.OP_CHARS = self.__op_chars() + + class Exception(ExtractorError): + def __init__(self, msg, *args, **kwargs): + expr = kwargs.pop('expr', None) + if expr is not None: + msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr) + 
super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs) + + class JS_RegExp(object): + RE_FLAGS = { + # special knowledge: Python's re flags are bitmask values, current max 128 + # invent new bitmask values well above that for literal parsing + # TODO: execute matches with these flags (remaining: d, y) + 'd': 1024, # Generate indices for substring matches + 'g': 2048, # Global search + 'i': re.I, # Case-insensitive search + 'm': re.M, # Multi-line search + 's': re.S, # Allows . to match newline characters + 'u': re.U, # Treat a pattern as a sequence of unicode code points + 'y': 4096, # Perform a "sticky" search that matches starting at the current position in the target string + } + + def __init__(self, pattern_txt, flags=0): + if isinstance(flags, compat_str): + flags, _ = self.regex_flags(flags) + # First, avoid https://github.com/python/cpython/issues/74534 + self.__self = None + self.__pattern_txt = pattern_txt.replace('[[', r'[\[') + self.__flags = flags + + def __instantiate(self): + if self.__self: + return + self.__self = re.compile(self.__pattern_txt, self.__flags) + # Thx: https://stackoverflow.com/questions/44773522/setattr-on-python2-sre-sre-pattern + for name in dir(self.__self): + # Only these? Obviously __class__, __init__. + # PyPy creates a __weakref__ attribute with value None + # that can't be setattr'd but also can't need to be copied. 
+ if name in ('__class__', '__init__', '__weakref__'): + continue + setattr(self, name, getattr(self.__self, name)) + + def __getattr__(self, name): + self.__instantiate() + # make Py 2.6 conform to its lying documentation + if name == 'flags': + self.flags = self.__flags + return self.flags + elif name == 'pattern': + self.pattern = self.__pattern_txt + return self.pattern + elif hasattr(self.__self, name): + v = getattr(self.__self, name) + setattr(self, name, v) + return v + elif name in ('groupindex', 'groups'): + return 0 if name == 'groupindex' else {} + raise AttributeError('{0} has no attribute named {1}'.format(self, name)) + + @classmethod + def regex_flags(cls, expr): + flags = 0 + if not expr: + return flags, expr + for idx, ch in enumerate(expr): + if ch not in cls.RE_FLAGS: + break + flags |= cls.RE_FLAGS[ch] + return flags, expr[idx + 1:] + + @classmethod + def __op_chars(cls): + op_chars = set(';,[') + for op in cls._all_operators(): + for c in op[0]: + op_chars.add(c) + return op_chars + + def _named_object(self, namespace, obj): + self.__named_object_counter += 1 + name = '%s%d' % (self._OBJ_NAME, self.__named_object_counter) + if callable(obj) and not isinstance(obj, function_with_repr): + obj = function_with_repr(obj, 'F<%s>' % (self.__named_object_counter, )) + namespace[name] = obj + return name + + @classmethod + def _separate(cls, expr, delim=',', max_split=None, skip_delims=None): + if not expr: + return + # collections.Counter() is ~10% slower in both 2.7 and 3.9 + counters = dict((k, 0) for k in _MATCHING_PARENS.values()) + start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1 + in_quote, escaping, skipping = None, False, 0 + after_op, in_regex_char_group = True, False + + for idx, char in enumerate(expr): + paren_delta = 0 + if not in_quote: + if char in _MATCHING_PARENS: + counters[_MATCHING_PARENS[char]] += 1 + paren_delta = 1 + elif char in counters: + counters[char] -= 1 + paren_delta = -1 + if not escaping: + if char in _QUOTES 
and in_quote in (char, None): + if in_quote or after_op or char != '/': + in_quote = None if in_quote and not in_regex_char_group else char + elif in_quote == '/' and char in '[]': + in_regex_char_group = char == '[' + escaping = not escaping and in_quote and char == '\\' + after_op = not in_quote and (char in cls.OP_CHARS or paren_delta > 0 or (after_op and char.isspace())) + + if char != delim[pos] or any(counters.values()) or in_quote: + pos = skipping = 0 + continue + elif skipping > 0: + skipping -= 1 + continue + elif pos == 0 and skip_delims: + here = expr[idx:] + for s in variadic(skip_delims): + if here.startswith(s) and s: + skipping = len(s) - 1 + break + if skipping > 0: + continue + if pos < delim_len: + pos += 1 + continue + yield expr[start: idx - delim_len] + start, pos = idx + 1, 0 + splits += 1 + if max_split and splits >= max_split: + break + yield expr[start:] + + @classmethod + def _separate_at_paren(cls, expr, delim=None): + if delim is None: + delim = expr and _MATCHING_PARENS[expr[0]] + separated = list(cls._separate(expr, delim, 1)) + + if len(separated) < 2: + raise cls.Exception('No terminating paren {delim} in {expr!r:.5500}'.format(**locals())) + return separated[0][1:].strip(), separated[1].strip() + + @staticmethod + def _all_operators(): + return itertools.chain( + # Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence + _SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS) + + def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion): + if op in ('||', '&&'): + if (op == '&&') ^ _js_ternary(left_val): + return left_val # short circuiting + elif op == '??': + if left_val not in (None, JS_Undefined): + return left_val + elif op == '?': + right_expr = _js_ternary(left_val, *self._separate(right_expr, ':', 1)) + + right_val = self.interpret_expression(right_expr, local_vars, allow_recursion) + opfunc = op and next((v for k, v in self._all_operators() if k == 
op), None) + if not opfunc: + return right_val + + try: + # print('Eval:', opfunc.__name__, left_val, right_val) + return opfunc(left_val, right_val) + except Exception as e: + raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e) + + def _index(self, obj, idx, allow_undefined=False): + if idx == 'length': + return len(obj) + try: + return obj[int(idx)] if isinstance(obj, list) else obj[idx] + except Exception as e: + if allow_undefined: + return JS_Undefined + raise self.Exception('Cannot get index {idx:.100}'.format(**locals()), expr=repr(obj), cause=e) + + def _dump(self, obj, namespace): + try: + return json.dumps(obj) + except TypeError: + return self._named_object(namespace, obj) + + # used below + _VAR_RET_THROW_RE = re.compile(r'''(?x) + (?P(?:var|const|let)\s)|return(?:\s+|(?=["'])|$)|(?Pthrow\s+) + ''') + _COMPOUND_RE = re.compile(r'''(?x) + (?Ptry)\s*\{| + (?Pif)\s*\(| + (?Pswitch)\s*\(| + (?Pfor)\s*\(| + (?Pwhile)\s*\( + ''') + _FINALLY_RE = re.compile(r'finally\s*\{') + _SWITCH_RE = re.compile(r'switch\s*\(') + + def interpret_statement(self, stmt, local_vars, allow_recursion=100): + if allow_recursion < 0: + raise self.Exception('Recursion limit reached') + allow_recursion -= 1 + + # print('At: ' + stmt[:60]) + should_return = False + # fails on (eg) if (...) 
stmt1; else stmt2; + sub_statements = list(self._separate(stmt, ';')) or [''] + expr = stmt = sub_statements.pop().strip() + for sub_stmt in sub_statements: + ret, should_return = self.interpret_statement(sub_stmt, local_vars, allow_recursion) + if should_return: + return ret, should_return + + m = self._VAR_RET_THROW_RE.match(stmt) + if m: + expr = stmt[len(m.group(0)):].strip() + if m.group('throw'): + raise JS_Throw(self.interpret_expression(expr, local_vars, allow_recursion)) + should_return = not m.group('var') + if not expr: + return None, should_return + + if expr[0] in _QUOTES: + inner, outer = self._separate(expr, expr[0], 1) + if expr[0] == '/': + flags, outer = self.JS_RegExp.regex_flags(outer) + inner = self.JS_RegExp(inner[1:], flags=flags) + else: + inner = json.loads(js_to_json(inner + expr[0])) # , strict=True)) + if not outer: + return inner, should_return + expr = self._named_object(local_vars, inner) + outer + + new_kw, _, obj = expr.partition('new ') + if not new_kw: + for klass, konstr in (('Date', lambda x: int(unified_timestamp(x, False) * 1000)), + ('RegExp', self.JS_RegExp), + ('Error', self.Exception)): + if not obj.startswith(klass + '('): + continue + left, right = self._separate_at_paren(obj[len(klass):]) + argvals = self.interpret_iter(left, local_vars, allow_recursion) + expr = konstr(*argvals) + if expr is None: + raise self.Exception('Failed to parse {klass} {left!r:.100}'.format(**locals()), expr=expr) + expr = self._dump(expr, local_vars) + right + break + else: + raise self.Exception('Unsupported object {obj:.100}'.format(**locals()), expr=expr) + + if expr.startswith('void '): + left = self.interpret_expression(expr[5:], local_vars, allow_recursion) + return None, should_return + + if expr.startswith('{'): + inner, outer = self._separate_at_paren(expr) + # try for object expression (Map) + sub_expressions = [list(self._separate(sub_expr.strip(), ':', 1)) for sub_expr in self._separate(inner)] + if all(len(sub_expr) == 2 for 
sub_expr in sub_expressions): + return dict( + (key_expr if re.match(_NAME_RE, key_expr) else key_expr, + self.interpret_expression(val_expr, local_vars, allow_recursion)) + for key_expr, val_expr in sub_expressions), should_return + # or statement list + inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion) + if not outer or should_abort: + return inner, should_abort or should_return + else: + expr = self._dump(inner, local_vars) + outer + + if expr.startswith('('): + + m = re.match(r'\((?P[a-z])%(?P[a-z])\.length\+(?P=e)\.length\)%(?P=e)\.length', expr) + if m: + # short-cut eval of frequently used `(d%e.length+e.length)%e.length`, worth ~6% on `pytest -k test_nsig` + outer = None + inner, should_abort = self._offset_e_by_d(m.group('d'), m.group('e'), local_vars) + else: + inner, outer = self._separate_at_paren(expr) + inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion) + if not outer or should_abort: + return inner, should_abort or should_return + else: + expr = self._dump(inner, local_vars) + outer + + if expr.startswith('['): + inner, outer = self._separate_at_paren(expr) + name = self._named_object(local_vars, [ + self.interpret_expression(item, local_vars, allow_recursion) + for item in self._separate(inner)]) + expr = name + outer + + m = self._COMPOUND_RE.match(expr) + md = m.groupdict() if m else {} + if md.get('if'): + cndn, expr = self._separate_at_paren(expr[m.end() - 1:]) + if expr.startswith('{'): + if_expr, expr = self._separate_at_paren(expr) + else: + # may lose ... else ... because of ll.368-374 + if_expr, expr = self._separate_at_paren(expr, delim=';') + else_expr = None + m = re.match(r'else\s*(?P\{)?', expr) + if m: + if m.group('block'): + else_expr, expr = self._separate_at_paren(expr[m.end() - 1:]) + else: + # handle subset ... else if (...) {...} else ... 
+ # TODO: make interpret_statement do this properly, if possible + exprs = list(self._separate(expr[m.end():], delim='}', max_split=2)) + if len(exprs) > 1: + if re.match(r'\s*if\s*\(', exprs[0]) and re.match(r'\s*else\b', exprs[1]): + else_expr = exprs[0] + '}' + exprs[1] + expr = (exprs[2] + '}') if len(exprs) == 3 else None + else: + else_expr = exprs[0] + exprs.append('') + expr = '}'.join(exprs[1:]) + else: + else_expr = exprs[0] + expr = None + else_expr = else_expr.lstrip() + '}' + cndn = _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)) + ret, should_abort = self.interpret_statement( + if_expr if cndn else else_expr, local_vars, allow_recursion) + if should_abort: + return ret, True + + elif md.get('try'): + try_expr, expr = self._separate_at_paren(expr[m.end() - 1:]) + err = None + try: + ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion) + if should_abort: + return ret, True + except Exception as e: + # XXX: This works for now, but makes debugging future issues very hard + err = e + + pending = (None, False) + m = re.match(r'catch\s*(?P\(\s*{_NAME_RE}\s*\))?\{{'.format(**globals()), expr) + if m: + sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:]) + if err: + catch_vars = {} + if m.group('err'): + catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err + catch_vars = local_vars.new_child(m=catch_vars) + err = None + pending = self.interpret_statement(sub_expr, catch_vars, allow_recursion) + + m = self._FINALLY_RE.match(expr) + if m: + sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:]) + ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion) + if should_abort: + return ret, True + + ret, should_abort = pending + if should_abort: + return ret, True + + if err: + raise err + + elif md.get('for') or md.get('while'): + init_or_cond, remaining = self._separate_at_paren(expr[m.end() - 1:]) + if remaining.startswith('{'): + body, expr = 
self._separate_at_paren(remaining) + else: + switch_m = self._SWITCH_RE.match(remaining) # FIXME + if switch_m: + switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:]) + body, expr = self._separate_at_paren(remaining, '}') + body = 'switch(%s){%s}' % (switch_val, body) + else: + body, expr = remaining, '' + if md.get('for'): + start, cndn, increment = self._separate(init_or_cond, ';') + self.interpret_expression(start, local_vars, allow_recursion) + else: + cndn, increment = init_or_cond, None + while _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)): + try: + ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion) + if should_abort: + return ret, True + except JS_Break: + break + except JS_Continue: + pass + if increment: + self.interpret_expression(increment, local_vars, allow_recursion) + + elif md.get('switch'): + switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:]) + switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion) + body, expr = self._separate_at_paren(remaining, '}') + items = body.replace('default:', 'case default:').split('case ')[1:] + for default in (False, True): + matched = False + for item in items: + case, stmt = (i.strip() for i in self._separate(item, ':', 1)) + if default: + matched = matched or case == 'default' + elif not matched: + matched = (case != 'default' + and switch_val == self.interpret_expression(case, local_vars, allow_recursion)) + if not matched: + continue + try: + ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion) + if should_abort: + return ret + except JS_Break: + break + if matched: + break + + if md: + ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion) + return ret, should_abort or should_return + + # Comma separated statements + sub_expressions = list(self._separate(expr)) + if len(sub_expressions) > 1: + for sub_expr in sub_expressions: + ret, 
should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion) + if should_abort: + return ret, True + return ret, False + + for m in re.finditer(r'''(?x) + (?P\+\+|--)(?P{_NAME_RE})| + (?P{_NAME_RE})(?P\+\+|--)'''.format(**globals()), expr): + var = m.group('var1') or m.group('var2') + start, end = m.span() + sign = m.group('pre_sign') or m.group('post_sign') + ret = local_vars[var] + local_vars[var] += 1 if sign[0] == '+' else -1 + if m.group('pre_sign'): + ret = local_vars[var] + expr = expr[:start] + self._dump(ret, local_vars) + expr[end:] + + if not expr: + return None, should_return + + m = re.match(r'''(?x) + (?P + (?P{_NAME_RE})(?:\[(?P[^\]]+?)\])?\s* + (?P{_OPERATOR_RE})? + =(?!=)(?P.*)$ + )|(?P + (?!if|return|true|false|null|undefined)(?P{_NAME_RE})$ + )|(?P + (?P{_NAME_RE})\[(?P.+)\]$ + )|(?P + (?P{_NAME_RE})(?:(?P\?)?\.(?P[^(]+)|\[(?P[^\]]+)\])\s* + )|(?P + (?P{_NAME_RE})\((?P.*)\)$ + )'''.format(**globals()), expr) + md = m.groupdict() if m else {} + if md.get('assign'): + left_val = local_vars.get(m.group('out')) + + if not m.group('index'): + local_vars[m.group('out')] = self._operator( + m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion) + return local_vars[m.group('out')], should_return + elif left_val in (None, JS_Undefined): + raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr) + + idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion) + if not isinstance(idx, (int, float)): + raise self.Exception('List index %s must be integer' % (idx, ), expr=expr) + idx = int(idx) + left_val[idx] = self._operator( + m.group('op'), self._index(left_val, idx), m.group('expr'), expr, local_vars, allow_recursion) + return left_val[idx], should_return + + elif expr.isdigit(): + return int(expr), should_return + + elif expr == 'break': + raise JS_Break() + elif expr == 'continue': + raise JS_Continue() + + elif expr == 'undefined': + return JS_Undefined, should_return 
+ elif expr == 'NaN': + return _NaN, should_return + + elif md.get('return'): + return local_vars[m.group('name')], should_return + + try: + ret = json.loads(js_to_json(expr)) # strict=True) + if not md.get('attribute'): + return ret, should_return + except ValueError: + pass + + if md.get('indexing'): + val = local_vars[m.group('in')] + idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion) + return self._index(val, idx), should_return + + for op, _ in self._all_operators(): + # hackety: have higher priority than <>, but don't confuse them + skip_delim = (op + op) if op in '<>*?' else None + if op == '?': + skip_delim = (skip_delim, '?.') + separated = list(self._separate(expr, op, skip_delims=skip_delim)) + if len(separated) < 2: + continue + + right_expr = separated.pop() + # handle operators that are both unary and binary, minimal BODMAS + if op in ('+', '-'): + undone = 0 + while len(separated) > 1 and not separated[-1].strip(): + undone += 1 + separated.pop() + if op == '-' and undone % 2 != 0: + right_expr = op + right_expr + left_val = separated[-1] + for dm_op in ('*', '%', '/', '**'): + bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim)) + if len(bodmas) > 1 and not bodmas[-1].strip(): + expr = op.join(separated) + op + right_expr + right_expr = None + break + if right_expr is None: + continue + + left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion) + return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return + + if md.get('attribute'): + variable, member, nullish = m.group('var', 'member', 'nullish') + if not member: + member = self.interpret_expression(m.group('member2'), local_vars, allow_recursion) + arg_str = expr[m.end():] + if arg_str.startswith('('): + arg_str, remaining = self._separate_at_paren(arg_str) + else: + arg_str, remaining = None, arg_str + + def assertion(cndn, msg): + """ assert, but without risk of getting optimized out 
""" + if not cndn: + memb = member + raise self.Exception('{memb} {msg}'.format(**locals()), expr=expr) + + def eval_method(): + if (variable, member) == ('console', 'debug'): + return + types = { + 'String': compat_str, + 'Math': float, + } + obj = local_vars.get(variable) + if obj in (JS_Undefined, None): + obj = types.get(variable, JS_Undefined) + if obj is JS_Undefined: + try: + if variable not in self._objects: + self._objects[variable] = self.extract_object(variable) + obj = self._objects[variable] + except self.Exception: + if not nullish: + raise + + if nullish and obj is JS_Undefined: + return JS_Undefined + + # Member access + if arg_str is None: + return self._index(obj, member, nullish) + + # Function call + argvals = [ + self.interpret_expression(v, local_vars, allow_recursion) + for v in self._separate(arg_str)] + + if obj == compat_str: + if member == 'fromCharCode': + assertion(argvals, 'takes one or more arguments') + return ''.join(map(compat_chr, argvals)) + raise self.Exception('Unsupported string method ' + member, expr=expr) + elif obj == float: + if member == 'pow': + assertion(len(argvals) == 2, 'takes two arguments') + return argvals[0] ** argvals[1] + raise self.Exception('Unsupported Math method ' + member, expr=expr) + + if member == 'split': + assertion(argvals, 'takes one or more arguments') + assertion(len(argvals) == 1, 'with limit argument is not implemented') + return obj.split(argvals[0]) if argvals[0] else list(obj) + elif member == 'join': + assertion(isinstance(obj, list), 'must be applied on a list') + assertion(len(argvals) == 1, 'takes exactly one argument') + return argvals[0].join(obj) + elif member == 'reverse': + assertion(not argvals, 'does not take any arguments') + obj.reverse() + return obj + elif member == 'slice': + assertion(isinstance(obj, list), 'must be applied on a list') + assertion(len(argvals) == 1, 'takes exactly one argument') + return obj[argvals[0]:] + elif member == 'splice': + 
assertion(isinstance(obj, list), 'must be applied on a list') + assertion(argvals, 'takes one or more arguments') + index, howMany = map(int, (argvals + [len(obj)])[:2]) + if index < 0: + index += len(obj) + add_items = argvals[2:] + res = [] + for i in range(index, min(index + howMany, len(obj))): + res.append(obj.pop(index)) + for i, item in enumerate(add_items): + obj.insert(index + i, item) + return res + elif member == 'unshift': + assertion(isinstance(obj, list), 'must be applied on a list') + assertion(argvals, 'takes one or more arguments') + for item in reversed(argvals): + obj.insert(0, item) + return obj + elif member == 'pop': + assertion(isinstance(obj, list), 'must be applied on a list') + assertion(not argvals, 'does not take any arguments') + if not obj: + return + return obj.pop() + elif member == 'push': + assertion(argvals, 'takes one or more arguments') + obj.extend(argvals) + return obj + elif member == 'forEach': + assertion(argvals, 'takes one or more arguments') + assertion(len(argvals) <= 2, 'takes at-most 2 arguments') + f, this = (argvals + [''])[:2] + return [f((item, idx, obj), {'this': this}, allow_recursion) for idx, item in enumerate(obj)] + elif member == 'indexOf': + assertion(argvals, 'takes one or more arguments') + assertion(len(argvals) <= 2, 'takes at-most 2 arguments') + idx, start = (argvals + [0])[:2] + try: + return obj.index(idx, start) + except ValueError: + return -1 + elif member == 'charCodeAt': + assertion(isinstance(obj, compat_str), 'must be applied on a string') + # assertion(len(argvals) == 1, 'takes exactly one argument') # but not enforced + idx = argvals[0] if isinstance(argvals[0], int) else 0 + if idx >= len(obj): + return None + return ord(obj[idx]) + elif member in ('replace', 'replaceAll'): + assertion(isinstance(obj, compat_str), 'must be applied on a string') + assertion(len(argvals) == 2, 'takes exactly two arguments') + # TODO: argvals[1] callable, other Py vs JS edge cases + if isinstance(argvals[0], 
self.JS_RegExp): + count = 0 if argvals[0].flags & self.JS_RegExp.RE_FLAGS['g'] else 1 + assertion(member != 'replaceAll' or count == 0, + 'replaceAll must be called with a global RegExp') + return argvals[0].sub(argvals[1], obj, count=count) + count = ('replaceAll', 'replace').index(member) + return re.sub(re.escape(argvals[0]), argvals[1], obj, count=count) + + idx = int(member) if isinstance(obj, list) else member + return obj[idx](argvals, allow_recursion=allow_recursion) + + if remaining: + ret, should_abort = self.interpret_statement( + self._named_object(local_vars, eval_method()) + remaining, + local_vars, allow_recursion) + return ret, should_return or should_abort + else: + return eval_method(), should_return + + elif md.get('function'): + fname = m.group('fname') + argvals = [self.interpret_expression(v, local_vars, allow_recursion) + for v in self._separate(m.group('args'))] + if fname in local_vars: + return local_vars[fname](argvals, allow_recursion=allow_recursion), should_return + elif fname not in self._functions: + self._functions[fname] = self.extract_function(fname) + return self._functions[fname](argvals, allow_recursion=allow_recursion), should_return + + raise self.Exception( + 'Unsupported JS expression ' + (expr[:40] if expr != stmt else ''), expr=stmt) + + def interpret_expression(self, expr, local_vars, allow_recursion): + ret, should_return = self.interpret_statement(expr, local_vars, allow_recursion) + if should_return: + raise self.Exception('Cannot return from an expression', expr) + return ret + + def interpret_iter(self, list_txt, local_vars, allow_recursion): + for v in self._separate(list_txt): + yield self.interpret_expression(v, local_vars, allow_recursion) + + def extract_object(self, objname): + _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')''' + obj = {} + fields = None + for obj_m in re.finditer( + r'''(?xs) + {0}\s*\.\s*{1}|{1}\s*=\s*\{{\s* + (?P({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*) + 
}}\s*; + '''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE), + self.code): + fields = obj_m.group('fields') + if fields: + break + else: + raise self.Exception('Could not find object ' + objname) + # Currently, it only supports function definitions + fields_m = re.finditer( + r'''(?x) + (?P%s)\s*:\s*function\s*\((?P(?:%s|,)*)\){(?P[^}]+)} + ''' % (_FUNC_NAME_RE, _NAME_RE), + fields) + for f in fields_m: + argnames = self.build_arglist(f.group('args')) + obj[remove_quotes(f.group('key'))] = self.build_function(argnames, f.group('code')) + + return obj + + @staticmethod + def _offset_e_by_d(d, e, local_vars): + """ Short-cut eval: (d%e.length+e.length)%e.length """ + try: + d = local_vars[d] + e = local_vars[e] + e = len(e) + return _js_mod(_js_mod(d, e) + e, e), False + except Exception: + return None, True + + def extract_function_code(self, funcname): + """ @returns argnames, code """ + func_m = re.search( + r'''(?xs) + (?: + function\s+%(name)s| + [{;,]\s*%(name)s\s*=\s*function| + (?:var|const|let)\s+%(name)s\s*=\s*function + )\s* + \((?P[^)]*)\)\s* + (?P{.+})''' % {'name': re.escape(funcname)}, + self.code) + if func_m is None: + raise self.Exception('Could not find JS function "{funcname}"'.format(**locals())) + code, _ = self._separate_at_paren(func_m.group('code')) # refine the match + return self.build_arglist(func_m.group('args')), code + + def extract_function(self, funcname): + return function_with_repr( + self.extract_function_from_code(*self.extract_function_code(funcname)), + 'F<%s>' % (funcname, )) + + def extract_function_from_code(self, argnames, code, *global_stack): + local_vars = {} + while True: + mobj = re.search(r'function\((?P[^)]*)\)\s*{', code) + if mobj is None: + break + start, body_start = mobj.span() + body, remaining = self._separate_at_paren(code[body_start - 1:], '}') + name = self._named_object(local_vars, self.extract_function_from_code( + [x.strip() for x in mobj.group('args').split(',')], + body, local_vars, *global_stack)) 
+ code = code[:start] + name + remaining + return self.build_function(argnames, code, local_vars, *global_stack) + + def call_function(self, funcname, *args): + return self.extract_function(funcname)(args) + + @classmethod + def build_arglist(cls, arg_text): + if not arg_text: + return [] + + def valid_arg(y): + y = y.strip() + if not y: + raise cls.Exception('Missing arg in "%s"' % (arg_text, )) + return y + + return [valid_arg(x) for x in cls._separate(arg_text)] + + def build_function(self, argnames, code, *global_stack): + global_stack = list(global_stack) or [{}] + argnames = tuple(argnames) + + def resf(args, kwargs={}, allow_recursion=100): + global_stack[0].update( + zip_longest(argnames, args, fillvalue=None)) + global_stack[0].update(kwargs) + var_stack = LocalNameSpace(*global_stack) + ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1) + if should_abort: + return ret + return resf diff --git a/src/music_kraken/pages/youtube_music/yt_utils/socks.py b/src/music_kraken/pages/youtube_music/yt_utils/socks.py new file mode 100644 index 0000000..5d4adbe --- /dev/null +++ b/src/music_kraken/pages/youtube_music/yt_utils/socks.py @@ -0,0 +1,273 @@ +# Public Domain SOCKS proxy protocol implementation +# Adapted from https://gist.github.com/bluec0re/cafd3764412967417fd3 + +from __future__ import unicode_literals + +# References: +# SOCKS4 protocol http://www.openssh.com/txt/socks4.protocol +# SOCKS4A protocol http://www.openssh.com/txt/socks4a.protocol +# SOCKS5 protocol https://tools.ietf.org/html/rfc1928 +# SOCKS5 username/password authentication https://tools.ietf.org/html/rfc1929 + +import collections +import socket + +from .compat import ( + compat_ord, + compat_struct_pack, + compat_struct_unpack, +) + +__author__ = 'Timo Schmid ' + +SOCKS4_VERSION = 4 +SOCKS4_REPLY_VERSION = 0x00 +# Excerpt from SOCKS4A protocol: +# if the client cannot resolve the destination host's domain name to find its +# IP address, it 
should set the first three bytes of DSTIP to NULL and the last +# byte to a non-zero value. +SOCKS4_DEFAULT_DSTIP = compat_struct_pack('!BBBB', 0, 0, 0, 0xFF) + +SOCKS5_VERSION = 5 +SOCKS5_USER_AUTH_VERSION = 0x01 +SOCKS5_USER_AUTH_SUCCESS = 0x00 + + +class Socks4Command(object): + CMD_CONNECT = 0x01 + CMD_BIND = 0x02 + + +class Socks5Command(Socks4Command): + CMD_UDP_ASSOCIATE = 0x03 + + +class Socks5Auth(object): + AUTH_NONE = 0x00 + AUTH_GSSAPI = 0x01 + AUTH_USER_PASS = 0x02 + AUTH_NO_ACCEPTABLE = 0xFF # For server response + + +class Socks5AddressType(object): + ATYP_IPV4 = 0x01 + ATYP_DOMAINNAME = 0x03 + ATYP_IPV6 = 0x04 + + +class ProxyError(socket.error): + ERR_SUCCESS = 0x00 + + def __init__(self, code=None, msg=None): + if code is not None and msg is None: + msg = self.CODES.get(code) or 'unknown error' + super(ProxyError, self).__init__(code, msg) + + +class InvalidVersionError(ProxyError): + def __init__(self, expected_version, got_version): + msg = ('Invalid response version from server. 
Expected {0:02x} got ' + '{1:02x}'.format(expected_version, got_version)) + super(InvalidVersionError, self).__init__(0, msg) + + +class Socks4Error(ProxyError): + ERR_SUCCESS = 90 + + CODES = { + 91: 'request rejected or failed', + 92: 'request rejected because SOCKS server cannot connect to identd on the client', + 93: 'request rejected because the client program and identd report different user-ids' + } + + +class Socks5Error(ProxyError): + ERR_GENERAL_FAILURE = 0x01 + + CODES = { + 0x01: 'general SOCKS server failure', + 0x02: 'connection not allowed by ruleset', + 0x03: 'Network unreachable', + 0x04: 'Host unreachable', + 0x05: 'Connection refused', + 0x06: 'TTL expired', + 0x07: 'Command not supported', + 0x08: 'Address type not supported', + 0xFE: 'unknown username or invalid password', + 0xFF: 'all offered authentication methods were rejected' + } + + +class ProxyType(object): + SOCKS4 = 0 + SOCKS4A = 1 + SOCKS5 = 2 + + +Proxy = collections.namedtuple('Proxy', ( + 'type', 'host', 'port', 'username', 'password', 'remote_dns')) + + +class sockssocket(socket.socket): + def __init__(self, *args, **kwargs): + self._proxy = None + super(sockssocket, self).__init__(*args, **kwargs) + + def setproxy(self, proxytype, addr, port, rdns=True, username=None, password=None): + assert proxytype in (ProxyType.SOCKS4, ProxyType.SOCKS4A, ProxyType.SOCKS5) + + self._proxy = Proxy(proxytype, addr, port, username, password, rdns) + + def recvall(self, cnt): + data = b'' + while len(data) < cnt: + cur = self.recv(cnt - len(data)) + if not cur: + raise EOFError('{0} bytes missing'.format(cnt - len(data))) + data += cur + return data + + def _recv_bytes(self, cnt): + data = self.recvall(cnt) + return compat_struct_unpack('!{0}B'.format(cnt), data) + + @staticmethod + def _len_and_data(data): + return compat_struct_pack('!B', len(data)) + data + + def _check_response_version(self, expected_version, got_version): + if got_version != expected_version: + self.close() + raise 
InvalidVersionError(expected_version, got_version) + + def _resolve_address(self, destaddr, default, use_remote_dns): + try: + return socket.inet_aton(destaddr) + except socket.error: + if use_remote_dns and self._proxy.remote_dns: + return default + else: + return socket.inet_aton(socket.gethostbyname(destaddr)) + + def _setup_socks4(self, address, is_4a=False): + destaddr, port = address + + ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a) + + packet = compat_struct_pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr + + username = (self._proxy.username or '').encode('utf-8') + packet += username + b'\x00' + + if is_4a and self._proxy.remote_dns: + packet += destaddr.encode('utf-8') + b'\x00' + + self.sendall(packet) + + version, resp_code, dstport, dsthost = compat_struct_unpack('!BBHI', self.recvall(8)) + + self._check_response_version(SOCKS4_REPLY_VERSION, version) + + if resp_code != Socks4Error.ERR_SUCCESS: + self.close() + raise Socks4Error(resp_code) + + return (dsthost, dstport) + + def _setup_socks4a(self, address): + self._setup_socks4(address, is_4a=True) + + def _socks5_auth(self): + packet = compat_struct_pack('!B', SOCKS5_VERSION) + + auth_methods = [Socks5Auth.AUTH_NONE] + if self._proxy.username and self._proxy.password: + auth_methods.append(Socks5Auth.AUTH_USER_PASS) + + packet += compat_struct_pack('!B', len(auth_methods)) + packet += compat_struct_pack('!{0}B'.format(len(auth_methods)), *auth_methods) + + self.sendall(packet) + + version, method = self._recv_bytes(2) + + self._check_response_version(SOCKS5_VERSION, version) + + if method == Socks5Auth.AUTH_NO_ACCEPTABLE or ( + method == Socks5Auth.AUTH_USER_PASS and (not self._proxy.username or not self._proxy.password)): + self.close() + raise Socks5Error(Socks5Auth.AUTH_NO_ACCEPTABLE) + + if method == Socks5Auth.AUTH_USER_PASS: + username = self._proxy.username.encode('utf-8') + password = self._proxy.password.encode('utf-8') + packet = 
compat_struct_pack('!B', SOCKS5_USER_AUTH_VERSION) + packet += self._len_and_data(username) + self._len_and_data(password) + self.sendall(packet) + + version, status = self._recv_bytes(2) + + self._check_response_version(SOCKS5_USER_AUTH_VERSION, version) + + if status != SOCKS5_USER_AUTH_SUCCESS: + self.close() + raise Socks5Error(Socks5Error.ERR_GENERAL_FAILURE) + + def _setup_socks5(self, address): + destaddr, port = address + + ipaddr = self._resolve_address(destaddr, None, use_remote_dns=True) + + self._socks5_auth() + + reserved = 0 + packet = compat_struct_pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved) + if ipaddr is None: + destaddr = destaddr.encode('utf-8') + packet += compat_struct_pack('!B', Socks5AddressType.ATYP_DOMAINNAME) + packet += self._len_and_data(destaddr) + else: + packet += compat_struct_pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr + packet += compat_struct_pack('!H', port) + + self.sendall(packet) + + version, status, reserved, atype = self._recv_bytes(4) + + self._check_response_version(SOCKS5_VERSION, version) + + if status != Socks5Error.ERR_SUCCESS: + self.close() + raise Socks5Error(status) + + if atype == Socks5AddressType.ATYP_IPV4: + destaddr = self.recvall(4) + elif atype == Socks5AddressType.ATYP_DOMAINNAME: + alen = compat_ord(self.recv(1)) + destaddr = self.recvall(alen) + elif atype == Socks5AddressType.ATYP_IPV6: + destaddr = self.recvall(16) + destport = compat_struct_unpack('!H', self.recvall(2))[0] + + return (destaddr, destport) + + def _make_proxy(self, connect_func, address): + if not self._proxy: + return connect_func(self, address) + + result = connect_func(self, (self._proxy.host, self._proxy.port)) + if result != 0 and result is not None: + return result + setup_funcs = { + ProxyType.SOCKS4: self._setup_socks4, + ProxyType.SOCKS4A: self._setup_socks4a, + ProxyType.SOCKS5: self._setup_socks5, + } + setup_funcs[self._proxy.type](address) + return result + + def connect(self, address): + 
self._make_proxy(socket.socket.connect, address) + + def connect_ex(self, address): + return self._make_proxy(socket.socket.connect_ex, address) diff --git a/src/music_kraken/pages/youtube_music/yt_utils/utils.py b/src/music_kraken/pages/youtube_music/yt_utils/utils.py new file mode 100644 index 0000000..61b94d8 --- /dev/null +++ b/src/music_kraken/pages/youtube_music/yt_utils/utils.py @@ -0,0 +1,6513 @@ +#!/usr/bin/env python +# coding: utf-8 + +from __future__ import unicode_literals + +import base64 +import binascii +import calendar +import codecs +import collections +import contextlib +import ctypes +import datetime +import email.utils +import email.header +import errno +import functools +import inspect +import io +import itertools +import json +import locale +import math +import operator +import os +import platform +import random +import re +import socket +import ssl +import subprocess +import sys +import tempfile +import time +import traceback +import unicodedata +import xml.etree.ElementTree +import zlib + +from .compat import ( + compat_HTMLParseError, + compat_HTMLParser, + compat_basestring, + compat_brotli as brotli, + compat_casefold, + compat_chr, + compat_collections_abc, + compat_cookiejar, + compat_ctypes_WINFUNCTYPE, + compat_datetime_timedelta_total_seconds, + compat_etree_fromstring, + compat_expanduser, + compat_html_entities, + compat_html_entities_html5, + compat_http_client, + compat_integer_types, + compat_kwargs, + compat_ncompress as ncompress, + compat_os_name, + compat_re_Match, + compat_re_Pattern, + compat_shlex_quote, + compat_str, + compat_struct_pack, + compat_struct_unpack, + compat_urllib_error, + compat_urllib_HTTPError, + compat_urllib_parse, + compat_urllib_parse_parse_qs as compat_parse_qs, + compat_urllib_parse_urlencode, + compat_urllib_parse_urlparse, + compat_urllib_parse_unquote_plus, + compat_urllib_request, + compat_xpath, +) + +from .socks import ( + ProxyType, + sockssocket, +) + + +def register_socks_protocols(): + # 
"Register" SOCKS protocols + # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904 + # URLs with protocols not in urlparse.uses_netloc are not handled correctly + for scheme in ('socks', 'socks4', 'socks4a', 'socks5'): + if scheme not in compat_urllib_parse.uses_netloc: + compat_urllib_parse.uses_netloc.append(scheme) + + +# Unfavoured alias +compiled_regex_type = compat_re_Pattern + + +def random_user_agent(): + _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36' + _CHROME_VERSIONS = ( + '74.0.3729.129', + '76.0.3780.3', + '76.0.3780.2', + '74.0.3729.128', + '76.0.3780.1', + '76.0.3780.0', + '75.0.3770.15', + '74.0.3729.127', + '74.0.3729.126', + '76.0.3779.1', + '76.0.3779.0', + '75.0.3770.14', + '74.0.3729.125', + '76.0.3778.1', + '76.0.3778.0', + '75.0.3770.13', + '74.0.3729.124', + '74.0.3729.123', + '73.0.3683.121', + '76.0.3777.1', + '76.0.3777.0', + '75.0.3770.12', + '74.0.3729.122', + '76.0.3776.4', + '75.0.3770.11', + '74.0.3729.121', + '76.0.3776.3', + '76.0.3776.2', + '73.0.3683.120', + '74.0.3729.120', + '74.0.3729.119', + '74.0.3729.118', + '76.0.3776.1', + '76.0.3776.0', + '76.0.3775.5', + '75.0.3770.10', + '74.0.3729.117', + '76.0.3775.4', + '76.0.3775.3', + '74.0.3729.116', + '75.0.3770.9', + '76.0.3775.2', + '76.0.3775.1', + '76.0.3775.0', + '75.0.3770.8', + '74.0.3729.115', + '74.0.3729.114', + '76.0.3774.1', + '76.0.3774.0', + '75.0.3770.7', + '74.0.3729.113', + '74.0.3729.112', + '74.0.3729.111', + '76.0.3773.1', + '76.0.3773.0', + '75.0.3770.6', + '74.0.3729.110', + '74.0.3729.109', + '76.0.3772.1', + '76.0.3772.0', + '75.0.3770.5', + '74.0.3729.108', + '74.0.3729.107', + '76.0.3771.1', + '76.0.3771.0', + '75.0.3770.4', + '74.0.3729.106', + '74.0.3729.105', + '75.0.3770.3', + '74.0.3729.104', + '74.0.3729.103', + '74.0.3729.102', + '75.0.3770.2', + '74.0.3729.101', + '75.0.3770.1', + '75.0.3770.0', + '74.0.3729.100', + '75.0.3769.5', + 
'75.0.3769.4', + '74.0.3729.99', + '75.0.3769.3', + '75.0.3769.2', + '75.0.3768.6', + '74.0.3729.98', + '75.0.3769.1', + '75.0.3769.0', + '74.0.3729.97', + '73.0.3683.119', + '73.0.3683.118', + '74.0.3729.96', + '75.0.3768.5', + '75.0.3768.4', + '75.0.3768.3', + '75.0.3768.2', + '74.0.3729.95', + '74.0.3729.94', + '75.0.3768.1', + '75.0.3768.0', + '74.0.3729.93', + '74.0.3729.92', + '73.0.3683.117', + '74.0.3729.91', + '75.0.3766.3', + '74.0.3729.90', + '75.0.3767.2', + '75.0.3767.1', + '75.0.3767.0', + '74.0.3729.89', + '73.0.3683.116', + '75.0.3766.2', + '74.0.3729.88', + '75.0.3766.1', + '75.0.3766.0', + '74.0.3729.87', + '73.0.3683.115', + '74.0.3729.86', + '75.0.3765.1', + '75.0.3765.0', + '74.0.3729.85', + '73.0.3683.114', + '74.0.3729.84', + '75.0.3764.1', + '75.0.3764.0', + '74.0.3729.83', + '73.0.3683.113', + '75.0.3763.2', + '75.0.3761.4', + '74.0.3729.82', + '75.0.3763.1', + '75.0.3763.0', + '74.0.3729.81', + '73.0.3683.112', + '75.0.3762.1', + '75.0.3762.0', + '74.0.3729.80', + '75.0.3761.3', + '74.0.3729.79', + '73.0.3683.111', + '75.0.3761.2', + '74.0.3729.78', + '74.0.3729.77', + '75.0.3761.1', + '75.0.3761.0', + '73.0.3683.110', + '74.0.3729.76', + '74.0.3729.75', + '75.0.3760.0', + '74.0.3729.74', + '75.0.3759.8', + '75.0.3759.7', + '75.0.3759.6', + '74.0.3729.73', + '75.0.3759.5', + '74.0.3729.72', + '73.0.3683.109', + '75.0.3759.4', + '75.0.3759.3', + '74.0.3729.71', + '75.0.3759.2', + '74.0.3729.70', + '73.0.3683.108', + '74.0.3729.69', + '75.0.3759.1', + '75.0.3759.0', + '74.0.3729.68', + '73.0.3683.107', + '74.0.3729.67', + '75.0.3758.1', + '75.0.3758.0', + '74.0.3729.66', + '73.0.3683.106', + '74.0.3729.65', + '75.0.3757.1', + '75.0.3757.0', + '74.0.3729.64', + '73.0.3683.105', + '74.0.3729.63', + '75.0.3756.1', + '75.0.3756.0', + '74.0.3729.62', + '73.0.3683.104', + '75.0.3755.3', + '75.0.3755.2', + '73.0.3683.103', + '75.0.3755.1', + '75.0.3755.0', + '74.0.3729.61', + '73.0.3683.102', + '74.0.3729.60', + '75.0.3754.2', + '74.0.3729.59', + 
'75.0.3753.4', + '74.0.3729.58', + '75.0.3754.1', + '75.0.3754.0', + '74.0.3729.57', + '73.0.3683.101', + '75.0.3753.3', + '75.0.3752.2', + '75.0.3753.2', + '74.0.3729.56', + '75.0.3753.1', + '75.0.3753.0', + '74.0.3729.55', + '73.0.3683.100', + '74.0.3729.54', + '75.0.3752.1', + '75.0.3752.0', + '74.0.3729.53', + '73.0.3683.99', + '74.0.3729.52', + '75.0.3751.1', + '75.0.3751.0', + '74.0.3729.51', + '73.0.3683.98', + '74.0.3729.50', + '75.0.3750.0', + '74.0.3729.49', + '74.0.3729.48', + '74.0.3729.47', + '75.0.3749.3', + '74.0.3729.46', + '73.0.3683.97', + '75.0.3749.2', + '74.0.3729.45', + '75.0.3749.1', + '75.0.3749.0', + '74.0.3729.44', + '73.0.3683.96', + '74.0.3729.43', + '74.0.3729.42', + '75.0.3748.1', + '75.0.3748.0', + '74.0.3729.41', + '75.0.3747.1', + '73.0.3683.95', + '75.0.3746.4', + '74.0.3729.40', + '74.0.3729.39', + '75.0.3747.0', + '75.0.3746.3', + '75.0.3746.2', + '74.0.3729.38', + '75.0.3746.1', + '75.0.3746.0', + '74.0.3729.37', + '73.0.3683.94', + '75.0.3745.5', + '75.0.3745.4', + '75.0.3745.3', + '75.0.3745.2', + '74.0.3729.36', + '75.0.3745.1', + '75.0.3745.0', + '75.0.3744.2', + '74.0.3729.35', + '73.0.3683.93', + '74.0.3729.34', + '75.0.3744.1', + '75.0.3744.0', + '74.0.3729.33', + '73.0.3683.92', + '74.0.3729.32', + '74.0.3729.31', + '73.0.3683.91', + '75.0.3741.2', + '75.0.3740.5', + '74.0.3729.30', + '75.0.3741.1', + '75.0.3741.0', + '74.0.3729.29', + '75.0.3740.4', + '73.0.3683.90', + '74.0.3729.28', + '75.0.3740.3', + '73.0.3683.89', + '75.0.3740.2', + '74.0.3729.27', + '75.0.3740.1', + '75.0.3740.0', + '74.0.3729.26', + '73.0.3683.88', + '73.0.3683.87', + '74.0.3729.25', + '75.0.3739.1', + '75.0.3739.0', + '73.0.3683.86', + '74.0.3729.24', + '73.0.3683.85', + '75.0.3738.4', + '75.0.3738.3', + '75.0.3738.2', + '75.0.3738.1', + '75.0.3738.0', + '74.0.3729.23', + '73.0.3683.84', + '74.0.3729.22', + '74.0.3729.21', + '75.0.3737.1', + '75.0.3737.0', + '74.0.3729.20', + '73.0.3683.83', + '74.0.3729.19', + '75.0.3736.1', + '75.0.3736.0', + 
'74.0.3729.18', + '73.0.3683.82', + '74.0.3729.17', + '75.0.3735.1', + '75.0.3735.0', + '74.0.3729.16', + '73.0.3683.81', + '75.0.3734.1', + '75.0.3734.0', + '74.0.3729.15', + '73.0.3683.80', + '74.0.3729.14', + '75.0.3733.1', + '75.0.3733.0', + '75.0.3732.1', + '74.0.3729.13', + '74.0.3729.12', + '73.0.3683.79', + '74.0.3729.11', + '75.0.3732.0', + '74.0.3729.10', + '73.0.3683.78', + '74.0.3729.9', + '74.0.3729.8', + '74.0.3729.7', + '75.0.3731.3', + '75.0.3731.2', + '75.0.3731.0', + '74.0.3729.6', + '73.0.3683.77', + '73.0.3683.76', + '75.0.3730.5', + '75.0.3730.4', + '73.0.3683.75', + '74.0.3729.5', + '73.0.3683.74', + '75.0.3730.3', + '75.0.3730.2', + '74.0.3729.4', + '73.0.3683.73', + '73.0.3683.72', + '75.0.3730.1', + '75.0.3730.0', + '74.0.3729.3', + '73.0.3683.71', + '74.0.3729.2', + '73.0.3683.70', + '74.0.3729.1', + '74.0.3729.0', + '74.0.3726.4', + '73.0.3683.69', + '74.0.3726.3', + '74.0.3728.0', + '74.0.3726.2', + '73.0.3683.68', + '74.0.3726.1', + '74.0.3726.0', + '74.0.3725.4', + '73.0.3683.67', + '73.0.3683.66', + '74.0.3725.3', + '74.0.3725.2', + '74.0.3725.1', + '74.0.3724.8', + '74.0.3725.0', + '73.0.3683.65', + '74.0.3724.7', + '74.0.3724.6', + '74.0.3724.5', + '74.0.3724.4', + '74.0.3724.3', + '74.0.3724.2', + '74.0.3724.1', + '74.0.3724.0', + '73.0.3683.64', + '74.0.3723.1', + '74.0.3723.0', + '73.0.3683.63', + '74.0.3722.1', + '74.0.3722.0', + '73.0.3683.62', + '74.0.3718.9', + '74.0.3702.3', + '74.0.3721.3', + '74.0.3721.2', + '74.0.3721.1', + '74.0.3721.0', + '74.0.3720.6', + '73.0.3683.61', + '72.0.3626.122', + '73.0.3683.60', + '74.0.3720.5', + '72.0.3626.121', + '74.0.3718.8', + '74.0.3720.4', + '74.0.3720.3', + '74.0.3718.7', + '74.0.3720.2', + '74.0.3720.1', + '74.0.3720.0', + '74.0.3718.6', + '74.0.3719.5', + '73.0.3683.59', + '74.0.3718.5', + '74.0.3718.4', + '74.0.3719.4', + '74.0.3719.3', + '74.0.3719.2', + '74.0.3719.1', + '73.0.3683.58', + '74.0.3719.0', + '73.0.3683.57', + '73.0.3683.56', + '74.0.3718.3', + '73.0.3683.55', + 
'74.0.3718.2', + '74.0.3718.1', + '74.0.3718.0', + '73.0.3683.54', + '74.0.3717.2', + '73.0.3683.53', + '74.0.3717.1', + '74.0.3717.0', + '73.0.3683.52', + '74.0.3716.1', + '74.0.3716.0', + '73.0.3683.51', + '74.0.3715.1', + '74.0.3715.0', + '73.0.3683.50', + '74.0.3711.2', + '74.0.3714.2', + '74.0.3713.3', + '74.0.3714.1', + '74.0.3714.0', + '73.0.3683.49', + '74.0.3713.1', + '74.0.3713.0', + '72.0.3626.120', + '73.0.3683.48', + '74.0.3712.2', + '74.0.3712.1', + '74.0.3712.0', + '73.0.3683.47', + '72.0.3626.119', + '73.0.3683.46', + '74.0.3710.2', + '72.0.3626.118', + '74.0.3711.1', + '74.0.3711.0', + '73.0.3683.45', + '72.0.3626.117', + '74.0.3710.1', + '74.0.3710.0', + '73.0.3683.44', + '72.0.3626.116', + '74.0.3709.1', + '74.0.3709.0', + '74.0.3704.9', + '73.0.3683.43', + '72.0.3626.115', + '74.0.3704.8', + '74.0.3704.7', + '74.0.3708.0', + '74.0.3706.7', + '74.0.3704.6', + '73.0.3683.42', + '72.0.3626.114', + '74.0.3706.6', + '72.0.3626.113', + '74.0.3704.5', + '74.0.3706.5', + '74.0.3706.4', + '74.0.3706.3', + '74.0.3706.2', + '74.0.3706.1', + '74.0.3706.0', + '73.0.3683.41', + '72.0.3626.112', + '74.0.3705.1', + '74.0.3705.0', + '73.0.3683.40', + '72.0.3626.111', + '73.0.3683.39', + '74.0.3704.4', + '73.0.3683.38', + '74.0.3704.3', + '74.0.3704.2', + '74.0.3704.1', + '74.0.3704.0', + '73.0.3683.37', + '72.0.3626.110', + '72.0.3626.109', + '74.0.3703.3', + '74.0.3703.2', + '73.0.3683.36', + '74.0.3703.1', + '74.0.3703.0', + '73.0.3683.35', + '72.0.3626.108', + '74.0.3702.2', + '74.0.3699.3', + '74.0.3702.1', + '74.0.3702.0', + '73.0.3683.34', + '72.0.3626.107', + '73.0.3683.33', + '74.0.3701.1', + '74.0.3701.0', + '73.0.3683.32', + '73.0.3683.31', + '72.0.3626.105', + '74.0.3700.1', + '74.0.3700.0', + '73.0.3683.29', + '72.0.3626.103', + '74.0.3699.2', + '74.0.3699.1', + '74.0.3699.0', + '73.0.3683.28', + '72.0.3626.102', + '73.0.3683.27', + '73.0.3683.26', + '74.0.3698.0', + '74.0.3696.2', + '72.0.3626.101', + '73.0.3683.25', + '74.0.3696.1', + 
'74.0.3696.0', + '74.0.3694.8', + '72.0.3626.100', + '74.0.3694.7', + '74.0.3694.6', + '74.0.3694.5', + '74.0.3694.4', + '72.0.3626.99', + '72.0.3626.98', + '74.0.3694.3', + '73.0.3683.24', + '72.0.3626.97', + '72.0.3626.96', + '72.0.3626.95', + '73.0.3683.23', + '72.0.3626.94', + '73.0.3683.22', + '73.0.3683.21', + '72.0.3626.93', + '74.0.3694.2', + '72.0.3626.92', + '74.0.3694.1', + '74.0.3694.0', + '74.0.3693.6', + '73.0.3683.20', + '72.0.3626.91', + '74.0.3693.5', + '74.0.3693.4', + '74.0.3693.3', + '74.0.3693.2', + '73.0.3683.19', + '74.0.3693.1', + '74.0.3693.0', + '73.0.3683.18', + '72.0.3626.90', + '74.0.3692.1', + '74.0.3692.0', + '73.0.3683.17', + '72.0.3626.89', + '74.0.3687.3', + '74.0.3691.1', + '74.0.3691.0', + '73.0.3683.16', + '72.0.3626.88', + '72.0.3626.87', + '73.0.3683.15', + '74.0.3690.1', + '74.0.3690.0', + '73.0.3683.14', + '72.0.3626.86', + '73.0.3683.13', + '73.0.3683.12', + '74.0.3689.1', + '74.0.3689.0', + '73.0.3683.11', + '72.0.3626.85', + '73.0.3683.10', + '72.0.3626.84', + '73.0.3683.9', + '74.0.3688.1', + '74.0.3688.0', + '73.0.3683.8', + '72.0.3626.83', + '74.0.3687.2', + '74.0.3687.1', + '74.0.3687.0', + '73.0.3683.7', + '72.0.3626.82', + '74.0.3686.4', + '72.0.3626.81', + '74.0.3686.3', + '74.0.3686.2', + '74.0.3686.1', + '74.0.3686.0', + '73.0.3683.6', + '72.0.3626.80', + '74.0.3685.1', + '74.0.3685.0', + '73.0.3683.5', + '72.0.3626.79', + '74.0.3684.1', + '74.0.3684.0', + '73.0.3683.4', + '72.0.3626.78', + '72.0.3626.77', + '73.0.3683.3', + '73.0.3683.2', + '72.0.3626.76', + '73.0.3683.1', + '73.0.3683.0', + '72.0.3626.75', + '71.0.3578.141', + '73.0.3682.1', + '73.0.3682.0', + '72.0.3626.74', + '71.0.3578.140', + '73.0.3681.4', + '73.0.3681.3', + '73.0.3681.2', + '73.0.3681.1', + '73.0.3681.0', + '72.0.3626.73', + '71.0.3578.139', + '72.0.3626.72', + '72.0.3626.71', + '73.0.3680.1', + '73.0.3680.0', + '72.0.3626.70', + '71.0.3578.138', + '73.0.3678.2', + '73.0.3679.1', + '73.0.3679.0', + '72.0.3626.69', + '71.0.3578.137', + 
'73.0.3678.1', + '73.0.3678.0', + '71.0.3578.136', + '73.0.3677.1', + '73.0.3677.0', + '72.0.3626.68', + '72.0.3626.67', + '71.0.3578.135', + '73.0.3676.1', + '73.0.3676.0', + '73.0.3674.2', + '72.0.3626.66', + '71.0.3578.134', + '73.0.3674.1', + '73.0.3674.0', + '72.0.3626.65', + '71.0.3578.133', + '73.0.3673.2', + '73.0.3673.1', + '73.0.3673.0', + '72.0.3626.64', + '71.0.3578.132', + '72.0.3626.63', + '72.0.3626.62', + '72.0.3626.61', + '72.0.3626.60', + '73.0.3672.1', + '73.0.3672.0', + '72.0.3626.59', + '71.0.3578.131', + '73.0.3671.3', + '73.0.3671.2', + '73.0.3671.1', + '73.0.3671.0', + '72.0.3626.58', + '71.0.3578.130', + '73.0.3670.1', + '73.0.3670.0', + '72.0.3626.57', + '71.0.3578.129', + '73.0.3669.1', + '73.0.3669.0', + '72.0.3626.56', + '71.0.3578.128', + '73.0.3668.2', + '73.0.3668.1', + '73.0.3668.0', + '72.0.3626.55', + '71.0.3578.127', + '73.0.3667.2', + '73.0.3667.1', + '73.0.3667.0', + '72.0.3626.54', + '71.0.3578.126', + '73.0.3666.1', + '73.0.3666.0', + '72.0.3626.53', + '71.0.3578.125', + '73.0.3665.4', + '73.0.3665.3', + '72.0.3626.52', + '73.0.3665.2', + '73.0.3664.4', + '73.0.3665.1', + '73.0.3665.0', + '72.0.3626.51', + '71.0.3578.124', + '72.0.3626.50', + '73.0.3664.3', + '73.0.3664.2', + '73.0.3664.1', + '73.0.3664.0', + '73.0.3663.2', + '72.0.3626.49', + '71.0.3578.123', + '73.0.3663.1', + '73.0.3663.0', + '72.0.3626.48', + '71.0.3578.122', + '73.0.3662.1', + '73.0.3662.0', + '72.0.3626.47', + '71.0.3578.121', + '73.0.3661.1', + '72.0.3626.46', + '73.0.3661.0', + '72.0.3626.45', + '71.0.3578.120', + '73.0.3660.2', + '73.0.3660.1', + '73.0.3660.0', + '72.0.3626.44', + '71.0.3578.119', + '73.0.3659.1', + '73.0.3659.0', + '72.0.3626.43', + '71.0.3578.118', + '73.0.3658.1', + '73.0.3658.0', + '72.0.3626.42', + '71.0.3578.117', + '73.0.3657.1', + '73.0.3657.0', + '72.0.3626.41', + '71.0.3578.116', + '73.0.3656.1', + '73.0.3656.0', + '72.0.3626.40', + '71.0.3578.115', + '73.0.3655.1', + '73.0.3655.0', + '72.0.3626.39', + '71.0.3578.114', + 
'73.0.3654.1', + '73.0.3654.0', + '72.0.3626.38', + '71.0.3578.113', + '73.0.3653.1', + '73.0.3653.0', + '72.0.3626.37', + '71.0.3578.112', + '73.0.3652.1', + '73.0.3652.0', + '72.0.3626.36', + '71.0.3578.111', + '73.0.3651.1', + '73.0.3651.0', + '72.0.3626.35', + '71.0.3578.110', + '73.0.3650.1', + '73.0.3650.0', + '72.0.3626.34', + '71.0.3578.109', + '73.0.3649.1', + '73.0.3649.0', + '72.0.3626.33', + '71.0.3578.108', + '73.0.3648.2', + '73.0.3648.1', + '73.0.3648.0', + '72.0.3626.32', + '71.0.3578.107', + '73.0.3647.2', + '73.0.3647.1', + '73.0.3647.0', + '72.0.3626.31', + '71.0.3578.106', + '73.0.3635.3', + '73.0.3646.2', + '73.0.3646.1', + '73.0.3646.0', + '72.0.3626.30', + '71.0.3578.105', + '72.0.3626.29', + '73.0.3645.2', + '73.0.3645.1', + '73.0.3645.0', + '72.0.3626.28', + '71.0.3578.104', + '72.0.3626.27', + '72.0.3626.26', + '72.0.3626.25', + '72.0.3626.24', + '73.0.3644.0', + '73.0.3643.2', + '72.0.3626.23', + '71.0.3578.103', + '73.0.3643.1', + '73.0.3643.0', + '72.0.3626.22', + '71.0.3578.102', + '73.0.3642.1', + '73.0.3642.0', + '72.0.3626.21', + '71.0.3578.101', + '73.0.3641.1', + '73.0.3641.0', + '72.0.3626.20', + '71.0.3578.100', + '72.0.3626.19', + '73.0.3640.1', + '73.0.3640.0', + '72.0.3626.18', + '73.0.3639.1', + '71.0.3578.99', + '73.0.3639.0', + '72.0.3626.17', + '73.0.3638.2', + '72.0.3626.16', + '73.0.3638.1', + '73.0.3638.0', + '72.0.3626.15', + '71.0.3578.98', + '73.0.3635.2', + '71.0.3578.97', + '73.0.3637.1', + '73.0.3637.0', + '72.0.3626.14', + '71.0.3578.96', + '71.0.3578.95', + '72.0.3626.13', + '71.0.3578.94', + '73.0.3636.2', + '71.0.3578.93', + '73.0.3636.1', + '73.0.3636.0', + '72.0.3626.12', + '71.0.3578.92', + '73.0.3635.1', + '73.0.3635.0', + '72.0.3626.11', + '71.0.3578.91', + '73.0.3634.2', + '73.0.3634.1', + '73.0.3634.0', + '72.0.3626.10', + '71.0.3578.90', + '71.0.3578.89', + '73.0.3633.2', + '73.0.3633.1', + '73.0.3633.0', + '72.0.3610.4', + '72.0.3626.9', + '71.0.3578.88', + '73.0.3632.5', + '73.0.3632.4', + 
'73.0.3632.3', + '73.0.3632.2', + '73.0.3632.1', + '73.0.3632.0', + '72.0.3626.8', + '71.0.3578.87', + '73.0.3631.2', + '73.0.3631.1', + '73.0.3631.0', + '72.0.3626.7', + '71.0.3578.86', + '72.0.3626.6', + '73.0.3630.1', + '73.0.3630.0', + '72.0.3626.5', + '71.0.3578.85', + '72.0.3626.4', + '73.0.3628.3', + '73.0.3628.2', + '73.0.3629.1', + '73.0.3629.0', + '72.0.3626.3', + '71.0.3578.84', + '73.0.3628.1', + '73.0.3628.0', + '71.0.3578.83', + '73.0.3627.1', + '73.0.3627.0', + '72.0.3626.2', + '71.0.3578.82', + '71.0.3578.81', + '71.0.3578.80', + '72.0.3626.1', + '72.0.3626.0', + '71.0.3578.79', + '70.0.3538.124', + '71.0.3578.78', + '72.0.3623.4', + '72.0.3625.2', + '72.0.3625.1', + '72.0.3625.0', + '71.0.3578.77', + '70.0.3538.123', + '72.0.3624.4', + '72.0.3624.3', + '72.0.3624.2', + '71.0.3578.76', + '72.0.3624.1', + '72.0.3624.0', + '72.0.3623.3', + '71.0.3578.75', + '70.0.3538.122', + '71.0.3578.74', + '72.0.3623.2', + '72.0.3610.3', + '72.0.3623.1', + '72.0.3623.0', + '72.0.3622.3', + '72.0.3622.2', + '71.0.3578.73', + '70.0.3538.121', + '72.0.3622.1', + '72.0.3622.0', + '71.0.3578.72', + '70.0.3538.120', + '72.0.3621.1', + '72.0.3621.0', + '71.0.3578.71', + '70.0.3538.119', + '72.0.3620.1', + '72.0.3620.0', + '71.0.3578.70', + '70.0.3538.118', + '71.0.3578.69', + '72.0.3619.1', + '72.0.3619.0', + '71.0.3578.68', + '70.0.3538.117', + '71.0.3578.67', + '72.0.3618.1', + '72.0.3618.0', + '71.0.3578.66', + '70.0.3538.116', + '72.0.3617.1', + '72.0.3617.0', + '71.0.3578.65', + '70.0.3538.115', + '72.0.3602.3', + '71.0.3578.64', + '72.0.3616.1', + '72.0.3616.0', + '71.0.3578.63', + '70.0.3538.114', + '71.0.3578.62', + '72.0.3615.1', + '72.0.3615.0', + '71.0.3578.61', + '70.0.3538.113', + '72.0.3614.1', + '72.0.3614.0', + '71.0.3578.60', + '70.0.3538.112', + '72.0.3613.1', + '72.0.3613.0', + '71.0.3578.59', + '70.0.3538.111', + '72.0.3612.2', + '72.0.3612.1', + '72.0.3612.0', + '70.0.3538.110', + '71.0.3578.58', + '70.0.3538.109', + '72.0.3611.2', + '72.0.3611.1', + 
'72.0.3611.0', + '71.0.3578.57', + '70.0.3538.108', + '72.0.3610.2', + '71.0.3578.56', + '71.0.3578.55', + '72.0.3610.1', + '72.0.3610.0', + '71.0.3578.54', + '70.0.3538.107', + '71.0.3578.53', + '72.0.3609.3', + '71.0.3578.52', + '72.0.3609.2', + '71.0.3578.51', + '72.0.3608.5', + '72.0.3609.1', + '72.0.3609.0', + '71.0.3578.50', + '70.0.3538.106', + '72.0.3608.4', + '72.0.3608.3', + '72.0.3608.2', + '71.0.3578.49', + '72.0.3608.1', + '72.0.3608.0', + '70.0.3538.105', + '71.0.3578.48', + '72.0.3607.1', + '72.0.3607.0', + '71.0.3578.47', + '70.0.3538.104', + '72.0.3606.2', + '72.0.3606.1', + '72.0.3606.0', + '71.0.3578.46', + '70.0.3538.103', + '70.0.3538.102', + '72.0.3605.3', + '72.0.3605.2', + '72.0.3605.1', + '72.0.3605.0', + '71.0.3578.45', + '70.0.3538.101', + '71.0.3578.44', + '71.0.3578.43', + '70.0.3538.100', + '70.0.3538.99', + '71.0.3578.42', + '72.0.3604.1', + '72.0.3604.0', + '71.0.3578.41', + '70.0.3538.98', + '71.0.3578.40', + '72.0.3603.2', + '72.0.3603.1', + '72.0.3603.0', + '71.0.3578.39', + '70.0.3538.97', + '72.0.3602.2', + '71.0.3578.38', + '71.0.3578.37', + '72.0.3602.1', + '72.0.3602.0', + '71.0.3578.36', + '70.0.3538.96', + '72.0.3601.1', + '72.0.3601.0', + '71.0.3578.35', + '70.0.3538.95', + '72.0.3600.1', + '72.0.3600.0', + '71.0.3578.34', + '70.0.3538.94', + '72.0.3599.3', + '72.0.3599.2', + '72.0.3599.1', + '72.0.3599.0', + '71.0.3578.33', + '70.0.3538.93', + '72.0.3598.1', + '72.0.3598.0', + '71.0.3578.32', + '70.0.3538.87', + '72.0.3597.1', + '72.0.3597.0', + '72.0.3596.2', + '71.0.3578.31', + '70.0.3538.86', + '71.0.3578.30', + '71.0.3578.29', + '72.0.3596.1', + '72.0.3596.0', + '71.0.3578.28', + '70.0.3538.85', + '72.0.3595.2', + '72.0.3591.3', + '72.0.3595.1', + '72.0.3595.0', + '71.0.3578.27', + '70.0.3538.84', + '72.0.3594.1', + '72.0.3594.0', + '71.0.3578.26', + '70.0.3538.83', + '72.0.3593.2', + '72.0.3593.1', + '72.0.3593.0', + '71.0.3578.25', + '70.0.3538.82', + '72.0.3589.3', + '72.0.3592.2', + '72.0.3592.1', + '72.0.3592.0', 
+ '71.0.3578.24', + '72.0.3589.2', + '70.0.3538.81', + '70.0.3538.80', + '72.0.3591.2', + '72.0.3591.1', + '72.0.3591.0', + '71.0.3578.23', + '70.0.3538.79', + '71.0.3578.22', + '72.0.3590.1', + '72.0.3590.0', + '71.0.3578.21', + '70.0.3538.78', + '70.0.3538.77', + '72.0.3589.1', + '72.0.3589.0', + '71.0.3578.20', + '70.0.3538.76', + '71.0.3578.19', + '70.0.3538.75', + '72.0.3588.1', + '72.0.3588.0', + '71.0.3578.18', + '70.0.3538.74', + '72.0.3586.2', + '72.0.3587.0', + '71.0.3578.17', + '70.0.3538.73', + '72.0.3586.1', + '72.0.3586.0', + '71.0.3578.16', + '70.0.3538.72', + '72.0.3585.1', + '72.0.3585.0', + '71.0.3578.15', + '70.0.3538.71', + '71.0.3578.14', + '72.0.3584.1', + '72.0.3584.0', + '71.0.3578.13', + '70.0.3538.70', + '72.0.3583.2', + '71.0.3578.12', + '72.0.3583.1', + '72.0.3583.0', + '71.0.3578.11', + '70.0.3538.69', + '71.0.3578.10', + '72.0.3582.0', + '72.0.3581.4', + '71.0.3578.9', + '70.0.3538.67', + '72.0.3581.3', + '72.0.3581.2', + '72.0.3581.1', + '72.0.3581.0', + '71.0.3578.8', + '70.0.3538.66', + '72.0.3580.1', + '72.0.3580.0', + '71.0.3578.7', + '70.0.3538.65', + '71.0.3578.6', + '72.0.3579.1', + '72.0.3579.0', + '71.0.3578.5', + '70.0.3538.64', + '71.0.3578.4', + '71.0.3578.3', + '71.0.3578.2', + '71.0.3578.1', + '71.0.3578.0', + '70.0.3538.63', + '69.0.3497.128', + '70.0.3538.62', + '70.0.3538.61', + '70.0.3538.60', + '70.0.3538.59', + '71.0.3577.1', + '71.0.3577.0', + '70.0.3538.58', + '69.0.3497.127', + '71.0.3576.2', + '71.0.3576.1', + '71.0.3576.0', + '70.0.3538.57', + '70.0.3538.56', + '71.0.3575.2', + '70.0.3538.55', + '69.0.3497.126', + '70.0.3538.54', + '71.0.3575.1', + '71.0.3575.0', + '71.0.3574.1', + '71.0.3574.0', + '70.0.3538.53', + '69.0.3497.125', + '70.0.3538.52', + '71.0.3573.1', + '71.0.3573.0', + '70.0.3538.51', + '69.0.3497.124', + '71.0.3572.1', + '71.0.3572.0', + '70.0.3538.50', + '69.0.3497.123', + '71.0.3571.2', + '70.0.3538.49', + '69.0.3497.122', + '71.0.3571.1', + '71.0.3571.0', + '70.0.3538.48', + 
'69.0.3497.121', + '71.0.3570.1', + '71.0.3570.0', + '70.0.3538.47', + '69.0.3497.120', + '71.0.3568.2', + '71.0.3569.1', + '71.0.3569.0', + '70.0.3538.46', + '69.0.3497.119', + '70.0.3538.45', + '71.0.3568.1', + '71.0.3568.0', + '70.0.3538.44', + '69.0.3497.118', + '70.0.3538.43', + '70.0.3538.42', + '71.0.3567.1', + '71.0.3567.0', + '70.0.3538.41', + '69.0.3497.117', + '71.0.3566.1', + '71.0.3566.0', + '70.0.3538.40', + '69.0.3497.116', + '71.0.3565.1', + '71.0.3565.0', + '70.0.3538.39', + '69.0.3497.115', + '71.0.3564.1', + '71.0.3564.0', + '70.0.3538.38', + '69.0.3497.114', + '71.0.3563.0', + '71.0.3562.2', + '70.0.3538.37', + '69.0.3497.113', + '70.0.3538.36', + '70.0.3538.35', + '71.0.3562.1', + '71.0.3562.0', + '70.0.3538.34', + '69.0.3497.112', + '70.0.3538.33', + '71.0.3561.1', + '71.0.3561.0', + '70.0.3538.32', + '69.0.3497.111', + '71.0.3559.6', + '71.0.3560.1', + '71.0.3560.0', + '71.0.3559.5', + '71.0.3559.4', + '70.0.3538.31', + '69.0.3497.110', + '71.0.3559.3', + '70.0.3538.30', + '69.0.3497.109', + '71.0.3559.2', + '71.0.3559.1', + '71.0.3559.0', + '70.0.3538.29', + '69.0.3497.108', + '71.0.3558.2', + '71.0.3558.1', + '71.0.3558.0', + '70.0.3538.28', + '69.0.3497.107', + '71.0.3557.2', + '71.0.3557.1', + '71.0.3557.0', + '70.0.3538.27', + '69.0.3497.106', + '71.0.3554.4', + '70.0.3538.26', + '71.0.3556.1', + '71.0.3556.0', + '70.0.3538.25', + '71.0.3554.3', + '69.0.3497.105', + '71.0.3554.2', + '70.0.3538.24', + '69.0.3497.104', + '71.0.3555.2', + '70.0.3538.23', + '71.0.3555.1', + '71.0.3555.0', + '70.0.3538.22', + '69.0.3497.103', + '71.0.3554.1', + '71.0.3554.0', + '70.0.3538.21', + '69.0.3497.102', + '71.0.3553.3', + '70.0.3538.20', + '69.0.3497.101', + '71.0.3553.2', + '69.0.3497.100', + '71.0.3553.1', + '71.0.3553.0', + '70.0.3538.19', + '69.0.3497.99', + '69.0.3497.98', + '69.0.3497.97', + '71.0.3552.6', + '71.0.3552.5', + '71.0.3552.4', + '71.0.3552.3', + '71.0.3552.2', + '71.0.3552.1', + '71.0.3552.0', + '70.0.3538.18', + '69.0.3497.96', + 
'71.0.3551.3', + '71.0.3551.2', + '71.0.3551.1', + '71.0.3551.0', + '70.0.3538.17', + '69.0.3497.95', + '71.0.3550.3', + '71.0.3550.2', + '71.0.3550.1', + '71.0.3550.0', + '70.0.3538.16', + '69.0.3497.94', + '71.0.3549.1', + '71.0.3549.0', + '70.0.3538.15', + '69.0.3497.93', + '69.0.3497.92', + '71.0.3548.1', + '71.0.3548.0', + '70.0.3538.14', + '69.0.3497.91', + '71.0.3547.1', + '71.0.3547.0', + '70.0.3538.13', + '69.0.3497.90', + '71.0.3546.2', + '69.0.3497.89', + '71.0.3546.1', + '71.0.3546.0', + '70.0.3538.12', + '69.0.3497.88', + '71.0.3545.4', + '71.0.3545.3', + '71.0.3545.2', + '71.0.3545.1', + '71.0.3545.0', + '70.0.3538.11', + '69.0.3497.87', + '71.0.3544.5', + '71.0.3544.4', + '71.0.3544.3', + '71.0.3544.2', + '71.0.3544.1', + '71.0.3544.0', + '69.0.3497.86', + '70.0.3538.10', + '69.0.3497.85', + '70.0.3538.9', + '69.0.3497.84', + '71.0.3543.4', + '70.0.3538.8', + '71.0.3543.3', + '71.0.3543.2', + '71.0.3543.1', + '71.0.3543.0', + '70.0.3538.7', + '69.0.3497.83', + '71.0.3542.2', + '71.0.3542.1', + '71.0.3542.0', + '70.0.3538.6', + '69.0.3497.82', + '69.0.3497.81', + '71.0.3541.1', + '71.0.3541.0', + '70.0.3538.5', + '69.0.3497.80', + '71.0.3540.1', + '71.0.3540.0', + '70.0.3538.4', + '69.0.3497.79', + '70.0.3538.3', + '71.0.3539.1', + '71.0.3539.0', + '69.0.3497.78', + '68.0.3440.134', + '69.0.3497.77', + '70.0.3538.2', + '70.0.3538.1', + '70.0.3538.0', + '69.0.3497.76', + '68.0.3440.133', + '69.0.3497.75', + '70.0.3537.2', + '70.0.3537.1', + '70.0.3537.0', + '69.0.3497.74', + '68.0.3440.132', + '70.0.3536.0', + '70.0.3535.5', + '70.0.3535.4', + '70.0.3535.3', + '69.0.3497.73', + '68.0.3440.131', + '70.0.3532.8', + '70.0.3532.7', + '69.0.3497.72', + '69.0.3497.71', + '70.0.3535.2', + '70.0.3535.1', + '70.0.3535.0', + '69.0.3497.70', + '68.0.3440.130', + '69.0.3497.69', + '68.0.3440.129', + '70.0.3534.4', + '70.0.3534.3', + '70.0.3534.2', + '70.0.3534.1', + '70.0.3534.0', + '69.0.3497.68', + '68.0.3440.128', + '70.0.3533.2', + '70.0.3533.1', + 
'70.0.3533.0', + '69.0.3497.67', + '68.0.3440.127', + '70.0.3532.6', + '70.0.3532.5', + '70.0.3532.4', + '69.0.3497.66', + '68.0.3440.126', + '70.0.3532.3', + '70.0.3532.2', + '70.0.3532.1', + '69.0.3497.60', + '69.0.3497.65', + '69.0.3497.64', + '70.0.3532.0', + '70.0.3531.0', + '70.0.3530.4', + '70.0.3530.3', + '70.0.3530.2', + '69.0.3497.58', + '68.0.3440.125', + '69.0.3497.57', + '69.0.3497.56', + '69.0.3497.55', + '69.0.3497.54', + '70.0.3530.1', + '70.0.3530.0', + '69.0.3497.53', + '68.0.3440.124', + '69.0.3497.52', + '70.0.3529.3', + '70.0.3529.2', + '70.0.3529.1', + '70.0.3529.0', + '69.0.3497.51', + '70.0.3528.4', + '68.0.3440.123', + '70.0.3528.3', + '70.0.3528.2', + '70.0.3528.1', + '70.0.3528.0', + '69.0.3497.50', + '68.0.3440.122', + '70.0.3527.1', + '70.0.3527.0', + '69.0.3497.49', + '68.0.3440.121', + '70.0.3526.1', + '70.0.3526.0', + '68.0.3440.120', + '69.0.3497.48', + '69.0.3497.47', + '68.0.3440.119', + '68.0.3440.118', + '70.0.3525.5', + '70.0.3525.4', + '70.0.3525.3', + '68.0.3440.117', + '69.0.3497.46', + '70.0.3525.2', + '70.0.3525.1', + '70.0.3525.0', + '69.0.3497.45', + '68.0.3440.116', + '70.0.3524.4', + '70.0.3524.3', + '69.0.3497.44', + '70.0.3524.2', + '70.0.3524.1', + '70.0.3524.0', + '70.0.3523.2', + '69.0.3497.43', + '68.0.3440.115', + '70.0.3505.9', + '69.0.3497.42', + '70.0.3505.8', + '70.0.3523.1', + '70.0.3523.0', + '69.0.3497.41', + '68.0.3440.114', + '70.0.3505.7', + '69.0.3497.40', + '70.0.3522.1', + '70.0.3522.0', + '70.0.3521.2', + '69.0.3497.39', + '68.0.3440.113', + '70.0.3505.6', + '70.0.3521.1', + '70.0.3521.0', + '69.0.3497.38', + '68.0.3440.112', + '70.0.3520.1', + '70.0.3520.0', + '69.0.3497.37', + '68.0.3440.111', + '70.0.3519.3', + '70.0.3519.2', + '70.0.3519.1', + '70.0.3519.0', + '69.0.3497.36', + '68.0.3440.110', + '70.0.3518.1', + '70.0.3518.0', + '69.0.3497.35', + '69.0.3497.34', + '68.0.3440.109', + '70.0.3517.1', + '70.0.3517.0', + '69.0.3497.33', + '68.0.3440.108', + '69.0.3497.32', + '70.0.3516.3', + 
'70.0.3516.2', + '70.0.3516.1', + '70.0.3516.0', + '69.0.3497.31', + '68.0.3440.107', + '70.0.3515.4', + '68.0.3440.106', + '70.0.3515.3', + '70.0.3515.2', + '70.0.3515.1', + '70.0.3515.0', + '69.0.3497.30', + '68.0.3440.105', + '68.0.3440.104', + '70.0.3514.2', + '70.0.3514.1', + '70.0.3514.0', + '69.0.3497.29', + '68.0.3440.103', + '70.0.3513.1', + '70.0.3513.0', + '69.0.3497.28', + ) + return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS) + + +std_headers = { + 'User-Agent': random_user_agent(), + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Language': 'en-us,en;q=0.5', +} + + +USER_AGENTS = { + 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27', +} + + +NO_DEFAULT = object() +IDENTITY = lambda x: x + +ENGLISH_MONTH_NAMES = [ + 'January', 'February', 'March', 'April', 'May', 'June', + 'July', 'August', 'September', 'October', 'November', 'December'] + +MONTH_NAMES = { + 'en': ENGLISH_MONTH_NAMES, + 'fr': [ + 'janvier', 'février', 'mars', 'avril', 'mai', 'juin', + 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'], +} + +# Timezone names for RFC2822 obs-zone +# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42 +TIMEZONE_NAMES = { + 'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0, + 'AST': -4, 'ADT': -3, # Atlantic (used in Canada) + 'EST': -5, 'EDT': -4, # Eastern + 'CST': -6, 'CDT': -5, # Central + 'MST': -7, 'MDT': -6, # Mountain + 'PST': -8, 'PDT': -7 # Pacific +} + +KNOWN_EXTENSIONS = ( + 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', + 'flv', 'f4v', 'f4a', 'f4b', + 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus', + 'mkv', 'mka', 'mk3d', + 'avi', 'divx', + 'mov', + 'asf', 'wmv', 'wma', + '3gp', '3g2', + 'mp3', + 'flac', + 'ape', + 'wav', + 'f4f', 'f4m', 'm3u8', 'smil') + +# needed for sanitizing filenames in restricted mode +ACCENT_CHARS = 
dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', + itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'], + 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y'))) + +DATE_FORMATS = ( + '%d %B %Y', + '%d %b %Y', + '%B %d %Y', + '%B %dst %Y', + '%B %dnd %Y', + '%B %drd %Y', + '%B %dth %Y', + '%b %d %Y', + '%b %dst %Y', + '%b %dnd %Y', + '%b %drd %Y', + '%b %dth %Y', + '%b %dst %Y %I:%M', + '%b %dnd %Y %I:%M', + '%b %drd %Y %I:%M', + '%b %dth %Y %I:%M', + '%Y %m %d', + '%Y-%m-%d', + '%Y.%m.%d.', + '%Y/%m/%d', + '%Y/%m/%d %H:%M', + '%Y/%m/%d %H:%M:%S', + '%Y%m%d%H%M', + '%Y%m%d%H%M%S', + '%Y%m%d', + '%Y-%m-%d %H:%M', + '%Y-%m-%d %H:%M:%S', + '%Y-%m-%d %H:%M:%S.%f', + '%Y-%m-%d %H:%M:%S:%f', + '%d.%m.%Y %H:%M', + '%d.%m.%Y %H.%M', + '%Y-%m-%dT%H:%M:%SZ', + '%Y-%m-%dT%H:%M:%S.%fZ', + '%Y-%m-%dT%H:%M:%S.%f0Z', + '%Y-%m-%dT%H:%M:%S', + '%Y-%m-%dT%H:%M:%S.%f', + '%Y-%m-%dT%H:%M', + '%b %d %Y at %H:%M', + '%b %d %Y at %H:%M:%S', + '%B %d %Y at %H:%M', + '%B %d %Y at %H:%M:%S', + '%H:%M %d-%b-%Y', +) + +DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) +DATE_FORMATS_DAY_FIRST.extend([ + '%d-%m-%Y', + '%d.%m.%Y', + '%d.%m.%y', + '%d/%m/%Y', + '%d/%m/%y', + '%d/%m/%Y %H:%M:%S', + '%d-%m-%Y %H:%M', +]) + +DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) +DATE_FORMATS_MONTH_FIRST.extend([ + '%m-%d-%Y', + '%m.%d.%Y', + '%m/%d/%Y', + '%m/%d/%y', + '%m/%d/%Y %H:%M:%S', +]) + +PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)" +JSON_LD_RE = r'(?is)]+type=(["\']?)application/ld\+json\1[^>]*>(?P.+?)' + + +def preferredencoding(): + """Get preferred encoding. + + Returns the best encoding scheme for the system, based on + locale.getpreferredencoding() and some further tweaks. 
+ """ + try: + pref = locale.getpreferredencoding() + 'TEST'.encode(pref) + except Exception: + pref = 'UTF-8' + + return pref + + +def write_json_file(obj, fn): + """ Encode obj as JSON and write it to fn, atomically if possible """ + + fn = encodeFilename(fn) + if sys.version_info < (3, 0) and sys.platform != 'win32': + encoding = get_filesystem_encoding() + # os.path.basename returns a bytes object, but NamedTemporaryFile + # will fail if the filename contains non-ascii characters unless we + # use a unicode object + path_basename = lambda f: os.path.basename(f).decode(encoding) + # the same for os.path.dirname + path_dirname = lambda f: os.path.dirname(f).decode(encoding) + else: + path_basename = os.path.basename + path_dirname = os.path.dirname + + args = { + 'suffix': '.tmp', + 'prefix': path_basename(fn) + '.', + 'dir': path_dirname(fn), + 'delete': False, + } + + # In Python 2.x, json.dump expects a bytestream. + # In Python 3.x, it writes to a character stream + if sys.version_info < (3, 0): + args['mode'] = 'wb' + else: + args.update({ + 'mode': 'w', + 'encoding': 'utf-8', + }) + + tf = tempfile.NamedTemporaryFile(**compat_kwargs(args)) + + try: + with tf: + json.dump(obj, tf) + if sys.platform == 'win32': + # Need to remove existing file on Windows, else os.rename raises + # WindowsError or FileExistsError. 
+ try: + os.unlink(fn) + except OSError: + pass + try: + mask = os.umask(0) + os.umask(mask) + os.chmod(tf.name, 0o666 & ~mask) + except OSError: + pass + os.rename(tf.name, fn) + except Exception: + try: + os.remove(tf.name) + except OSError: + pass + raise + + +if sys.version_info >= (2, 7): + def find_xpath_attr(node, xpath, key, val=None): + """ Find the xpath xpath[@key=val] """ + assert re.match(r'^[a-zA-Z_-]+$', key) + expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val)) + return node.find(expr) +else: + def find_xpath_attr(node, xpath, key, val=None): + for f in node.findall(compat_xpath(xpath)): + if key not in f.attrib: + continue + if val is None or f.attrib.get(key) == val: + return f + return None + + +# On python2.6 the xml.etree.ElementTree.Element methods don't support +# the namespace parameter + +def xpath_with_ns(path, ns_map): + components = [c.split(':') for c in path.split('/')] + replaced = [] + for c in components: + if len(c) == 1: + replaced.append(c[0]) + else: + ns, tag = c + replaced.append('{%s}%s' % (ns_map[ns], tag)) + return '/'.join(replaced) + + +def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT): + def _find_xpath(xpath): + return node.find(compat_xpath(xpath)) + + if isinstance(xpath, compat_basestring): + n = _find_xpath(xpath) + else: + for xp in xpath: + n = _find_xpath(xp) + if n is not None: + break + + if n is None: + if default is not NO_DEFAULT: + return default + elif fatal: + name = xpath if name is None else name + raise ExtractorError('Could not find XML element %s' % name) + else: + return None + return n + + +def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT): + n = xpath_element(node, xpath, name, fatal=fatal, default=default) + if n is None or n == default: + return n + if n.text is None: + if default is not NO_DEFAULT: + return default + elif fatal: + name = xpath if name is None else name + raise ExtractorError('Could not find XML element\'s 
text %s' % name) + else: + return None + return n.text + + +def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT): + n = find_xpath_attr(node, xpath, key) + if n is None: + if default is not NO_DEFAULT: + return default + elif fatal: + name = '%s[@%s]' % (xpath, key) if name is None else name + raise ExtractorError('Could not find XML attribute %s' % name) + else: + return None + return n.attrib[key] + + +def get_element_by_id(id, html): + """Return the content of the tag with the specified ID in the passed HTML document""" + return get_element_by_attribute('id', id, html) + + +def get_element_by_class(class_name, html): + """Return the content of the first tag with the specified class in the passed HTML document""" + retval = get_elements_by_class(class_name, html) + return retval[0] if retval else None + + +def get_element_by_attribute(attribute, value, html, escape_value=True): + retval = get_elements_by_attribute(attribute, value, html, escape_value) + return retval[0] if retval else None + + +def get_elements_by_class(class_name, html): + """Return the content of all tags with the specified class in the passed HTML document as a list""" + return get_elements_by_attribute( + 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name), + html, escape_value=False) + + +def get_elements_by_attribute(attribute, value, html, escape_value=True): + """Return the content of the tag with the specified attribute in the passed HTML document""" + + value = re.escape(value) if escape_value else value + + retlist = [] + for m in re.finditer(r'''(?xs) + <([a-zA-Z0-9:._-]+) + (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*? + \s+%s=['"]?%s['"]? + (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*? + \s*> + (?P.*?) 
+ + ''' % (re.escape(attribute), value), html): + res = m.group('content') + + if res.startswith('"') or res.startswith("'"): + res = res[1:-1] + + retlist.append(unescapeHTML(res)) + + return retlist + + +class HTMLAttributeParser(compat_HTMLParser): + """Trivial HTML parser to gather the attributes for a single element""" + def __init__(self): + self.attrs = {} + compat_HTMLParser.__init__(self) + + def handle_starttag(self, tag, attrs): + self.attrs = dict(attrs) + + +def extract_attributes(html_element): + """Given a string for an HTML element such as + + Decode and return a dictionary of attributes. + { + 'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz', + 'empty': '', 'noval': None, 'entity': '&', + 'sq': '"', 'dq': '\'' + }. + NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions, + but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. + """ + parser = HTMLAttributeParser() + try: + parser.feed(html_element) + parser.close() + # Older Python may throw HTMLParseError in case of malformed HTML + except compat_HTMLParseError: + pass + return parser.attrs + + +def clean_html(html): + """Clean an HTML snippet into a readable string""" + + if html is None: # Convenience for sanitizing descriptions etc. + return html + + # Newline vs
+ html = html.replace('\n', ' ') + html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html) + html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html) + # Strip html tags + html = re.sub('<.*?>', '', html) + # Replace html entities + html = unescapeHTML(html) + return html.strip() + + +def sanitize_open(filename, open_mode): + """Try to open the given filename, and slightly tweak it if this fails. + + Attempts to open the given filename. If this fails, it tries to change + the filename slightly, step by step, until it's either able to open it + or it fails and raises a final exception, like the standard open() + function. + + It returns the tuple (stream, definitive_file_name). + """ + try: + if filename == '-': + if sys.platform == 'win32': + import msvcrt + msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) + return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename) + stream = open(encodeFilename(filename), open_mode) + return (stream, filename) + except (IOError, OSError) as err: + if err.errno in (errno.EACCES,): + raise + + # In case of error, try to remove win32 forbidden chars + alt_filename = sanitize_path(filename) + if alt_filename == filename: + raise + else: + # An exception here should be caught in the caller + stream = open(encodeFilename(alt_filename), open_mode) + return (stream, alt_filename) + + +def timeconvert(timestr): + """Convert RFC 2822 defined time string into system timestamp""" + timestamp = None + timetuple = email.utils.parsedate_tz(timestr) + if timetuple is not None: + timestamp = email.utils.mktime_tz(timetuple) + return timestamp + + +def sanitize_filename(s, restricted=False, is_id=False): + """Sanitizes a string so it could be used as part of a filename. + If restricted is set, use a stricter subset of allowed characters. + Set is_id if this is not an arbitrary string, but an ID that should be kept + if possible. 
+ """ + def replace_insane(char): + if restricted and char in ACCENT_CHARS: + return ACCENT_CHARS[char] + if char == '?' or ord(char) < 32 or ord(char) == 127: + return '' + elif char == '"': + return '' if restricted else '\'' + elif char == ':': + return '_-' if restricted else ' -' + elif char in '\\/|*<>': + return '_' + if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()): + return '_' + if restricted and ord(char) > 127: + return '' if unicodedata.category(char)[0] in 'CM' else '_' + + return char + + # Replace look-alike Unicode glyphs + if restricted and not is_id: + s = unicodedata.normalize('NFKC', s) + # Handle timestamps + s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) + result = ''.join(map(replace_insane, s)) + if not is_id: + while '__' in result: + result = result.replace('__', '_') + result = result.strip('_') + # Common case of "Foreign band name - English song title" + if restricted and result.startswith('-_'): + result = result[2:] + if result.startswith('-'): + result = '_' + result[len('-'):] + result = result.lstrip('.') + if not result: + result = '_' + return result + + +def sanitize_path(s): + """Sanitizes and normalizes path on Windows""" + if sys.platform != 'win32': + return s + drive_or_unc, _ = os.path.splitdrive(s) + if sys.version_info < (2, 7) and not drive_or_unc: + drive_or_unc, _ = os.path.splitunc(s) + norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep) + if drive_or_unc: + norm_path.pop(0) + sanitized_path = [ + path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part) + for path_part in norm_path] + if drive_or_unc: + sanitized_path.insert(0, drive_or_unc + os.path.sep) + return os.path.join(*sanitized_path) + + +def sanitize_url(url): + # Prepend protocol-less URLs with `http:` scheme in order to mitigate + # the number of unwanted failures due to missing protocol + if url.startswith('//'): + return 'http:%s' % url + # 
Fix some common typos seen so far + COMMON_TYPOS = ( + # https://github.com/ytdl-org/youtube-dl/issues/15649 + (r'^httpss://', r'https://'), + # https://bx1.be/lives/direct-tv/ + (r'^rmtp([es]?)://', r'rtmp\1://'), + ) + for mistake, fixup in COMMON_TYPOS: + if re.match(mistake, url): + return re.sub(mistake, fixup, url) + return url + + +def sanitized_Request(url, *args, **kwargs): + return compat_urllib_request.Request(escape_url(sanitize_url(url)), *args, **kwargs) + + +def expand_path(s): + """Expand shell variables and ~""" + return os.path.expandvars(compat_expanduser(s)) + + +def orderedSet(iterable): + """ Remove all duplicates from the input iterable """ + res = [] + for el in iterable: + if el not in res: + res.append(el) + return res + + +def _htmlentity_transform(entity_with_semicolon): + """Transforms an HTML entity to a character.""" + entity = entity_with_semicolon[:-1] + + # Known non-numeric HTML entity + if entity in compat_html_entities.name2codepoint: + return compat_chr(compat_html_entities.name2codepoint[entity]) + + # TODO: HTML5 allows entities without a semicolon. For example, + # 'Éric' should be decoded as 'Éric'. 
+ if entity_with_semicolon in compat_html_entities_html5: + return compat_html_entities_html5[entity_with_semicolon] + + mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity) + if mobj is not None: + numstr = mobj.group(1) + if numstr.startswith('x'): + base = 16 + numstr = '0%s' % numstr + else: + base = 10 + # See https://github.com/ytdl-org/youtube-dl/issues/7518 + try: + return compat_chr(int(numstr, base)) + except ValueError: + pass + + # Unknown entity in name, return its literal representation + return '&%s;' % entity + + +def unescapeHTML(s): + if s is None: + return None + assert isinstance(s, compat_str) + + return re.sub( + r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s) + + +def process_communicate_or_kill(p, *args, **kwargs): + try: + return p.communicate(*args, **kwargs) + except BaseException: # Including KeyboardInterrupt + p.kill() + p.wait() + raise + + +def get_subprocess_encoding(): + if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5: + # For subprocess calls, encode with locale encoding + # Refer to http://stackoverflow.com/a/9951851/35070 + encoding = preferredencoding() + else: + encoding = sys.getfilesystemencoding() + if encoding is None: + encoding = 'utf-8' + return encoding + + +# Jython assumes filenames are Unicode strings though reported as Python 2.x compatible +if sys.version_info < (3, 0) and not sys.platform.startswith('java'): + + def encodeFilename(s, for_subprocess=False): + """ + @param s The name of the file + """ + + # Pass '' directly to use Unicode APIs on Windows 2000 and up + # (Detecting Windows NT 4 is tricky because 'major >= 4' would + # match Windows 9x series as well. Besides, NT 4 is obsolete.) 
+ if (not for_subprocess + and sys.platform == 'win32' + and sys.getwindowsversion()[0] >= 5 + and isinstance(s, compat_str)): + return s + + return _encode_compat_str(s, get_subprocess_encoding(), 'ignore') + + def decodeFilename(b, for_subprocess=False): + return _decode_compat_str(b, get_subprocess_encoding(), 'ignore') + +else: + + # Python 3 has a Unicode API + encodeFilename = decodeFilename = lambda *s, **k: s[0] + + +def encodeArgument(s): + if not isinstance(s, compat_str): + # Legacy code that uses byte strings + # Uncomment the following line after fixing all post processors + # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s)) + s = s.decode('ascii') + return encodeFilename(s, True) + + +def decodeArgument(b): + return decodeFilename(b, True) + + +def decodeOption(optval): + if optval is None: + return optval + return _decode_compat_str(optval) + + +def formatSeconds(secs): + if secs > 3600: + return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60) + elif secs > 60: + return '%d:%02d' % (secs // 60, secs % 60) + else: + return '%d' % secs + + +def make_HTTPS_handler(params, **kwargs): + + # https://www.rfc-editor.org/info/rfc7301 + ALPN_PROTOCOLS = ['http/1.1'] + + def set_alpn_protocols(ctx): + # From https://github.com/yt-dlp/yt-dlp/commit/2c6dcb65fb612fc5bc5c61937bf438d3c473d8d0 + # Thanks @coletdjnz + # Some servers may (wrongly) reject requests if ALPN extension is not sent. 
See: + # https://github.com/python/cpython/issues/85140 + # https://github.com/yt-dlp/yt-dlp/issues/3878 + try: + ctx.set_alpn_protocols(ALPN_PROTOCOLS) + except (AttributeError, NotImplementedError): + # Python < 2.7.10, not ssl.HAS_ALPN + pass + + opts_no_check_certificate = params.get('nocheckcertificate', False) + if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9 + context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH) + set_alpn_protocols(context) + if opts_no_check_certificate: + context.check_hostname = False + context.verify_mode = ssl.CERT_NONE + + try: + return YoutubeDLHTTPSHandler(params, context=context, **kwargs) + except TypeError: + # Python 2.7.8 + # (create_default_context present but HTTPSHandler has no context=) + pass + + if sys.version_info < (3, 2): + return YoutubeDLHTTPSHandler(params, **kwargs) + else: # Python3 < 3.4 + context = ssl.SSLContext(ssl.PROTOCOL_TLSv1) + context.verify_mode = (ssl.CERT_NONE + if opts_no_check_certificate + else ssl.CERT_REQUIRED) + context.set_default_verify_paths() + set_alpn_protocols(context) + return YoutubeDLHTTPSHandler(params, context=context, **kwargs) + + +def bug_reports_message(): + if ytdl_is_updateable(): + update_cmd = 'type youtube-dl -U to update' + else: + update_cmd = 'see https://yt-dl.org/update on how to update' + msg = '; please report this issue on https://yt-dl.org/bug .' + msg += ' Make sure you are using the latest version; %s.' % update_cmd + msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.' + return msg + + +class YoutubeDLError(Exception): + """Base exception for YoutubeDL errors.""" + pass + + +class ExtractorError(YoutubeDLError): + """Error during info extraction.""" + + def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None): + """ tb, if given, is the original traceback (so that it can be printed out). 
+ If expected is set, this is a normal error message and most likely not a bug in youtube-dl. + """ + + if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): + expected = True + if video_id is not None: + msg = video_id + ': ' + msg + if cause: + msg += ' (caused by %r)' % cause + if not expected: + msg += bug_reports_message() + super(ExtractorError, self).__init__(msg) + + self.traceback = tb + self.exc_info = sys.exc_info() # preserve original exception + self.cause = cause + self.video_id = video_id + + def format_traceback(self): + if self.traceback is None: + return None + return ''.join(traceback.format_tb(self.traceback)) + + +class UnsupportedError(ExtractorError): + def __init__(self, url): + super(UnsupportedError, self).__init__( + 'Unsupported URL: %s' % url, expected=True) + self.url = url + + +class RegexNotFoundError(ExtractorError): + """Error when a regex didn't match""" + pass + + +class GeoRestrictedError(ExtractorError): + """Geographic restriction Error exception. + + This exception may be thrown when a video is not available from your + geographic location due to geographic restrictions imposed by a website. + """ + def __init__(self, msg, countries=None): + super(GeoRestrictedError, self).__init__(msg, expected=True) + self.msg = msg + self.countries = countries + + +class DownloadError(YoutubeDLError): + """Download Error exception. + + This exception may be thrown by FileDownloader objects if they are not + configured to continue on errors. They will contain the appropriate + error message. + """ + + def __init__(self, msg, exc_info=None): + """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """ + super(DownloadError, self).__init__(msg) + self.exc_info = exc_info + + +class SameFileError(YoutubeDLError): + """Same File exception. 
+ + This exception will be thrown by FileDownloader objects if they detect + multiple files would have to be downloaded to the same file on disk. + """ + pass + + +class PostProcessingError(YoutubeDLError): + """Post Processing exception. + + This exception may be raised by PostProcessor's .run() method to + indicate an error in the postprocessing task. + """ + + def __init__(self, msg): + super(PostProcessingError, self).__init__(msg) + self.msg = msg + + +class MaxDownloadsReached(YoutubeDLError): + """ --max-downloads limit has been reached. """ + pass + + +class UnavailableVideoError(YoutubeDLError): + """Unavailable Format exception. + + This exception will be thrown when a video is requested + in a format that is not available for that video. + """ + pass + + +class ContentTooShortError(YoutubeDLError): + """Content Too Short exception. + + This exception may be raised by FileDownloader objects when a file they + download is too small for what the server announced first, indicating + the connection was probably interrupted. 
+ """ + + def __init__(self, downloaded, expected): + super(ContentTooShortError, self).__init__( + 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected) + ) + # Both in bytes + self.downloaded = downloaded + self.expected = expected + + +class XAttrMetadataError(YoutubeDLError): + def __init__(self, code=None, msg='Unknown error'): + super(XAttrMetadataError, self).__init__(msg) + self.code = code + self.msg = msg + + # Parsing code and msg + if (self.code in (errno.ENOSPC, errno.EDQUOT) + or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg): + self.reason = 'NO_SPACE' + elif self.code == errno.E2BIG or 'Argument list too long' in self.msg: + self.reason = 'VALUE_TOO_LONG' + else: + self.reason = 'NOT_SUPPORTED' + + +class XAttrUnavailableError(YoutubeDLError): + pass + + +def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): + # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting + # expected HTTP responses to meet HTTP/1.0 or later (see also + # https://github.com/ytdl-org/youtube-dl/issues/6727) + if sys.version_info < (3, 0): + kwargs['strict'] = True + hc = http_class(*args, **compat_kwargs(kwargs)) + source_address = ydl_handler._params.get('source_address') + + if source_address is not None: + # This is to workaround _create_connection() from socket where it will try all + # address data from getaddrinfo() including IPv6. This filters the result from + # getaddrinfo() based on the source_address value. + # This is based on the cpython socket.create_connection() function. + # https://github.com/python/cpython/blob/master/Lib/socket.py#L691 + def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None): + host, port = address + err = None + addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM) + af = socket.AF_INET if '.' 
in source_address[0] else socket.AF_INET6 + ip_addrs = [addr for addr in addrs if addr[0] == af] + if addrs and not ip_addrs: + ip_version = 'v4' if af == socket.AF_INET else 'v6' + raise socket.error( + "No remote IP%s addresses available for connect, can't use '%s' as source address" + % (ip_version, source_address[0])) + for res in ip_addrs: + af, socktype, proto, canonname, sa = res + sock = None + try: + sock = socket.socket(af, socktype, proto) + if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: + sock.settimeout(timeout) + sock.bind(source_address) + sock.connect(sa) + err = None # Explicitly break reference cycle + return sock + except socket.error as _: + err = _ + if sock is not None: + sock.close() + if err is not None: + raise err + else: + raise socket.error('getaddrinfo returns an empty list') + if hasattr(hc, '_create_connection'): + hc._create_connection = _create_connection + sa = (source_address, 0) + if hasattr(hc, 'source_address'): # Python 2.7+ + hc.source_address = sa + else: # Python 2.6 + def _hc_connect(self, *args, **kwargs): + sock = _create_connection( + (self.host, self.port), self.timeout, sa) + if is_https: + self.sock = ssl.wrap_socket( + sock, self.key_file, self.cert_file, + ssl_version=ssl.PROTOCOL_TLSv1) + else: + self.sock = sock + hc.connect = functools.partial(_hc_connect, hc) + + return hc + + +def handle_youtubedl_headers(headers): + filtered_headers = headers + + if 'Youtubedl-no-compression' in filtered_headers: + filtered_headers = filter_dict(filtered_headers, cndn=lambda k, _: k.lower() != 'accept-encoding') + del filtered_headers['Youtubedl-no-compression'] + + return filtered_headers + + +class YoutubeDLHandler(compat_urllib_request.HTTPHandler): + """Handler for HTTP requests and responses. + + This class, when installed with an OpenerDirector, automatically adds + the standard headers to every HTTP request and handles gzipped and + deflated responses from web servers. 
If compression is to be avoided in + a particular request, the original request in the program code only has + to include the HTTP header "Youtubedl-no-compression", which will be + removed before making the real request. + + Part of this code was copied from: + + http://techknack.net/python-urllib2-handlers/, archived at + https://web.archive.org/web/20130527205558/http://techknack.net/python-urllib2-handlers/ + + Andrew Rowls, the author of that code, agreed to release it to the + public domain. + """ + + def __init__(self, params, *args, **kwargs): + compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs) + self._params = params + + def http_open(self, req): + conn_class = compat_http_client.HTTPConnection + + socks_proxy = req.headers.get('Ytdl-socks-proxy') + if socks_proxy: + conn_class = make_socks_conn_class(conn_class, socks_proxy) + del req.headers['Ytdl-socks-proxy'] + + return self.do_open(functools.partial( + _create_http_connection, self, conn_class, False), + req) + + @staticmethod + def deflate_gz(data): + try: + # format:zlib,gzip + windowsize:32768 + return data and zlib.decompress(data, 32 + zlib.MAX_WBITS) + except zlib.error: + # raw zlib * windowsize:32768 (RFC 9110: "non-conformant") + return zlib.decompress(data, -zlib.MAX_WBITS) + + @staticmethod + def gzip(data): + + from gzip import GzipFile + + def _gzip(data): + with io.BytesIO(data) as data_buf: + gz = GzipFile(fileobj=data_buf, mode='rb') + return gz.read() + + try: + return _gzip(data) + except IOError as original_ioerror: + # There may be junk at the end of the file + # See http://stackoverflow.com/q/4928560/35070 for details + for i in range(1, 1024): + try: + return _gzip(data[:-i]) + except IOError: + continue + else: + raise original_ioerror + + @staticmethod + def brotli(data): + return data and brotli.decompress(data) + + @staticmethod + def compress(data): + return data and ncompress.decompress(data) + + @staticmethod + def _fix_path(url): + # an embedded /../ or 
/./ sequence is not automatically handled by urllib2 + # see https://github.com/yt-dlp/yt-dlp/issues/3355 + parsed_url = compat_urllib_parse.urlsplit(url) + path = parsed_url.path + if not path.endswith('/'): + path += '/' + parts = path.partition('/./') + if not parts[1]: + parts = path.partition('/../') + if parts[1]: + path = compat_urllib_parse.urljoin( + parts[0] + parts[1][:1], + parts[1][1:] + (parts[2] if parsed_url.path.endswith('/') else parts[2][:-1])) + url = parsed_url._replace(path=path).geturl() + if '/.' in url: + # worse, URL path may have initial /../ against RFCs: work-around + # by stripping such prefixes, like eg Firefox + path = parsed_url.path + '/' + while path.startswith('/.'): + if path.startswith('/../'): + path = path[3:] + elif path.startswith('/./'): + path = path[2:] + else: + break + path = path[:-1] + if not path.startswith('/') and parsed_url.path.startswith('/'): + path = '/' + path + url = parsed_url._replace(path=path).geturl() + return url + + def http_request(self, req): + url = req.get_full_url() + # resolve embedded . and .. + url_fixed = self._fix_path(url) + # According to RFC 3986, URLs can not contain non-ASCII characters; however this is not + # always respected by websites: some tend to give out URLs with non percent-encoded + # non-ASCII characters (see telemb.py, ard.py [#3412]) + # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) + # To work around aforementioned issue we will replace request's original URL with + # percent-encoded one + # Since redirects are also affected (e.g. 
http://www.southpark.de/alle-episoden/s18e09) + # the code of this workaround has been moved here from YoutubeDL.urlopen() + url_escaped = escape_url(url_fixed) + + # Substitute URL if any change after escaping + if url != url_escaped: + req = update_Request(req, url=url_escaped) + + for h, v in std_headers.items(): + # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 + # The dict keys are capitalized because of this bug by urllib + if h.capitalize() not in req.headers: + req.add_header(h, v) + + # Similarly, 'Accept-encoding' + if 'Accept-encoding' not in req.headers: + req.add_header( + 'Accept-Encoding', join_nonempty( + 'gzip', 'deflate', brotli and 'br', ncompress and 'compress', + delim=', ')) + + req.headers = handle_youtubedl_headers(req.headers) + + if sys.version_info < (2, 7): + # avoid possible race where __r_type may be unset + req.get_type() + if '#' in req.get_full_url(): + # Python 2.6 is brain-dead when it comes to fragments + req._Request__original = req._Request__original.partition('#')[0] + req._Request__r_type = req._Request__r_type.partition('#')[0] + + # Use the totally undocumented AbstractHTTPHandler per + # https://github.com/yt-dlp/yt-dlp/pull/4158 + return compat_urllib_request.AbstractHTTPHandler.do_request_(self, req) + + def http_response(self, req, resp): + old_resp = resp + + # Content-Encoding header lists the encodings in order that they were applied [1]. + # To decompress, we simply do the reverse. + # [1]: https://datatracker.ietf.org/doc/html/rfc9110#name-content-encoding + decoded_response = None + decoders = { + 'gzip': self.deflate_gz, + 'deflate': self.deflate_gz, + } + if brotli: + decoders['br'] = self.brotli + if ncompress: + decoders['compress'] = self.compress + if sys.platform.startswith('java'): + # Jython zlib implementation misses gzip + decoders['gzip'] = self.gzip + + def encodings(hdrs): + # A header field that allows multiple values can have multiple instances [2]. 
+ # [2]: https://datatracker.ietf.org/doc/html/rfc9110#name-fields + for e in reversed(','.join(hdrs).split(',')): + if e: + yield e.strip() + + encodings_left = [] + try: + resp.headers.get_all + hdrs = resp.headers + except AttributeError: + # Py2 has no get_all() method: headers are rfc822.Message + from email.message import Message + hdrs = Message() + for k, v in resp.headers.items(): + hdrs[k] = v + + decoder, decoded_response = True, None + for encoding in encodings(hdrs.get_all('Content-Encoding', [])): + # "SHOULD consider" x-compress, x-gzip as compress, gzip + decoder = decoder and decoders.get(remove_start(encoding, 'x-')) + if not decoder: + encodings_left.insert(0, encoding) + continue + decoded_response = decoder(decoded_response or resp.read()) + if decoded_response is not None: + resp = compat_urllib_request.addinfourl( + io.BytesIO(decoded_response), old_resp.headers, old_resp.url, old_resp.code) + resp.msg = old_resp.msg + del resp.headers['Content-Length'] + resp.headers['Content-Length'] = '%d' % len(decoded_response) + del resp.headers['Content-Encoding'] + if encodings_left: + resp.headers['Content-Encoding'] = ', '.join(encodings_left) + + # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see + # https://github.com/ytdl-org/youtube-dl/issues/6457). + if 300 <= resp.code < 400: + location = resp.headers.get('Location') + if location: + # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 + if sys.version_info >= (3, 0): + location = location.encode('iso-8859-1') + location = location.decode('utf-8') + # resolve embedded . and .. 
+ location_fixed = self._fix_path(location) + location_escaped = escape_url(location_fixed) + if location != location_escaped: + del resp.headers['Location'] + if not isinstance(location_escaped, str): # Py 2 case + location_escaped = location_escaped.encode('utf-8') + resp.headers['Location'] = location_escaped + return resp + + https_request = http_request + https_response = http_response + + +def make_socks_conn_class(base_class, socks_proxy): + assert issubclass(base_class, ( + compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection)) + + url_components = compat_urllib_parse.urlparse(socks_proxy) + if url_components.scheme.lower() == 'socks5': + socks_type = ProxyType.SOCKS5 + elif url_components.scheme.lower() in ('socks', 'socks4'): + socks_type = ProxyType.SOCKS4 + elif url_components.scheme.lower() == 'socks4a': + socks_type = ProxyType.SOCKS4A + + def unquote_if_non_empty(s): + if not s: + return s + return compat_urllib_parse_unquote_plus(s) + + proxy_args = ( + socks_type, + url_components.hostname, url_components.port or 1080, + True, # Remote DNS + unquote_if_non_empty(url_components.username), + unquote_if_non_empty(url_components.password), + ) + + class SocksConnection(base_class): + def connect(self): + self.sock = sockssocket() + self.sock.setproxy(*proxy_args) + if type(self.timeout) in (int, float): + self.sock.settimeout(self.timeout) + self.sock.connect((self.host, self.port)) + + if isinstance(self, compat_http_client.HTTPSConnection): + if hasattr(self, '_context'): # Python > 2.6 + self.sock = self._context.wrap_socket( + self.sock, server_hostname=self.host) + else: + self.sock = ssl.wrap_socket(self.sock) + + return SocksConnection + + +class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): + def __init__(self, params, https_conn_class=None, *args, **kwargs): + compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs) + self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection + 
self._params = params + + def https_open(self, req): + kwargs = {} + conn_class = self._https_conn_class + + if hasattr(self, '_context'): # python > 2.6 + kwargs['context'] = self._context + if hasattr(self, '_check_hostname'): # python 3.x + kwargs['check_hostname'] = self._check_hostname + + socks_proxy = req.headers.get('Ytdl-socks-proxy') + if socks_proxy: + conn_class = make_socks_conn_class(conn_class, socks_proxy) + del req.headers['Ytdl-socks-proxy'] + + return self.do_open(functools.partial( + _create_http_connection, self, conn_class, True), + req, **kwargs) + + +class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): + """ + See [1] for cookie file format. + + 1. https://curl.haxx.se/docs/http-cookies.html + """ + _HTTPONLY_PREFIX = '#HttpOnly_' + _ENTRY_LEN = 7 + _HEADER = '''# Netscape HTTP Cookie File +# This file is generated by youtube-dl. Do not edit. + +''' + _CookieFileEntry = collections.namedtuple( + 'CookieFileEntry', + ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value')) + + def save(self, filename=None, ignore_discard=False, ignore_expires=False): + """ + Save cookies to a file. + + Most of the code is taken from CPython 3.8 and slightly adapted + to support cookie files with UTF-8 in both python 2 and 3. 
+ """ + if filename is None: + if self.filename is not None: + filename = self.filename + else: + raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT) + + # Store session cookies with `expires` set to 0 instead of an empty + # string + for cookie in self: + if cookie.expires is None: + cookie.expires = 0 + + with io.open(filename, 'w', encoding='utf-8') as f: + f.write(self._HEADER) + now = time.time() + for cookie in self: + if not ignore_discard and cookie.discard: + continue + if not ignore_expires and cookie.is_expired(now): + continue + if cookie.secure: + secure = 'TRUE' + else: + secure = 'FALSE' + if cookie.domain.startswith('.'): + initial_dot = 'TRUE' + else: + initial_dot = 'FALSE' + if cookie.expires is not None: + expires = compat_str(cookie.expires) + else: + expires = '' + if cookie.value is None: + # cookies.txt regards 'Set-Cookie: foo' as a cookie + # with no name, whereas http.cookiejar regards it as a + # cookie with no value. + name = '' + value = cookie.name + else: + name = cookie.name + value = cookie.value + f.write( + '\t'.join([cookie.domain, initial_dot, cookie.path, + secure, expires, name, value]) + '\n') + + def load(self, filename=None, ignore_discard=False, ignore_expires=False): + """Load cookies from a file.""" + if filename is None: + if self.filename is not None: + filename = self.filename + else: + raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT) + + def prepare_line(line): + if line.startswith(self._HTTPONLY_PREFIX): + line = line[len(self._HTTPONLY_PREFIX):] + # comments and empty lines are fine + if line.startswith('#') or not line.strip(): + return line + cookie_list = line.split('\t') + if len(cookie_list) != self._ENTRY_LEN: + raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list)) + cookie = self._CookieFileEntry(*cookie_list) + if cookie.expires_at and not cookie.expires_at.isdigit(): + raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at) + return line + + cf = 
io.StringIO() + with io.open(filename, encoding='utf-8') as f: + for line in f: + try: + cf.write(prepare_line(line)) + except compat_cookiejar.LoadError as e: + write_string( + 'WARNING: skipping cookie file entry due to %s: %r\n' + % (e, line), sys.stderr) + continue + cf.seek(0) + self._really_load(cf, filename, ignore_discard, ignore_expires) + # Session cookies are denoted by either `expires` field set to + # an empty string or 0. MozillaCookieJar only recognizes the former + # (see [1]). So we need force the latter to be recognized as session + # cookies on our own. + # Session cookies may be important for cookies-based authentication, + # e.g. usually, when user does not check 'Remember me' check box while + # logging in on a site, some important cookies are stored as session + # cookies so that not recognizing them will result in failed login. + # 1. https://bugs.python.org/issue17164 + for cookie in self: + # Treat `expires=0` cookies as session cookies + if cookie.expires == 0: + cookie.expires = None + cookie.discard = True + + def get_cookie_header(self, url): + """Generate a Cookie HTTP header for a given url""" + cookie_req = sanitized_Request(url) + self.add_cookie_header(cookie_req) + return cookie_req.get_header('Cookie') + + def get_cookies_for_url(self, url): + """Generate a list of Cookie objects for a given url""" + # Policy `_now` attribute must be set before calling `_cookies_for_request` + # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360 + self._policy._now = self._now = int(time.time()) + return self._cookies_for_request(sanitized_Request(url)) + + +class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor): + def __init__(self, cookiejar=None): + compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar) + + def http_response(self, request, response): + # Python 2 will choke on next HTTP request in row if there are non-ASCII + # characters in Set-Cookie HTTP header of last response (see 
+ # https://github.com/ytdl-org/youtube-dl/issues/6769). + # In order to at least prevent crashing we will percent encode Set-Cookie + # header before HTTPCookieProcessor starts processing it. + # if sys.version_info < (3, 0) and response.headers: + # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'): + # set_cookie = response.headers.get(set_cookie_header) + # if set_cookie: + # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ") + # if set_cookie != set_cookie_escaped: + # del response.headers[set_cookie_header] + # response.headers[set_cookie_header] = set_cookie_escaped + return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response) + + https_request = compat_urllib_request.HTTPCookieProcessor.http_request + https_response = http_response + + +class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler): + """YoutubeDL redirect handler + + The code is based on HTTPRedirectHandler implementation from CPython [1]. + + This redirect handler fixes and improves the logic to better align with RFC7261 + and what browsers tend to do [2][3] + + 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py + 2. https://datatracker.ietf.org/doc/html/rfc7231 + 3. https://github.com/python/cpython/issues/91306 + """ + + # Supply possibly missing alias + http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302 + + def redirect_request(self, req, fp, code, msg, headers, newurl): + """Return a Request or None in response to a redirect. + + This is called by the http_error_30x methods when a + redirection response is received. If a redirection should + take place, return a new Request to allow http_error_30x to + perform the redirect. Otherwise, raise HTTPError if no-one + else should try to handle this url. Return None if you can't + but another Handler might. 
+ """ + if code not in (301, 302, 303, 307, 308): + raise compat_urllib_HTTPError(req.full_url, code, msg, headers, fp) + + new_method = req.get_method() + new_data = req.data + + # On python 2 urlh.geturl() may sometimes return redirect URL + # as a byte string instead of unicode. This workaround forces + # it to return unicode. + newurl = _decode_compat_str(newurl) + + # Be conciliant with URIs containing a space. This is mainly + # redundant with the more complete encoding done in http_error_302(), + # but it is kept for compatibility with other callers. + newurl = newurl.replace(' ', '%20') + + # Technically the Cookie header should be in unredirected_hdrs; + # however in practice some may set it in normal headers anyway. + # We will remove it here to prevent any leaks. + remove_headers = ['Cookie'] + + # A 303 must either use GET or HEAD for subsequent request + # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4 + if code == 303 and req.get_method() != 'HEAD': + new_method = 'GET' + # 301 and 302 redirects are commonly turned into a GET from a POST + # for subsequent requests by browsers, so we'll do the same. + # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2 + # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3 + elif code in (301, 302) and req.get_method() == 'POST': + new_method = 'GET' + + # only remove payload if method changed (e.g. POST to GET) + if new_method != req.get_method(): + new_data = None + remove_headers.extend(['Content-Length', 'Content-Type']) + + new_headers = filter_dict(req.headers, cndn=lambda k, _: k.title() not in remove_headers) + + return compat_urllib_request.Request( + newurl, headers=new_headers, origin_req_host=req.origin_req_host, + unverifiable=True, method=new_method, data=new_data) + + +def extract_timezone(date_str): + m = re.search( + r'''(?x) + ^.{8,}? 
# >=8 char non-TZ prefix, if present + (?PZ| # just the UTC Z, or + (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or + (?= 4 alpha or 2 digits + [ ]? # optional space + (?P\+|-) # +/- + (?P[0-9]{2}):?(?P[0-9]{2}) # hh[:]mm + $) + ''', date_str) + if not m: + m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P\s*[A-Z]+)$', date_str) + timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip()) + if timezone is not None: + date_str = date_str[:-len(m.group('tz'))] + timezone = datetime.timedelta(hours=timezone or 0) + else: + date_str = date_str[:-len(m.group('tz'))] + if not m.group('sign'): + timezone = datetime.timedelta() + else: + sign = 1 if m.group('sign') == '+' else -1 + timezone = datetime.timedelta( + hours=sign * int(m.group('hours')), + minutes=sign * int(m.group('minutes'))) + return timezone, date_str + + +def parse_iso8601(date_str, delimiter='T', timezone=None): + """ Return a UNIX timestamp from the given date """ + + if date_str is None: + return None + + date_str = re.sub(r'\.[0-9]+', '', date_str) + + if timezone is None: + timezone, date_str = extract_timezone(date_str) + + try: + date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter) + dt = datetime.datetime.strptime(date_str, date_format) - timezone + return calendar.timegm(dt.timetuple()) + except ValueError: + pass + + +def date_formats(day_first=True): + return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST + + +def unified_strdate(date_str, day_first=True): + """Return a string with the date in the format YYYYMMDD""" + + if date_str is None: + return None + upload_date = None + # Replace commas + date_str = date_str.replace(',', ' ') + # Remove AM/PM + timezone + date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str) + _, date_str = extract_timezone(date_str) + + for expression in date_formats(day_first): + try: + upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') + except ValueError: + pass + if upload_date is None: + 
timetuple = email.utils.parsedate_tz(date_str) + if timetuple: + try: + upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') + except ValueError: + pass + if upload_date is not None: + return compat_str(upload_date) + + +def unified_timestamp(date_str, day_first=True): + if date_str is None: + return None + + date_str = re.sub(r'\s+', ' ', re.sub( + r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str)) + + pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0 + timezone, date_str = extract_timezone(date_str) + + # Remove AM/PM + timezone + date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str) + + # Remove unrecognized timezones from ISO 8601 alike timestamps + m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P\s*[A-Z]+)$', date_str) + if m: + date_str = date_str[:-len(m.group('tz'))] + + # Python only supports microseconds, so remove nanoseconds + m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str) + if m: + date_str = m.group(1) + + for expression in date_formats(day_first): + try: + dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta) + return calendar.timegm(dt.timetuple()) + except ValueError: + pass + timetuple = email.utils.parsedate_tz(date_str) + if timetuple: + return calendar.timegm(timetuple) + pm_delta * 3600 - compat_datetime_timedelta_total_seconds(timezone) + + +def determine_ext(url, default_ext='unknown_video'): + if url is None or '.' not in url: + return default_ext + guess = url.partition('?')[0].rpartition('.')[2] + if re.match(r'^[A-Za-z0-9]+$', guess): + return guess + # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download + elif guess.rstrip('/') in KNOWN_EXTENSIONS: + return guess.rstrip('/') + else: + return default_ext + + +def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None): + return replace_extension(filename, sub_lang + '.' 
+ sub_format, expected_real_ext) + + +def date_from_str(date_str): + """ + Return a datetime object from a string in the format YYYYMMDD or + (now|today)[+-][0-9](day|week|month|year)(s)?""" + today = datetime.date.today() + if date_str in ('now', 'today'): + return today + if date_str == 'yesterday': + return today - datetime.timedelta(days=1) + match = re.match(r'(now|today)(?P[+-])(?P
\ No newline at end of file diff --git a/.idea/music-downloader.iml b/.idea/music-downloader.iml index 27eed28..721bcfd 100644 --- a/.idea/music-downloader.iml +++ b/.idea/music-downloader.iml @@ -10,5 +10,6 @@ +
\ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml index 35eb1dd..09dbadc 100644 --- a/.idea/vcs.xml +++ b/.idea/vcs.xml @@ -2,5 +2,6 @@ + \ No newline at end of file diff --git a/src/music_kraken/__main__.py b/src/music_kraken/__main__.py index ee4b3fb..b90757b 100644 --- a/src/music_kraken/__main__.py +++ b/src/music_kraken/__main__.py @@ -104,8 +104,11 @@ def cli(): if arguments.r: import os - if os.path.exists(shared.CONFIG_FILE): - os.remove(shared.CONFIG_FILE) + + for file in shared.CONFIG_DIRECTORY.iterdir(): + if file.is_file(): + print(f"Deleting {file}....") + file.unlink() read_config() exit() diff --git a/src/music_kraken/connection/connection.py b/src/music_kraken/connection/connection.py index d46a99a..9a66a12 100644 --- a/src/music_kraken/connection/connection.py +++ b/src/music_kraken/connection/connection.py @@ -232,7 +232,6 @@ class Connection: sleep_after_404=sleep_after_404, is_heartbeat=is_heartbeat, name=name, - user_agent=main_settings["user_agent"], **kwargs ) diff --git a/src/music_kraken/pages/youtube_music/youtube_music.py b/src/music_kraken/pages/youtube_music/youtube_music.py index 9e987bb..f0b84c8 100644 --- a/src/music_kraken/pages/youtube_music/youtube_music.py +++ b/src/music_kraken/pages/youtube_music/youtube_music.py @@ -1,5 +1,7 @@ +from __future__ import unicode_literals, annotations + from typing import Dict, List, Optional, Set, Type -from urllib.parse import urlparse, urlunparse, quote, parse_qs +from urllib.parse import urlparse, urlunparse, quote, parse_qs, urlencode import logging import random import json @@ -7,6 +9,7 @@ from dataclasses import dataclass import re from functools import lru_cache +import youtube_dl from youtube_dl.jsinterp import JSInterpreter from youtube_dl.extractor.youtube import YoutubeIE @@ -17,7 +20,6 @@ from ...utils.functions import get_current_millis from .yt_utils.jsinterp import JSInterpreter - if DEBUG: from ...utils.debug_utils import dump_to_file @@ -104,8 +106,6 @@ class 
YouTubeMusicCredentials: player_url: str - - @property def player_id(self): @lru_cache(128) @@ -128,15 +128,34 @@ class YouTubeMusicCredentials: return _extract_player_info(self.player_url) +class MusicKrakenYoutubeIE(YoutubeIE): + def __init__(self, *args, main_instance: YoutubeMusic, **kwargs): + self.main_instance = main_instance + super().__init__(*args, **kwargs) + + +class MusicKrakenYoutubeDL(youtube_dl.YoutubeDL): + def __init__(self, main_instance: YoutubeMusic, ydl_opts: dict, **kwargs): + self.main_instance = main_instance + super().__init__(ydl_opts or {}, **kwargs) + super().__enter__() + + def __del__(self): + super().__exit__(None, None, None) + + + class YoutubeMusic(SuperYouTube): # CHANGE SOURCE_TYPE = SourcePages.YOUTUBE_MUSIC LOGGER = logging_settings["youtube_music_logger"] - def __init__(self, *args, **kwargs): - self.connection: YoutubeMusicConnection = YoutubeMusicConnection(logger=self.LOGGER, - accept_language="en-US,en;q=0.5") + def __init__(self, *args, ydl_opts: dict = None, **kwargs): + self.connection: YoutubeMusicConnection = YoutubeMusicConnection( + logger=self.LOGGER, + accept_language="en-US,en;q=0.5" + ) self.credentials: YouTubeMusicCredentials = YouTubeMusicCredentials( api_key=youtube_settings["youtube_music_api_key"], ctoken="", @@ -149,7 +168,17 @@ class YoutubeMusic(SuperYouTube): if self.credentials.api_key == "" or DEBUG_YOUTUBE_INITIALIZING: self._fetch_from_main_page() - SuperYouTube.__init__(self,*args, **kwargs) + SuperYouTube.__init__(self, *args, **kwargs) + + self.download_connection: Connection = Connection( + host="https://music.youtube.com/", + logger=self.LOGGER, + sleep_after_404=youtube_settings["sleep_after_youtube_403"] + ) + + # https://github.com/ytdl-org/youtube-dl/blob/master/README.md#embedding-youtube-dl + self.ydl = MusicKrakenYoutubeDL(self, ydl_opts) + self.yt_ie = MusicKrakenYoutubeIE(downloader=self.ydl, main_instance=self) def _fetch_from_main_page(self): """ @@ -283,7 +312,6 @@ class 
YoutubeMusic(SuperYouTube): default='{}' )) or {} - def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]: return super().get_source_type(source) @@ -454,15 +482,19 @@ class YoutubeMusic(SuperYouTube): r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(', r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\(' ), - code, group='sig') + code, group='sig') jsi = JSInterpreter(code) initial_function = jsi.extract_function(funcname) + return lambda s: initial_function([s]) - def _decrypt_signature(self, s): - signing_func = self._extract_signature_function(player_url=youtube_settings["player_url"]) - print(signing_func) + def _decrypt_signature(self, video_id, s): + print(youtube_settings["player_url"]) + res = self._extract_signature_function(player_url=youtube_settings["player_url"]) + test_string = ''.join(map(str, range(len(s)))) + cache_spec = [ord(c) for c in res(test_string)] + signing_func = lambda _s: ''.join(_s[i] for i in cache_spec) return signing_func(s) def _parse_adaptive_formats(self, data: list, video_id) -> dict: @@ -475,16 +507,45 @@ class YoutubeMusic(SuperYouTube): if not fmt_url: sc = parse_qs(possible_format["signatureCipher"]) print(sc["s"][0]) - signature = self._decrypt_signature(sc['s'][0]) + signature = self._decrypt_signature(video_id, sc['s'][0]) print(signature) - sp = sc.get("sp", ["sig"])[0] + tmp = sc.get("sp", ["sig"]) + sig_key = "signature" if len(tmp) <= 0 else tmp[-1] + fmt_url = sc.get("url", [None])[0] - fmt_url += '&' + sp + '=' + signature + ftm_parsed = urlparse(fmt_url) + q = parse_qs(ftm_parsed.query) + q[sig_key] = [signature] + print(json.dumps(q, indent=4)) + print(sig_key) + query_str = urlencode(q) + print(query_str) + + fmt_url = urlunparse(( + ftm_parsed.scheme, + ftm_parsed.netloc, + ftm_parsed.path, + ftm_parsed.params, + query_str, + ftm_parsed.fragment + )) + + """ + if not isinstance(url, tuple): + url = compat_urllib_parse.urlparse(url) + 
query = kwargs.pop('query_update', None) + if query: + qs = compat_parse_qs(url.query) + qs.update(query) + kwargs['query'] = compat_urllib_parse_urlencode(qs, True) + kwargs = compat_kwargs(kwargs) + return compat_urllib_parse.urlunparse(url._replace(**kwargs)) + """ return { - "bitrate": fmt.get("bitrate"), + "bitrate": fmt.get("bitrate"), "url": fmt_url } @@ -512,32 +573,16 @@ class YoutubeMusic(SuperYouTube): return parse_format(best_format) def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: - """ - curl 'https://music.youtube.com/youtubei/v1/player?key=AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30&prettyPrint=false' - --compressed -X POST - -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0' - -H 'Accept: */*' - -H 'Accept-Language: en-US,en;q=0.5' - -H 'Accept-Encoding: gzip, deflate, br' - -H 'Content-Type: application/json' - -H 'Referer: https://music.youtube.com/' - -H 'X-Goog-Visitor-Id: CgtHdmkzbGhaMDltVSj4j5mtBjIKCgJERRIEEgAgOA%3D%3D' - -H 'X-Youtube-Bootstrap-Logged-In: false' - -H 'X-Youtube-Client-Name: 67' -H 'X-Youtube-Client-Version: 1.20240103.01.00' - -H 'Origin: https://music.youtube.com' - -H 'Sec-Fetch-Dest: empty' -H 'Sec-Fetch-Mode: cors' -H 'Sec-Fetch-Site: same-origin' -H 'Connection: keep-alive' -H 'Alt-Used: music.youtube.com' - -H 'Cookie: SOCS=CAISNQgREitib3FfaWRlbnRpdHlmcm9udGVuZHVpc2VydmVyXzIwMjQwMTA5LjA1X3AwGgJlbiACGgYIgI6XrQY; YSC=r46McyPx8dE; VISITOR_PRIVACY_METADATA=CgJERRIEEgAgOA%3D%3D; CONSENT=PENDING+663; VISITOR_INFO1_LIVE=Gvi3lhZ09mU; _gcl_au=1.1.396177275.1705396217; ST-1hw5vco=csn=MC4xNTI3OTkwMzQyOTc1MzQ2&itct=CNgDEMn0AhgDIhMItMS6_cfhgwMVDMtCBR1u5wb6' -H 'TE: trailers' - --data-raw '{ - "videoId":"QeQrfsqPMCs", - "context":{"client":{"hl":"en","gl":"DE","remoteHost":"129.143.170.58","deviceMake":"","deviceModel":"","visitorData":"CgtHdmkzbGhaMDltVSj4j5mtBjIKCgJERRIEEgAgOA%3D%3D","userAgent":"Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 
Firefox/121.0,gzip(gfe)","clientName":"WEB_REMIX","clientVersion":"1.20240103.01.00","osName":"X11","osVersion":"","originalUrl":"https://music.youtube.com/?cbrd=1","platform":"DESKTOP","clientFormFactor":"UNKNOWN_FORM_FACTOR","configInfo":{"appInstallData":"CPiPma0GEL2ZsAUQqJqwBRCmgbAFEP24_RIQjaKwBRDNlbAFENWIsAUQmaSwBRD6p7AFEL75rwUQmvCvBRDT4a8FEL2KsAUQrtT-EhC36v4SENnJrwUQnouwBRDJ968FEJP8rwUQuIuuBRDM364FEIiHsAUQ0I2wBRDnuq8FEPOhsAUQ2piwBRDMrv4SEIjjrwUQooGwBRDuorAFEM6osAUQ6-j-EhC3nbAFEKXC_hIQ9fmvBRDh8q8FEJmUsAUQt--vBRD8hbAFEKigsAUQrLevBRC_o7AFEOuTrgUQqfevBRDd6P4SEJj8_hIQ6YywBRC9tq4FEOupsAUQ5LP-EhDfhP8SEOrDrwUQqKGwBRC8-a8FEPKYsAU%3D"},"browserName":"Firefox","browserVersion":"121.0","acceptHeader":"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8","deviceExperimentId":"ChxOek15TkRZeU1EazNOalE0TXpVNU1EQXhOZz09EPiPma0GGPiPma0G","screenWidthPoints":780,"screenHeightPoints":638,"screenPixelDensity":2,"screenDensityFloat":2,"utcOffsetMinutes":60,"userInterfaceTheme":"USER_INTERFACE_THEME_DARK","timeZone":"Europe/Berlin","playerType":"UNIPLAYER","tvAppInfo":{"livingRoomAppMode":"LIVING_ROOM_APP_MODE_UNSPECIFIED"},"clientScreen":"WATCH_FULL_SCREEN"},"user":{"lockedSafetyMode":false},"request":{"useSsl":true,"internalExperimentFlags":[],"consistencyTokenJars":[]},"clientScreenNonce":"MC4xNTI3OTkwMzQyOTc1MzQ2","adSignalsInfo":{"params":[{"key":"dt","value":"1705396224619"},{"key":"flash","value":"0"},{"key":"frm","value":"0"},{"key":"u_tz","value":"60"},{"key":"u_his","value":"5"},{"key":"u_h","value":"800"},{"key":"u_w","value":"1280"},{"key":"u_ah","value":"769"},{"key":"u_aw","value":"1280"},{"key":"u_cd","value":"24"},{"key":"bc","value":"31"},{"key":"bih","value":"638"},{"key":"biw","value":"780"},{"key":"brdim","value":"0,31,0,31,1280,31,1280,769,780,638"},{"key":"vis","value":"1"},{"key":"wgl","value":"true"},{"key":"ca_type","value":"image"}]},"clickTracking":{"clickTrackingParams":"CNgDEMn0AhgDIhMItMS6_cfhgwMVDMtCBR1u5wb6"}},"playba
ckContext":{"contentPlaybackContext":{"html5Preference":"HTML5_PREF_WANTS","lactMilliseconds":"22","referer":"https://music.youtube.com/","signatureTimestamp":19732,"autoCaptionsDefaultOn":false,"mdxContext":{}}},"cpn":"Aqv99K7Z_3tj9ACA","playlistId":"RDAMVMQeQrfsqPMCs","captionParams":{},"serviceIntegrityDimensions":{"poToken":"MnQLhidwfIVPEAu-woG_SQU69mfPclEz7kVUmC1dNP8EQN7NNyVdF3KcVIuKRKrcXlwOXEQg3hc5qXSBbbQU_M7lxx9zgQMelv9iZwWfWlLyI9RoZXB1wipAYHWNzxu7rMqDwRn5M6WS4RRIeHcld9P_YZRYdg=="} - }' - :param source: - :param stop_at_level: - :return: - """ - song = Song(source_list=[ - source - ]) + # implement the functionality yt_dl provides + ydl_res = self.yt_ie._real_extract(source.url) + print(ydl_res) + + source.audio_url = ydl_res.get("formats")[0].get("url") + song = Song( + title=ydl_res.get("title"), + source_list=[source], + ) + return song parsed_url = urlparse(source.url) video_id = parse_qs(parsed_url.query)['v'] @@ -575,4 +620,9 @@ class YoutubeMusic(SuperYouTube): return super().download_song_to_target(source, target) print(source.audio_url) - return self.download_connection.stream_into(source.audio_url, target, description=desc, raw_url=True) + return self.download_connection.stream_into(source.audio_url, target, description=desc, headers={ + "Host": "rr1---sn-cxaf0x-nugl.googlevideo.com" + }) + + def __del__(self): + self.ydl.__exit__() diff --git a/src/music_kraken/utils/config/config_files/youtube_config.py b/src/music_kraken/utils/config/config_files/youtube_config.py index 3c525d5..4610f18 100644 --- a/src/music_kraken/utils/config/config_files/youtube_config.py +++ b/src/music_kraken/utils/config/config_files/youtube_config.py @@ -30,12 +30,13 @@ Dw. 
if it is empty, Rachel will fetch it automatically for you <333 Attribute(name="youtube_music_clean_data", default_value=True, description="If set to true, it exclusively fetches artists/albums/songs, not things like user channels etc."), UrlAttribute(name="youtube_url", default_value=[ "https://www.youtube.com/", - "https://www.youtu.be/" + "https://www.youtu.be/", + "https://music.youtube.com/", ], description="""This is used to detect, if an url is from youtube, or any alternativ frontend. If any instance seems to be missing, run music kraken with the -f flag."""), Attribute(name="use_sponsor_block", default_value=True, description="Use sponsor block to remove adds or simmilar from the youtube videos."), - Attribute(name="player_url", default_value="/s/player/80b90bfd/player_ias.vflset/en_US/base.js", description=""" + Attribute(name="player_url", default_value="https://music.youtube.com/s/player/80b90bfd/player_ias.vflset/en_US/base.js", description=""" This is needed to fetch videos without invidious """), Attribute(name="youtube_music_consent_cookies", default_value={ diff --git a/src/music_kraken/utils/shared.py b/src/music_kraken/utils/shared.py index d1645f8..31cadec 100644 --- a/src/music_kraken/utils/shared.py +++ b/src/music_kraken/utils/shared.py @@ -1,5 +1,6 @@ import random +from .path_manager import LOCATIONS from .config import main_settings DEBUG = True @@ -15,6 +16,8 @@ def get_random_message() -> str: return random.choice(main_settings['happy_messages']) +CONFIG_DIRECTORY = LOCATIONS.CONFIG_DIRECTORY + HIGHEST_ID = 2 ** main_settings['id_bits'] HELP_MESSAGE = """to search: From c7279eb424a97cc9693777851f6e6ddd8a781e9b Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 31 Jan 2024 15:24:19 +0100 Subject: [PATCH 067/104] feat: layed out the calculation of prefered format. 
--- .../pages/youtube_music/youtube_music.py | 183 ++++-------------- 1 file changed, 43 insertions(+), 140 deletions(-) diff --git a/src/music_kraken/pages/youtube_music/youtube_music.py b/src/music_kraken/pages/youtube_music/youtube_music.py index f0b84c8..fb3f812 100644 --- a/src/music_kraken/pages/youtube_music/youtube_music.py +++ b/src/music_kraken/pages/youtube_music/youtube_music.py @@ -128,22 +128,43 @@ class YouTubeMusicCredentials: return _extract_player_info(self.player_url) -class MusicKrakenYoutubeIE(YoutubeIE): - def __init__(self, *args, main_instance: YoutubeMusic, **kwargs): - self.main_instance = main_instance - super().__init__(*args, **kwargs) +class YTDLLogger: + def __init__(self, logger: logging.Logger): + self.logger = logger + + def debug(self, msg): + self.logger.debug(msg) + + def warning(self, msg): + self.logger.warning(msg) + + def error(self, msg): + self.logger.error(msg) class MusicKrakenYoutubeDL(youtube_dl.YoutubeDL): def __init__(self, main_instance: YoutubeMusic, ydl_opts: dict, **kwargs): self.main_instance = main_instance - super().__init__(ydl_opts or {}, **kwargs) + ydl_opts = ydl_opts or {} + ydl_opts.update({ + "logger": YTDLLogger(self.main_instance.LOGGER), + }) + + super().__init__(ydl_opts, **kwargs) super().__enter__() def __del__(self): super().__exit__(None, None, None) +class MusicKrakenYoutubeIE(YoutubeIE): + def __init__(self, *args, main_instance: YoutubeMusic, **kwargs): + self.main_instance = main_instance + super().__init__(*args, **kwargs) + + def _extract_player_url(self, *ytcfgs, **kw_webpage): + return youtube_settings["player_url"] + class YoutubeMusic(SuperYouTube): @@ -459,157 +480,39 @@ class YoutubeMusic(SuperYouTube): return album - @lru_cache() - def _extract_signature_function(self, player_url): - r = self.connection.get(player_url) - if r is None: - return lambda x: None + def _get_best_format(self, format_list: List[Dict]) -> str: + def _calc_score(_f: dict): + s = 0 - code = r.text + _url = 
_f.get("url", "") + if "mime=audio" in _url: + s += 100 - funcname = self._search_regex(( - r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', - r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', - r'\bm=(?P[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)', - r'\bc&&\(c=(?P[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)', - r'(?:\b|[^a-zA-Z0-9$])(?P[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?', - r'(?P[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', - # Obsolete patterns - r'("|\')signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', - r'\.sig\|\|(?P[a-zA-Z0-9$]+)\(', - r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P[a-zA-Z0-9$]+)\(', - r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(', - r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\(' - ), - code, group='sig') + return s - jsi = JSInterpreter(code) - initial_function = jsi.extract_function(funcname) + highest_score = 0 + best_format = {} + for _format in format_list: + print(_format) - return lambda s: initial_function([s]) + _s = _calc_score(_format) + if _s >= highest_score: + highest_score = _s + best_format = _format - def _decrypt_signature(self, video_id, s): - print(youtube_settings["player_url"]) - res = self._extract_signature_function(player_url=youtube_settings["player_url"]) - test_string = ''.join(map(str, range(len(s)))) - cache_spec = [ord(c) for c in res(test_string)] - signing_func = lambda _s: ''.join(_s[i] for i in cache_spec) - return signing_func(s) - - def _parse_adaptive_formats(self, data: list, video_id) -> dict: - best_format = None - best_bitrate = 0 - - def parse_format(fmt: dict): - fmt_url = fmt.get('url') - - if not 
fmt_url: - sc = parse_qs(possible_format["signatureCipher"]) - print(sc["s"][0]) - signature = self._decrypt_signature(video_id, sc['s'][0]) - print(signature) - - tmp = sc.get("sp", ["sig"]) - sig_key = "signature" if len(tmp) <= 0 else tmp[-1] - - fmt_url = sc.get("url", [None])[0] - - ftm_parsed = urlparse(fmt_url) - q = parse_qs(ftm_parsed.query) - q[sig_key] = [signature] - print(json.dumps(q, indent=4)) - print(sig_key) - query_str = urlencode(q) - print(query_str) - - fmt_url = urlunparse(( - ftm_parsed.scheme, - ftm_parsed.netloc, - ftm_parsed.path, - ftm_parsed.params, - query_str, - ftm_parsed.fragment - )) - - """ - if not isinstance(url, tuple): - url = compat_urllib_parse.urlparse(url) - query = kwargs.pop('query_update', None) - if query: - qs = compat_parse_qs(url.query) - qs.update(query) - kwargs['query'] = compat_urllib_parse_urlencode(qs, True) - kwargs = compat_kwargs(kwargs) - return compat_urllib_parse.urlunparse(url._replace(**kwargs)) - """ - - return { - "bitrate": fmt.get("bitrate"), - "url": fmt_url - } - - for possible_format in sorted(data, key=lambda x: x.get("bitrate", 0)): - if best_bitrate <= 0: - # no format has been found yet - best_format = possible_format - - if possible_format.get('targetDurationSec') or possible_format.get('drmFamilies'): - continue - - mime_type: str = possible_format["mimeType"] - if not mime_type.startswith("audio"): - continue - - bitrate = int(possible_format.get("bitrate", 0)) - - if bitrate > best_bitrate: - best_bitrate = bitrate - best_format = possible_format - - if bitrate >= main_settings["bitrate"]: - break - - return parse_format(best_format) + return best_format.get("url") def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: # implement the functionality yt_dl provides ydl_res = self.yt_ie._real_extract(source.url) - print(ydl_res) - source.audio_url = ydl_res.get("formats")[0].get("url") + source.audio_url = self._get_best_format(ydl_res.get("formats", [{}])) song = Song( 
title=ydl_res.get("title"), source_list=[source], ) return song - parsed_url = urlparse(source.url) - video_id = parse_qs(parsed_url.query)['v'] - if len(video_id) <= 0: - return song - browse_id = video_id[0] - - r = self.connection.post( - url=get_youtube_url(path="/youtubei/v1/player", query=f"key={self.credentials.api_key}&prettyPrint=false"), - json={ - "videoId": browse_id, - "context": {**self.credentials.context, "adSignalsInfo": {"params": []}} - } - ) - if r is None: - return song - - data = r.json() - - dump_to_file("yt_video_overview.json", data, exit_after_dump=False) - - available_formats = data.get("streamingData", {}).get("adaptiveFormats", []) - - if len(available_formats) > 0: - source.audio_url = self._parse_adaptive_formats(available_formats, video_id=browse_id).get("url") - - return song def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: if source.audio_url is None: From 42729cd585d411aa3995cd0f3c269924bbfee498 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 26 Feb 2024 14:38:37 +0100 Subject: [PATCH 068/104] fix: encoding of url --- requirements.txt | 4 +- .../pages/youtube_music/youtube_music.py | 11 +- .../pages/youtube_music/yt_utils/__init__.py | 0 .../pages/youtube_music/yt_utils/compat.py | 3308 --------- .../pages/youtube_music/yt_utils/jsinterp.py | 1054 --- .../pages/youtube_music/yt_utils/socks.py | 273 - .../pages/youtube_music/yt_utils/utils.py | 6513 ----------------- 7 files changed, 3 insertions(+), 11160 deletions(-) delete mode 100644 src/music_kraken/pages/youtube_music/yt_utils/__init__.py delete mode 100644 src/music_kraken/pages/youtube_music/yt_utils/compat.py delete mode 100644 src/music_kraken/pages/youtube_music/yt_utils/jsinterp.py delete mode 100644 src/music_kraken/pages/youtube_music/yt_utils/socks.py delete mode 100644 src/music_kraken/pages/youtube_music/yt_utils/utils.py diff --git a/requirements.txt b/requirements.txt index 31605f0..200fa36 100644 --- 
a/requirements.txt +++ b/requirements.txt @@ -3,14 +3,14 @@ mutagen~=1.46.0 musicbrainzngs~=0.7.1 jellyfish~=0.9.0 beautifulsoup4~=4.11.1 -pycountry~=22.3.5 +pycountry~=24.0.1 python-dateutil~=2.8.2 pandoc~=2.3 SQLAlchemy~=2.0.7 setuptools~=68.2.0 tqdm~=4.65.0 ffmpeg-python~=0.2.0 -platformdirs~=3.2.0 +platformdirs~=4.2.0 transliterate~=1.10.2 sponsorblock~=0.1.3 regex~=2022.9.13 diff --git a/src/music_kraken/pages/youtube_music/youtube_music.py b/src/music_kraken/pages/youtube_music/youtube_music.py index fb3f812..7d18f63 100644 --- a/src/music_kraken/pages/youtube_music/youtube_music.py +++ b/src/music_kraken/pages/youtube_music/youtube_music.py @@ -10,7 +10,6 @@ import re from functools import lru_cache import youtube_dl -from youtube_dl.jsinterp import JSInterpreter from youtube_dl.extractor.youtube import YoutubeIE from ...utils.exception.config import SettingValueError @@ -18,8 +17,6 @@ from ...utils.config import main_settings, youtube_settings, logging_settings from ...utils.shared import DEBUG, DEBUG_YOUTUBE_INITIALIZING from ...utils.functions import get_current_millis -from .yt_utils.jsinterp import JSInterpreter - if DEBUG: from ...utils.debug_utils import dump_to_file @@ -162,8 +159,6 @@ class MusicKrakenYoutubeIE(YoutubeIE): self.main_instance = main_instance super().__init__(*args, **kwargs) - def _extract_player_url(self, *ytcfgs, **kw_webpage): - return youtube_settings["player_url"] @@ -493,8 +488,6 @@ class YoutubeMusic(SuperYouTube): highest_score = 0 best_format = {} for _format in format_list: - print(_format) - _s = _calc_score(_format) if _s >= highest_score: highest_score = _s @@ -513,7 +506,6 @@ class YoutubeMusic(SuperYouTube): ) return song - def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: if source.audio_url is None: self.fetch_song(source) @@ -522,10 +514,9 @@ class YoutubeMusic(SuperYouTube): self.LOGGER.warning(f"Couldn't fetch the audio source with the innertube api, falling 
back to invidious.") return super().download_song_to_target(source, target) - print(source.audio_url) return self.download_connection.stream_into(source.audio_url, target, description=desc, headers={ "Host": "rr1---sn-cxaf0x-nugl.googlevideo.com" - }) + }, raw_url=True) def __del__(self): self.ydl.__exit__() diff --git a/src/music_kraken/pages/youtube_music/yt_utils/__init__.py b/src/music_kraken/pages/youtube_music/yt_utils/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/music_kraken/pages/youtube_music/yt_utils/compat.py b/src/music_kraken/pages/youtube_music/yt_utils/compat.py deleted file mode 100644 index 3c526a7..0000000 --- a/src/music_kraken/pages/youtube_music/yt_utils/compat.py +++ /dev/null @@ -1,3308 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals -from __future__ import division - -import base64 -import binascii -import collections -import ctypes -import datetime -import email -import getpass -import io -import itertools -import optparse -import os -import platform -import re -import shlex -import shutil -import socket -import struct -import subprocess -import sys -import types -import xml.etree.ElementTree - -# naming convention -# 'compat_' + Python3_name.replace('.', '_') -# other aliases exist for convenience and/or legacy - -# deal with critical unicode/str things first -try: - # Python 2 - compat_str, compat_basestring, compat_chr = ( - unicode, basestring, unichr - ) -except NameError: - compat_str, compat_basestring, compat_chr = ( - str, (str, bytes), chr - ) - -# casefold -try: - compat_str.casefold - compat_casefold = lambda s: s.casefold() -except AttributeError: - from .casefold import casefold as compat_casefold - -try: - import collections.abc as compat_collections_abc -except ImportError: - import collections as compat_collections_abc - -try: - import urllib.request as compat_urllib_request -except ImportError: # Python 2 - import urllib2 as compat_urllib_request - -# Also fix up lack of 
method arg in old Pythons -try: - _req = compat_urllib_request.Request - _req('http://127.0.0.1', method='GET') -except TypeError: - class _request(object): - def __new__(cls, url, *args, **kwargs): - method = kwargs.pop('method', None) - r = _req(url, *args, **kwargs) - if method: - r.get_method = types.MethodType(lambda _: method, r) - return r - - compat_urllib_request.Request = _request - - -try: - import urllib.error as compat_urllib_error -except ImportError: # Python 2 - import urllib2 as compat_urllib_error - -try: - import urllib.parse as compat_urllib_parse -except ImportError: # Python 2 - import urllib as compat_urllib_parse - import urlparse as _urlparse - for a in dir(_urlparse): - if not hasattr(compat_urllib_parse, a): - setattr(compat_urllib_parse, a, getattr(_urlparse, a)) - del _urlparse - -# unfavoured aliases -compat_urlparse = compat_urllib_parse -compat_urllib_parse_urlparse = compat_urllib_parse.urlparse - -try: - import urllib.response as compat_urllib_response -except ImportError: # Python 2 - import urllib as compat_urllib_response - -try: - compat_urllib_response.addinfourl.status -except AttributeError: - # .getcode() is deprecated in Py 3. 
- compat_urllib_response.addinfourl.status = property(lambda self: self.getcode()) - -try: - import http.cookiejar as compat_cookiejar -except ImportError: # Python 2 - import cookielib as compat_cookiejar -compat_http_cookiejar = compat_cookiejar - -if sys.version_info[0] == 2: - class compat_cookiejar_Cookie(compat_cookiejar.Cookie): - def __init__(self, version, name, value, *args, **kwargs): - if isinstance(name, compat_str): - name = name.encode() - if isinstance(value, compat_str): - value = value.encode() - compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs) -else: - compat_cookiejar_Cookie = compat_cookiejar.Cookie -compat_http_cookiejar_Cookie = compat_cookiejar_Cookie - -try: - import http.cookies as compat_cookies -except ImportError: # Python 2 - import Cookie as compat_cookies -compat_http_cookies = compat_cookies - -if sys.version_info[0] == 2 or sys.version_info < (3, 3): - class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie): - def load(self, rawdata): - must_have_value = 0 - if not isinstance(rawdata, dict): - if sys.version_info[:2] != (2, 7) or sys.platform.startswith('java'): - # attribute must have value for parsing - rawdata, must_have_value = re.subn( - r'(?i)(;\s*)(secure|httponly)(\s*(?:;|$))', r'\1\2=\2\3', rawdata) - if sys.version_info[0] == 2: - if isinstance(rawdata, compat_str): - rawdata = str(rawdata) - super(compat_cookies_SimpleCookie, self).load(rawdata) - if must_have_value > 0: - for morsel in self.values(): - for attr in ('secure', 'httponly'): - if morsel.get(attr): - morsel[attr] = True -else: - compat_cookies_SimpleCookie = compat_cookies.SimpleCookie -compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie - -try: - import html.entities as compat_html_entities -except ImportError: # Python 2 - import htmlentitydefs as compat_html_entities - -try: # Python >= 3.3 - compat_html_entities_html5 = compat_html_entities.html5 -except AttributeError: - # Copied from CPython 3.5.1 
html/entities.py - compat_html_entities_html5 = { - 'Aacute': '\xc1', - 'aacute': '\xe1', - 'Aacute;': '\xc1', - 'aacute;': '\xe1', - 'Abreve;': '\u0102', - 'abreve;': '\u0103', - 'ac;': '\u223e', - 'acd;': '\u223f', - 'acE;': '\u223e\u0333', - 'Acirc': '\xc2', - 'acirc': '\xe2', - 'Acirc;': '\xc2', - 'acirc;': '\xe2', - 'acute': '\xb4', - 'acute;': '\xb4', - 'Acy;': '\u0410', - 'acy;': '\u0430', - 'AElig': '\xc6', - 'aelig': '\xe6', - 'AElig;': '\xc6', - 'aelig;': '\xe6', - 'af;': '\u2061', - 'Afr;': '\U0001d504', - 'afr;': '\U0001d51e', - 'Agrave': '\xc0', - 'agrave': '\xe0', - 'Agrave;': '\xc0', - 'agrave;': '\xe0', - 'alefsym;': '\u2135', - 'aleph;': '\u2135', - 'Alpha;': '\u0391', - 'alpha;': '\u03b1', - 'Amacr;': '\u0100', - 'amacr;': '\u0101', - 'amalg;': '\u2a3f', - 'AMP': '&', - 'amp': '&', - 'AMP;': '&', - 'amp;': '&', - 'And;': '\u2a53', - 'and;': '\u2227', - 'andand;': '\u2a55', - 'andd;': '\u2a5c', - 'andslope;': '\u2a58', - 'andv;': '\u2a5a', - 'ang;': '\u2220', - 'ange;': '\u29a4', - 'angle;': '\u2220', - 'angmsd;': '\u2221', - 'angmsdaa;': '\u29a8', - 'angmsdab;': '\u29a9', - 'angmsdac;': '\u29aa', - 'angmsdad;': '\u29ab', - 'angmsdae;': '\u29ac', - 'angmsdaf;': '\u29ad', - 'angmsdag;': '\u29ae', - 'angmsdah;': '\u29af', - 'angrt;': '\u221f', - 'angrtvb;': '\u22be', - 'angrtvbd;': '\u299d', - 'angsph;': '\u2222', - 'angst;': '\xc5', - 'angzarr;': '\u237c', - 'Aogon;': '\u0104', - 'aogon;': '\u0105', - 'Aopf;': '\U0001d538', - 'aopf;': '\U0001d552', - 'ap;': '\u2248', - 'apacir;': '\u2a6f', - 'apE;': '\u2a70', - 'ape;': '\u224a', - 'apid;': '\u224b', - 'apos;': "'", - 'ApplyFunction;': '\u2061', - 'approx;': '\u2248', - 'approxeq;': '\u224a', - 'Aring': '\xc5', - 'aring': '\xe5', - 'Aring;': '\xc5', - 'aring;': '\xe5', - 'Ascr;': '\U0001d49c', - 'ascr;': '\U0001d4b6', - 'Assign;': '\u2254', - 'ast;': '*', - 'asymp;': '\u2248', - 'asympeq;': '\u224d', - 'Atilde': '\xc3', - 'atilde': '\xe3', - 'Atilde;': '\xc3', - 'atilde;': '\xe3', - 'Auml': '\xc4', - 
'auml': '\xe4', - 'Auml;': '\xc4', - 'auml;': '\xe4', - 'awconint;': '\u2233', - 'awint;': '\u2a11', - 'backcong;': '\u224c', - 'backepsilon;': '\u03f6', - 'backprime;': '\u2035', - 'backsim;': '\u223d', - 'backsimeq;': '\u22cd', - 'Backslash;': '\u2216', - 'Barv;': '\u2ae7', - 'barvee;': '\u22bd', - 'Barwed;': '\u2306', - 'barwed;': '\u2305', - 'barwedge;': '\u2305', - 'bbrk;': '\u23b5', - 'bbrktbrk;': '\u23b6', - 'bcong;': '\u224c', - 'Bcy;': '\u0411', - 'bcy;': '\u0431', - 'bdquo;': '\u201e', - 'becaus;': '\u2235', - 'Because;': '\u2235', - 'because;': '\u2235', - 'bemptyv;': '\u29b0', - 'bepsi;': '\u03f6', - 'bernou;': '\u212c', - 'Bernoullis;': '\u212c', - 'Beta;': '\u0392', - 'beta;': '\u03b2', - 'beth;': '\u2136', - 'between;': '\u226c', - 'Bfr;': '\U0001d505', - 'bfr;': '\U0001d51f', - 'bigcap;': '\u22c2', - 'bigcirc;': '\u25ef', - 'bigcup;': '\u22c3', - 'bigodot;': '\u2a00', - 'bigoplus;': '\u2a01', - 'bigotimes;': '\u2a02', - 'bigsqcup;': '\u2a06', - 'bigstar;': '\u2605', - 'bigtriangledown;': '\u25bd', - 'bigtriangleup;': '\u25b3', - 'biguplus;': '\u2a04', - 'bigvee;': '\u22c1', - 'bigwedge;': '\u22c0', - 'bkarow;': '\u290d', - 'blacklozenge;': '\u29eb', - 'blacksquare;': '\u25aa', - 'blacktriangle;': '\u25b4', - 'blacktriangledown;': '\u25be', - 'blacktriangleleft;': '\u25c2', - 'blacktriangleright;': '\u25b8', - 'blank;': '\u2423', - 'blk12;': '\u2592', - 'blk14;': '\u2591', - 'blk34;': '\u2593', - 'block;': '\u2588', - 'bne;': '=\u20e5', - 'bnequiv;': '\u2261\u20e5', - 'bNot;': '\u2aed', - 'bnot;': '\u2310', - 'Bopf;': '\U0001d539', - 'bopf;': '\U0001d553', - 'bot;': '\u22a5', - 'bottom;': '\u22a5', - 'bowtie;': '\u22c8', - 'boxbox;': '\u29c9', - 'boxDL;': '\u2557', - 'boxDl;': '\u2556', - 'boxdL;': '\u2555', - 'boxdl;': '\u2510', - 'boxDR;': '\u2554', - 'boxDr;': '\u2553', - 'boxdR;': '\u2552', - 'boxdr;': '\u250c', - 'boxH;': '\u2550', - 'boxh;': '\u2500', - 'boxHD;': '\u2566', - 'boxHd;': '\u2564', - 'boxhD;': '\u2565', - 'boxhd;': '\u252c', - 
'boxHU;': '\u2569', - 'boxHu;': '\u2567', - 'boxhU;': '\u2568', - 'boxhu;': '\u2534', - 'boxminus;': '\u229f', - 'boxplus;': '\u229e', - 'boxtimes;': '\u22a0', - 'boxUL;': '\u255d', - 'boxUl;': '\u255c', - 'boxuL;': '\u255b', - 'boxul;': '\u2518', - 'boxUR;': '\u255a', - 'boxUr;': '\u2559', - 'boxuR;': '\u2558', - 'boxur;': '\u2514', - 'boxV;': '\u2551', - 'boxv;': '\u2502', - 'boxVH;': '\u256c', - 'boxVh;': '\u256b', - 'boxvH;': '\u256a', - 'boxvh;': '\u253c', - 'boxVL;': '\u2563', - 'boxVl;': '\u2562', - 'boxvL;': '\u2561', - 'boxvl;': '\u2524', - 'boxVR;': '\u2560', - 'boxVr;': '\u255f', - 'boxvR;': '\u255e', - 'boxvr;': '\u251c', - 'bprime;': '\u2035', - 'Breve;': '\u02d8', - 'breve;': '\u02d8', - 'brvbar': '\xa6', - 'brvbar;': '\xa6', - 'Bscr;': '\u212c', - 'bscr;': '\U0001d4b7', - 'bsemi;': '\u204f', - 'bsim;': '\u223d', - 'bsime;': '\u22cd', - 'bsol;': '\\', - 'bsolb;': '\u29c5', - 'bsolhsub;': '\u27c8', - 'bull;': '\u2022', - 'bullet;': '\u2022', - 'bump;': '\u224e', - 'bumpE;': '\u2aae', - 'bumpe;': '\u224f', - 'Bumpeq;': '\u224e', - 'bumpeq;': '\u224f', - 'Cacute;': '\u0106', - 'cacute;': '\u0107', - 'Cap;': '\u22d2', - 'cap;': '\u2229', - 'capand;': '\u2a44', - 'capbrcup;': '\u2a49', - 'capcap;': '\u2a4b', - 'capcup;': '\u2a47', - 'capdot;': '\u2a40', - 'CapitalDifferentialD;': '\u2145', - 'caps;': '\u2229\ufe00', - 'caret;': '\u2041', - 'caron;': '\u02c7', - 'Cayleys;': '\u212d', - 'ccaps;': '\u2a4d', - 'Ccaron;': '\u010c', - 'ccaron;': '\u010d', - 'Ccedil': '\xc7', - 'ccedil': '\xe7', - 'Ccedil;': '\xc7', - 'ccedil;': '\xe7', - 'Ccirc;': '\u0108', - 'ccirc;': '\u0109', - 'Cconint;': '\u2230', - 'ccups;': '\u2a4c', - 'ccupssm;': '\u2a50', - 'Cdot;': '\u010a', - 'cdot;': '\u010b', - 'cedil': '\xb8', - 'cedil;': '\xb8', - 'Cedilla;': '\xb8', - 'cemptyv;': '\u29b2', - 'cent': '\xa2', - 'cent;': '\xa2', - 'CenterDot;': '\xb7', - 'centerdot;': '\xb7', - 'Cfr;': '\u212d', - 'cfr;': '\U0001d520', - 'CHcy;': '\u0427', - 'chcy;': '\u0447', - 'check;': '\u2713', 
- 'checkmark;': '\u2713', - 'Chi;': '\u03a7', - 'chi;': '\u03c7', - 'cir;': '\u25cb', - 'circ;': '\u02c6', - 'circeq;': '\u2257', - 'circlearrowleft;': '\u21ba', - 'circlearrowright;': '\u21bb', - 'circledast;': '\u229b', - 'circledcirc;': '\u229a', - 'circleddash;': '\u229d', - 'CircleDot;': '\u2299', - 'circledR;': '\xae', - 'circledS;': '\u24c8', - 'CircleMinus;': '\u2296', - 'CirclePlus;': '\u2295', - 'CircleTimes;': '\u2297', - 'cirE;': '\u29c3', - 'cire;': '\u2257', - 'cirfnint;': '\u2a10', - 'cirmid;': '\u2aef', - 'cirscir;': '\u29c2', - 'ClockwiseContourIntegral;': '\u2232', - 'CloseCurlyDoubleQuote;': '\u201d', - 'CloseCurlyQuote;': '\u2019', - 'clubs;': '\u2663', - 'clubsuit;': '\u2663', - 'Colon;': '\u2237', - 'colon;': ':', - 'Colone;': '\u2a74', - 'colone;': '\u2254', - 'coloneq;': '\u2254', - 'comma;': ',', - 'commat;': '@', - 'comp;': '\u2201', - 'compfn;': '\u2218', - 'complement;': '\u2201', - 'complexes;': '\u2102', - 'cong;': '\u2245', - 'congdot;': '\u2a6d', - 'Congruent;': '\u2261', - 'Conint;': '\u222f', - 'conint;': '\u222e', - 'ContourIntegral;': '\u222e', - 'Copf;': '\u2102', - 'copf;': '\U0001d554', - 'coprod;': '\u2210', - 'Coproduct;': '\u2210', - 'COPY': '\xa9', - 'copy': '\xa9', - 'COPY;': '\xa9', - 'copy;': '\xa9', - 'copysr;': '\u2117', - 'CounterClockwiseContourIntegral;': '\u2233', - 'crarr;': '\u21b5', - 'Cross;': '\u2a2f', - 'cross;': '\u2717', - 'Cscr;': '\U0001d49e', - 'cscr;': '\U0001d4b8', - 'csub;': '\u2acf', - 'csube;': '\u2ad1', - 'csup;': '\u2ad0', - 'csupe;': '\u2ad2', - 'ctdot;': '\u22ef', - 'cudarrl;': '\u2938', - 'cudarrr;': '\u2935', - 'cuepr;': '\u22de', - 'cuesc;': '\u22df', - 'cularr;': '\u21b6', - 'cularrp;': '\u293d', - 'Cup;': '\u22d3', - 'cup;': '\u222a', - 'cupbrcap;': '\u2a48', - 'CupCap;': '\u224d', - 'cupcap;': '\u2a46', - 'cupcup;': '\u2a4a', - 'cupdot;': '\u228d', - 'cupor;': '\u2a45', - 'cups;': '\u222a\ufe00', - 'curarr;': '\u21b7', - 'curarrm;': '\u293c', - 'curlyeqprec;': '\u22de', - 'curlyeqsucc;': 
'\u22df', - 'curlyvee;': '\u22ce', - 'curlywedge;': '\u22cf', - 'curren': '\xa4', - 'curren;': '\xa4', - 'curvearrowleft;': '\u21b6', - 'curvearrowright;': '\u21b7', - 'cuvee;': '\u22ce', - 'cuwed;': '\u22cf', - 'cwconint;': '\u2232', - 'cwint;': '\u2231', - 'cylcty;': '\u232d', - 'Dagger;': '\u2021', - 'dagger;': '\u2020', - 'daleth;': '\u2138', - 'Darr;': '\u21a1', - 'dArr;': '\u21d3', - 'darr;': '\u2193', - 'dash;': '\u2010', - 'Dashv;': '\u2ae4', - 'dashv;': '\u22a3', - 'dbkarow;': '\u290f', - 'dblac;': '\u02dd', - 'Dcaron;': '\u010e', - 'dcaron;': '\u010f', - 'Dcy;': '\u0414', - 'dcy;': '\u0434', - 'DD;': '\u2145', - 'dd;': '\u2146', - 'ddagger;': '\u2021', - 'ddarr;': '\u21ca', - 'DDotrahd;': '\u2911', - 'ddotseq;': '\u2a77', - 'deg': '\xb0', - 'deg;': '\xb0', - 'Del;': '\u2207', - 'Delta;': '\u0394', - 'delta;': '\u03b4', - 'demptyv;': '\u29b1', - 'dfisht;': '\u297f', - 'Dfr;': '\U0001d507', - 'dfr;': '\U0001d521', - 'dHar;': '\u2965', - 'dharl;': '\u21c3', - 'dharr;': '\u21c2', - 'DiacriticalAcute;': '\xb4', - 'DiacriticalDot;': '\u02d9', - 'DiacriticalDoubleAcute;': '\u02dd', - 'DiacriticalGrave;': '`', - 'DiacriticalTilde;': '\u02dc', - 'diam;': '\u22c4', - 'Diamond;': '\u22c4', - 'diamond;': '\u22c4', - 'diamondsuit;': '\u2666', - 'diams;': '\u2666', - 'die;': '\xa8', - 'DifferentialD;': '\u2146', - 'digamma;': '\u03dd', - 'disin;': '\u22f2', - 'div;': '\xf7', - 'divide': '\xf7', - 'divide;': '\xf7', - 'divideontimes;': '\u22c7', - 'divonx;': '\u22c7', - 'DJcy;': '\u0402', - 'djcy;': '\u0452', - 'dlcorn;': '\u231e', - 'dlcrop;': '\u230d', - 'dollar;': '$', - 'Dopf;': '\U0001d53b', - 'dopf;': '\U0001d555', - 'Dot;': '\xa8', - 'dot;': '\u02d9', - 'DotDot;': '\u20dc', - 'doteq;': '\u2250', - 'doteqdot;': '\u2251', - 'DotEqual;': '\u2250', - 'dotminus;': '\u2238', - 'dotplus;': '\u2214', - 'dotsquare;': '\u22a1', - 'doublebarwedge;': '\u2306', - 'DoubleContourIntegral;': '\u222f', - 'DoubleDot;': '\xa8', - 'DoubleDownArrow;': '\u21d3', - 'DoubleLeftArrow;': 
'\u21d0', - 'DoubleLeftRightArrow;': '\u21d4', - 'DoubleLeftTee;': '\u2ae4', - 'DoubleLongLeftArrow;': '\u27f8', - 'DoubleLongLeftRightArrow;': '\u27fa', - 'DoubleLongRightArrow;': '\u27f9', - 'DoubleRightArrow;': '\u21d2', - 'DoubleRightTee;': '\u22a8', - 'DoubleUpArrow;': '\u21d1', - 'DoubleUpDownArrow;': '\u21d5', - 'DoubleVerticalBar;': '\u2225', - 'DownArrow;': '\u2193', - 'Downarrow;': '\u21d3', - 'downarrow;': '\u2193', - 'DownArrowBar;': '\u2913', - 'DownArrowUpArrow;': '\u21f5', - 'DownBreve;': '\u0311', - 'downdownarrows;': '\u21ca', - 'downharpoonleft;': '\u21c3', - 'downharpoonright;': '\u21c2', - 'DownLeftRightVector;': '\u2950', - 'DownLeftTeeVector;': '\u295e', - 'DownLeftVector;': '\u21bd', - 'DownLeftVectorBar;': '\u2956', - 'DownRightTeeVector;': '\u295f', - 'DownRightVector;': '\u21c1', - 'DownRightVectorBar;': '\u2957', - 'DownTee;': '\u22a4', - 'DownTeeArrow;': '\u21a7', - 'drbkarow;': '\u2910', - 'drcorn;': '\u231f', - 'drcrop;': '\u230c', - 'Dscr;': '\U0001d49f', - 'dscr;': '\U0001d4b9', - 'DScy;': '\u0405', - 'dscy;': '\u0455', - 'dsol;': '\u29f6', - 'Dstrok;': '\u0110', - 'dstrok;': '\u0111', - 'dtdot;': '\u22f1', - 'dtri;': '\u25bf', - 'dtrif;': '\u25be', - 'duarr;': '\u21f5', - 'duhar;': '\u296f', - 'dwangle;': '\u29a6', - 'DZcy;': '\u040f', - 'dzcy;': '\u045f', - 'dzigrarr;': '\u27ff', - 'Eacute': '\xc9', - 'eacute': '\xe9', - 'Eacute;': '\xc9', - 'eacute;': '\xe9', - 'easter;': '\u2a6e', - 'Ecaron;': '\u011a', - 'ecaron;': '\u011b', - 'ecir;': '\u2256', - 'Ecirc': '\xca', - 'ecirc': '\xea', - 'Ecirc;': '\xca', - 'ecirc;': '\xea', - 'ecolon;': '\u2255', - 'Ecy;': '\u042d', - 'ecy;': '\u044d', - 'eDDot;': '\u2a77', - 'Edot;': '\u0116', - 'eDot;': '\u2251', - 'edot;': '\u0117', - 'ee;': '\u2147', - 'efDot;': '\u2252', - 'Efr;': '\U0001d508', - 'efr;': '\U0001d522', - 'eg;': '\u2a9a', - 'Egrave': '\xc8', - 'egrave': '\xe8', - 'Egrave;': '\xc8', - 'egrave;': '\xe8', - 'egs;': '\u2a96', - 'egsdot;': '\u2a98', - 'el;': '\u2a99', - 'Element;': 
'\u2208', - 'elinters;': '\u23e7', - 'ell;': '\u2113', - 'els;': '\u2a95', - 'elsdot;': '\u2a97', - 'Emacr;': '\u0112', - 'emacr;': '\u0113', - 'empty;': '\u2205', - 'emptyset;': '\u2205', - 'EmptySmallSquare;': '\u25fb', - 'emptyv;': '\u2205', - 'EmptyVerySmallSquare;': '\u25ab', - 'emsp13;': '\u2004', - 'emsp14;': '\u2005', - 'emsp;': '\u2003', - 'ENG;': '\u014a', - 'eng;': '\u014b', - 'ensp;': '\u2002', - 'Eogon;': '\u0118', - 'eogon;': '\u0119', - 'Eopf;': '\U0001d53c', - 'eopf;': '\U0001d556', - 'epar;': '\u22d5', - 'eparsl;': '\u29e3', - 'eplus;': '\u2a71', - 'epsi;': '\u03b5', - 'Epsilon;': '\u0395', - 'epsilon;': '\u03b5', - 'epsiv;': '\u03f5', - 'eqcirc;': '\u2256', - 'eqcolon;': '\u2255', - 'eqsim;': '\u2242', - 'eqslantgtr;': '\u2a96', - 'eqslantless;': '\u2a95', - 'Equal;': '\u2a75', - 'equals;': '=', - 'EqualTilde;': '\u2242', - 'equest;': '\u225f', - 'Equilibrium;': '\u21cc', - 'equiv;': '\u2261', - 'equivDD;': '\u2a78', - 'eqvparsl;': '\u29e5', - 'erarr;': '\u2971', - 'erDot;': '\u2253', - 'Escr;': '\u2130', - 'escr;': '\u212f', - 'esdot;': '\u2250', - 'Esim;': '\u2a73', - 'esim;': '\u2242', - 'Eta;': '\u0397', - 'eta;': '\u03b7', - 'ETH': '\xd0', - 'eth': '\xf0', - 'ETH;': '\xd0', - 'eth;': '\xf0', - 'Euml': '\xcb', - 'euml': '\xeb', - 'Euml;': '\xcb', - 'euml;': '\xeb', - 'euro;': '\u20ac', - 'excl;': '!', - 'exist;': '\u2203', - 'Exists;': '\u2203', - 'expectation;': '\u2130', - 'ExponentialE;': '\u2147', - 'exponentiale;': '\u2147', - 'fallingdotseq;': '\u2252', - 'Fcy;': '\u0424', - 'fcy;': '\u0444', - 'female;': '\u2640', - 'ffilig;': '\ufb03', - 'fflig;': '\ufb00', - 'ffllig;': '\ufb04', - 'Ffr;': '\U0001d509', - 'ffr;': '\U0001d523', - 'filig;': '\ufb01', - 'FilledSmallSquare;': '\u25fc', - 'FilledVerySmallSquare;': '\u25aa', - 'fjlig;': 'fj', - 'flat;': '\u266d', - 'fllig;': '\ufb02', - 'fltns;': '\u25b1', - 'fnof;': '\u0192', - 'Fopf;': '\U0001d53d', - 'fopf;': '\U0001d557', - 'ForAll;': '\u2200', - 'forall;': '\u2200', - 'fork;': '\u22d4', 
- 'forkv;': '\u2ad9', - 'Fouriertrf;': '\u2131', - 'fpartint;': '\u2a0d', - 'frac12': '\xbd', - 'frac12;': '\xbd', - 'frac13;': '\u2153', - 'frac14': '\xbc', - 'frac14;': '\xbc', - 'frac15;': '\u2155', - 'frac16;': '\u2159', - 'frac18;': '\u215b', - 'frac23;': '\u2154', - 'frac25;': '\u2156', - 'frac34': '\xbe', - 'frac34;': '\xbe', - 'frac35;': '\u2157', - 'frac38;': '\u215c', - 'frac45;': '\u2158', - 'frac56;': '\u215a', - 'frac58;': '\u215d', - 'frac78;': '\u215e', - 'frasl;': '\u2044', - 'frown;': '\u2322', - 'Fscr;': '\u2131', - 'fscr;': '\U0001d4bb', - 'gacute;': '\u01f5', - 'Gamma;': '\u0393', - 'gamma;': '\u03b3', - 'Gammad;': '\u03dc', - 'gammad;': '\u03dd', - 'gap;': '\u2a86', - 'Gbreve;': '\u011e', - 'gbreve;': '\u011f', - 'Gcedil;': '\u0122', - 'Gcirc;': '\u011c', - 'gcirc;': '\u011d', - 'Gcy;': '\u0413', - 'gcy;': '\u0433', - 'Gdot;': '\u0120', - 'gdot;': '\u0121', - 'gE;': '\u2267', - 'ge;': '\u2265', - 'gEl;': '\u2a8c', - 'gel;': '\u22db', - 'geq;': '\u2265', - 'geqq;': '\u2267', - 'geqslant;': '\u2a7e', - 'ges;': '\u2a7e', - 'gescc;': '\u2aa9', - 'gesdot;': '\u2a80', - 'gesdoto;': '\u2a82', - 'gesdotol;': '\u2a84', - 'gesl;': '\u22db\ufe00', - 'gesles;': '\u2a94', - 'Gfr;': '\U0001d50a', - 'gfr;': '\U0001d524', - 'Gg;': '\u22d9', - 'gg;': '\u226b', - 'ggg;': '\u22d9', - 'gimel;': '\u2137', - 'GJcy;': '\u0403', - 'gjcy;': '\u0453', - 'gl;': '\u2277', - 'gla;': '\u2aa5', - 'glE;': '\u2a92', - 'glj;': '\u2aa4', - 'gnap;': '\u2a8a', - 'gnapprox;': '\u2a8a', - 'gnE;': '\u2269', - 'gne;': '\u2a88', - 'gneq;': '\u2a88', - 'gneqq;': '\u2269', - 'gnsim;': '\u22e7', - 'Gopf;': '\U0001d53e', - 'gopf;': '\U0001d558', - 'grave;': '`', - 'GreaterEqual;': '\u2265', - 'GreaterEqualLess;': '\u22db', - 'GreaterFullEqual;': '\u2267', - 'GreaterGreater;': '\u2aa2', - 'GreaterLess;': '\u2277', - 'GreaterSlantEqual;': '\u2a7e', - 'GreaterTilde;': '\u2273', - 'Gscr;': '\U0001d4a2', - 'gscr;': '\u210a', - 'gsim;': '\u2273', - 'gsime;': '\u2a8e', - 'gsiml;': '\u2a90', - 
'GT': '>', - 'gt': '>', - 'GT;': '>', - 'Gt;': '\u226b', - 'gt;': '>', - 'gtcc;': '\u2aa7', - 'gtcir;': '\u2a7a', - 'gtdot;': '\u22d7', - 'gtlPar;': '\u2995', - 'gtquest;': '\u2a7c', - 'gtrapprox;': '\u2a86', - 'gtrarr;': '\u2978', - 'gtrdot;': '\u22d7', - 'gtreqless;': '\u22db', - 'gtreqqless;': '\u2a8c', - 'gtrless;': '\u2277', - 'gtrsim;': '\u2273', - 'gvertneqq;': '\u2269\ufe00', - 'gvnE;': '\u2269\ufe00', - 'Hacek;': '\u02c7', - 'hairsp;': '\u200a', - 'half;': '\xbd', - 'hamilt;': '\u210b', - 'HARDcy;': '\u042a', - 'hardcy;': '\u044a', - 'hArr;': '\u21d4', - 'harr;': '\u2194', - 'harrcir;': '\u2948', - 'harrw;': '\u21ad', - 'Hat;': '^', - 'hbar;': '\u210f', - 'Hcirc;': '\u0124', - 'hcirc;': '\u0125', - 'hearts;': '\u2665', - 'heartsuit;': '\u2665', - 'hellip;': '\u2026', - 'hercon;': '\u22b9', - 'Hfr;': '\u210c', - 'hfr;': '\U0001d525', - 'HilbertSpace;': '\u210b', - 'hksearow;': '\u2925', - 'hkswarow;': '\u2926', - 'hoarr;': '\u21ff', - 'homtht;': '\u223b', - 'hookleftarrow;': '\u21a9', - 'hookrightarrow;': '\u21aa', - 'Hopf;': '\u210d', - 'hopf;': '\U0001d559', - 'horbar;': '\u2015', - 'HorizontalLine;': '\u2500', - 'Hscr;': '\u210b', - 'hscr;': '\U0001d4bd', - 'hslash;': '\u210f', - 'Hstrok;': '\u0126', - 'hstrok;': '\u0127', - 'HumpDownHump;': '\u224e', - 'HumpEqual;': '\u224f', - 'hybull;': '\u2043', - 'hyphen;': '\u2010', - 'Iacute': '\xcd', - 'iacute': '\xed', - 'Iacute;': '\xcd', - 'iacute;': '\xed', - 'ic;': '\u2063', - 'Icirc': '\xce', - 'icirc': '\xee', - 'Icirc;': '\xce', - 'icirc;': '\xee', - 'Icy;': '\u0418', - 'icy;': '\u0438', - 'Idot;': '\u0130', - 'IEcy;': '\u0415', - 'iecy;': '\u0435', - 'iexcl': '\xa1', - 'iexcl;': '\xa1', - 'iff;': '\u21d4', - 'Ifr;': '\u2111', - 'ifr;': '\U0001d526', - 'Igrave': '\xcc', - 'igrave': '\xec', - 'Igrave;': '\xcc', - 'igrave;': '\xec', - 'ii;': '\u2148', - 'iiiint;': '\u2a0c', - 'iiint;': '\u222d', - 'iinfin;': '\u29dc', - 'iiota;': '\u2129', - 'IJlig;': '\u0132', - 'ijlig;': '\u0133', - 'Im;': '\u2111', - 
'Imacr;': '\u012a', - 'imacr;': '\u012b', - 'image;': '\u2111', - 'ImaginaryI;': '\u2148', - 'imagline;': '\u2110', - 'imagpart;': '\u2111', - 'imath;': '\u0131', - 'imof;': '\u22b7', - 'imped;': '\u01b5', - 'Implies;': '\u21d2', - 'in;': '\u2208', - 'incare;': '\u2105', - 'infin;': '\u221e', - 'infintie;': '\u29dd', - 'inodot;': '\u0131', - 'Int;': '\u222c', - 'int;': '\u222b', - 'intcal;': '\u22ba', - 'integers;': '\u2124', - 'Integral;': '\u222b', - 'intercal;': '\u22ba', - 'Intersection;': '\u22c2', - 'intlarhk;': '\u2a17', - 'intprod;': '\u2a3c', - 'InvisibleComma;': '\u2063', - 'InvisibleTimes;': '\u2062', - 'IOcy;': '\u0401', - 'iocy;': '\u0451', - 'Iogon;': '\u012e', - 'iogon;': '\u012f', - 'Iopf;': '\U0001d540', - 'iopf;': '\U0001d55a', - 'Iota;': '\u0399', - 'iota;': '\u03b9', - 'iprod;': '\u2a3c', - 'iquest': '\xbf', - 'iquest;': '\xbf', - 'Iscr;': '\u2110', - 'iscr;': '\U0001d4be', - 'isin;': '\u2208', - 'isindot;': '\u22f5', - 'isinE;': '\u22f9', - 'isins;': '\u22f4', - 'isinsv;': '\u22f3', - 'isinv;': '\u2208', - 'it;': '\u2062', - 'Itilde;': '\u0128', - 'itilde;': '\u0129', - 'Iukcy;': '\u0406', - 'iukcy;': '\u0456', - 'Iuml': '\xcf', - 'iuml': '\xef', - 'Iuml;': '\xcf', - 'iuml;': '\xef', - 'Jcirc;': '\u0134', - 'jcirc;': '\u0135', - 'Jcy;': '\u0419', - 'jcy;': '\u0439', - 'Jfr;': '\U0001d50d', - 'jfr;': '\U0001d527', - 'jmath;': '\u0237', - 'Jopf;': '\U0001d541', - 'jopf;': '\U0001d55b', - 'Jscr;': '\U0001d4a5', - 'jscr;': '\U0001d4bf', - 'Jsercy;': '\u0408', - 'jsercy;': '\u0458', - 'Jukcy;': '\u0404', - 'jukcy;': '\u0454', - 'Kappa;': '\u039a', - 'kappa;': '\u03ba', - 'kappav;': '\u03f0', - 'Kcedil;': '\u0136', - 'kcedil;': '\u0137', - 'Kcy;': '\u041a', - 'kcy;': '\u043a', - 'Kfr;': '\U0001d50e', - 'kfr;': '\U0001d528', - 'kgreen;': '\u0138', - 'KHcy;': '\u0425', - 'khcy;': '\u0445', - 'KJcy;': '\u040c', - 'kjcy;': '\u045c', - 'Kopf;': '\U0001d542', - 'kopf;': '\U0001d55c', - 'Kscr;': '\U0001d4a6', - 'kscr;': '\U0001d4c0', - 'lAarr;': '\u21da', - 
'Lacute;': '\u0139', - 'lacute;': '\u013a', - 'laemptyv;': '\u29b4', - 'lagran;': '\u2112', - 'Lambda;': '\u039b', - 'lambda;': '\u03bb', - 'Lang;': '\u27ea', - 'lang;': '\u27e8', - 'langd;': '\u2991', - 'langle;': '\u27e8', - 'lap;': '\u2a85', - 'Laplacetrf;': '\u2112', - 'laquo': '\xab', - 'laquo;': '\xab', - 'Larr;': '\u219e', - 'lArr;': '\u21d0', - 'larr;': '\u2190', - 'larrb;': '\u21e4', - 'larrbfs;': '\u291f', - 'larrfs;': '\u291d', - 'larrhk;': '\u21a9', - 'larrlp;': '\u21ab', - 'larrpl;': '\u2939', - 'larrsim;': '\u2973', - 'larrtl;': '\u21a2', - 'lat;': '\u2aab', - 'lAtail;': '\u291b', - 'latail;': '\u2919', - 'late;': '\u2aad', - 'lates;': '\u2aad\ufe00', - 'lBarr;': '\u290e', - 'lbarr;': '\u290c', - 'lbbrk;': '\u2772', - 'lbrace;': '{', - 'lbrack;': '[', - 'lbrke;': '\u298b', - 'lbrksld;': '\u298f', - 'lbrkslu;': '\u298d', - 'Lcaron;': '\u013d', - 'lcaron;': '\u013e', - 'Lcedil;': '\u013b', - 'lcedil;': '\u013c', - 'lceil;': '\u2308', - 'lcub;': '{', - 'Lcy;': '\u041b', - 'lcy;': '\u043b', - 'ldca;': '\u2936', - 'ldquo;': '\u201c', - 'ldquor;': '\u201e', - 'ldrdhar;': '\u2967', - 'ldrushar;': '\u294b', - 'ldsh;': '\u21b2', - 'lE;': '\u2266', - 'le;': '\u2264', - 'LeftAngleBracket;': '\u27e8', - 'LeftArrow;': '\u2190', - 'Leftarrow;': '\u21d0', - 'leftarrow;': '\u2190', - 'LeftArrowBar;': '\u21e4', - 'LeftArrowRightArrow;': '\u21c6', - 'leftarrowtail;': '\u21a2', - 'LeftCeiling;': '\u2308', - 'LeftDoubleBracket;': '\u27e6', - 'LeftDownTeeVector;': '\u2961', - 'LeftDownVector;': '\u21c3', - 'LeftDownVectorBar;': '\u2959', - 'LeftFloor;': '\u230a', - 'leftharpoondown;': '\u21bd', - 'leftharpoonup;': '\u21bc', - 'leftleftarrows;': '\u21c7', - 'LeftRightArrow;': '\u2194', - 'Leftrightarrow;': '\u21d4', - 'leftrightarrow;': '\u2194', - 'leftrightarrows;': '\u21c6', - 'leftrightharpoons;': '\u21cb', - 'leftrightsquigarrow;': '\u21ad', - 'LeftRightVector;': '\u294e', - 'LeftTee;': '\u22a3', - 'LeftTeeArrow;': '\u21a4', - 'LeftTeeVector;': '\u295a', - 
'leftthreetimes;': '\u22cb', - 'LeftTriangle;': '\u22b2', - 'LeftTriangleBar;': '\u29cf', - 'LeftTriangleEqual;': '\u22b4', - 'LeftUpDownVector;': '\u2951', - 'LeftUpTeeVector;': '\u2960', - 'LeftUpVector;': '\u21bf', - 'LeftUpVectorBar;': '\u2958', - 'LeftVector;': '\u21bc', - 'LeftVectorBar;': '\u2952', - 'lEg;': '\u2a8b', - 'leg;': '\u22da', - 'leq;': '\u2264', - 'leqq;': '\u2266', - 'leqslant;': '\u2a7d', - 'les;': '\u2a7d', - 'lescc;': '\u2aa8', - 'lesdot;': '\u2a7f', - 'lesdoto;': '\u2a81', - 'lesdotor;': '\u2a83', - 'lesg;': '\u22da\ufe00', - 'lesges;': '\u2a93', - 'lessapprox;': '\u2a85', - 'lessdot;': '\u22d6', - 'lesseqgtr;': '\u22da', - 'lesseqqgtr;': '\u2a8b', - 'LessEqualGreater;': '\u22da', - 'LessFullEqual;': '\u2266', - 'LessGreater;': '\u2276', - 'lessgtr;': '\u2276', - 'LessLess;': '\u2aa1', - 'lesssim;': '\u2272', - 'LessSlantEqual;': '\u2a7d', - 'LessTilde;': '\u2272', - 'lfisht;': '\u297c', - 'lfloor;': '\u230a', - 'Lfr;': '\U0001d50f', - 'lfr;': '\U0001d529', - 'lg;': '\u2276', - 'lgE;': '\u2a91', - 'lHar;': '\u2962', - 'lhard;': '\u21bd', - 'lharu;': '\u21bc', - 'lharul;': '\u296a', - 'lhblk;': '\u2584', - 'LJcy;': '\u0409', - 'ljcy;': '\u0459', - 'Ll;': '\u22d8', - 'll;': '\u226a', - 'llarr;': '\u21c7', - 'llcorner;': '\u231e', - 'Lleftarrow;': '\u21da', - 'llhard;': '\u296b', - 'lltri;': '\u25fa', - 'Lmidot;': '\u013f', - 'lmidot;': '\u0140', - 'lmoust;': '\u23b0', - 'lmoustache;': '\u23b0', - 'lnap;': '\u2a89', - 'lnapprox;': '\u2a89', - 'lnE;': '\u2268', - 'lne;': '\u2a87', - 'lneq;': '\u2a87', - 'lneqq;': '\u2268', - 'lnsim;': '\u22e6', - 'loang;': '\u27ec', - 'loarr;': '\u21fd', - 'lobrk;': '\u27e6', - 'LongLeftArrow;': '\u27f5', - 'Longleftarrow;': '\u27f8', - 'longleftarrow;': '\u27f5', - 'LongLeftRightArrow;': '\u27f7', - 'Longleftrightarrow;': '\u27fa', - 'longleftrightarrow;': '\u27f7', - 'longmapsto;': '\u27fc', - 'LongRightArrow;': '\u27f6', - 'Longrightarrow;': '\u27f9', - 'longrightarrow;': '\u27f6', - 'looparrowleft;': 
'\u21ab', - 'looparrowright;': '\u21ac', - 'lopar;': '\u2985', - 'Lopf;': '\U0001d543', - 'lopf;': '\U0001d55d', - 'loplus;': '\u2a2d', - 'lotimes;': '\u2a34', - 'lowast;': '\u2217', - 'lowbar;': '_', - 'LowerLeftArrow;': '\u2199', - 'LowerRightArrow;': '\u2198', - 'loz;': '\u25ca', - 'lozenge;': '\u25ca', - 'lozf;': '\u29eb', - 'lpar;': '(', - 'lparlt;': '\u2993', - 'lrarr;': '\u21c6', - 'lrcorner;': '\u231f', - 'lrhar;': '\u21cb', - 'lrhard;': '\u296d', - 'lrm;': '\u200e', - 'lrtri;': '\u22bf', - 'lsaquo;': '\u2039', - 'Lscr;': '\u2112', - 'lscr;': '\U0001d4c1', - 'Lsh;': '\u21b0', - 'lsh;': '\u21b0', - 'lsim;': '\u2272', - 'lsime;': '\u2a8d', - 'lsimg;': '\u2a8f', - 'lsqb;': '[', - 'lsquo;': '\u2018', - 'lsquor;': '\u201a', - 'Lstrok;': '\u0141', - 'lstrok;': '\u0142', - 'LT': '<', - 'lt': '<', - 'LT;': '<', - 'Lt;': '\u226a', - 'lt;': '<', - 'ltcc;': '\u2aa6', - 'ltcir;': '\u2a79', - 'ltdot;': '\u22d6', - 'lthree;': '\u22cb', - 'ltimes;': '\u22c9', - 'ltlarr;': '\u2976', - 'ltquest;': '\u2a7b', - 'ltri;': '\u25c3', - 'ltrie;': '\u22b4', - 'ltrif;': '\u25c2', - 'ltrPar;': '\u2996', - 'lurdshar;': '\u294a', - 'luruhar;': '\u2966', - 'lvertneqq;': '\u2268\ufe00', - 'lvnE;': '\u2268\ufe00', - 'macr': '\xaf', - 'macr;': '\xaf', - 'male;': '\u2642', - 'malt;': '\u2720', - 'maltese;': '\u2720', - 'Map;': '\u2905', - 'map;': '\u21a6', - 'mapsto;': '\u21a6', - 'mapstodown;': '\u21a7', - 'mapstoleft;': '\u21a4', - 'mapstoup;': '\u21a5', - 'marker;': '\u25ae', - 'mcomma;': '\u2a29', - 'Mcy;': '\u041c', - 'mcy;': '\u043c', - 'mdash;': '\u2014', - 'mDDot;': '\u223a', - 'measuredangle;': '\u2221', - 'MediumSpace;': '\u205f', - 'Mellintrf;': '\u2133', - 'Mfr;': '\U0001d510', - 'mfr;': '\U0001d52a', - 'mho;': '\u2127', - 'micro': '\xb5', - 'micro;': '\xb5', - 'mid;': '\u2223', - 'midast;': '*', - 'midcir;': '\u2af0', - 'middot': '\xb7', - 'middot;': '\xb7', - 'minus;': '\u2212', - 'minusb;': '\u229f', - 'minusd;': '\u2238', - 'minusdu;': '\u2a2a', - 'MinusPlus;': '\u2213', - 
'mlcp;': '\u2adb', - 'mldr;': '\u2026', - 'mnplus;': '\u2213', - 'models;': '\u22a7', - 'Mopf;': '\U0001d544', - 'mopf;': '\U0001d55e', - 'mp;': '\u2213', - 'Mscr;': '\u2133', - 'mscr;': '\U0001d4c2', - 'mstpos;': '\u223e', - 'Mu;': '\u039c', - 'mu;': '\u03bc', - 'multimap;': '\u22b8', - 'mumap;': '\u22b8', - 'nabla;': '\u2207', - 'Nacute;': '\u0143', - 'nacute;': '\u0144', - 'nang;': '\u2220\u20d2', - 'nap;': '\u2249', - 'napE;': '\u2a70\u0338', - 'napid;': '\u224b\u0338', - 'napos;': '\u0149', - 'napprox;': '\u2249', - 'natur;': '\u266e', - 'natural;': '\u266e', - 'naturals;': '\u2115', - 'nbsp': '\xa0', - 'nbsp;': '\xa0', - 'nbump;': '\u224e\u0338', - 'nbumpe;': '\u224f\u0338', - 'ncap;': '\u2a43', - 'Ncaron;': '\u0147', - 'ncaron;': '\u0148', - 'Ncedil;': '\u0145', - 'ncedil;': '\u0146', - 'ncong;': '\u2247', - 'ncongdot;': '\u2a6d\u0338', - 'ncup;': '\u2a42', - 'Ncy;': '\u041d', - 'ncy;': '\u043d', - 'ndash;': '\u2013', - 'ne;': '\u2260', - 'nearhk;': '\u2924', - 'neArr;': '\u21d7', - 'nearr;': '\u2197', - 'nearrow;': '\u2197', - 'nedot;': '\u2250\u0338', - 'NegativeMediumSpace;': '\u200b', - 'NegativeThickSpace;': '\u200b', - 'NegativeThinSpace;': '\u200b', - 'NegativeVeryThinSpace;': '\u200b', - 'nequiv;': '\u2262', - 'nesear;': '\u2928', - 'nesim;': '\u2242\u0338', - 'NestedGreaterGreater;': '\u226b', - 'NestedLessLess;': '\u226a', - 'NewLine;': '\n', - 'nexist;': '\u2204', - 'nexists;': '\u2204', - 'Nfr;': '\U0001d511', - 'nfr;': '\U0001d52b', - 'ngE;': '\u2267\u0338', - 'nge;': '\u2271', - 'ngeq;': '\u2271', - 'ngeqq;': '\u2267\u0338', - 'ngeqslant;': '\u2a7e\u0338', - 'nges;': '\u2a7e\u0338', - 'nGg;': '\u22d9\u0338', - 'ngsim;': '\u2275', - 'nGt;': '\u226b\u20d2', - 'ngt;': '\u226f', - 'ngtr;': '\u226f', - 'nGtv;': '\u226b\u0338', - 'nhArr;': '\u21ce', - 'nharr;': '\u21ae', - 'nhpar;': '\u2af2', - 'ni;': '\u220b', - 'nis;': '\u22fc', - 'nisd;': '\u22fa', - 'niv;': '\u220b', - 'NJcy;': '\u040a', - 'njcy;': '\u045a', - 'nlArr;': '\u21cd', - 'nlarr;': 
'\u219a', - 'nldr;': '\u2025', - 'nlE;': '\u2266\u0338', - 'nle;': '\u2270', - 'nLeftarrow;': '\u21cd', - 'nleftarrow;': '\u219a', - 'nLeftrightarrow;': '\u21ce', - 'nleftrightarrow;': '\u21ae', - 'nleq;': '\u2270', - 'nleqq;': '\u2266\u0338', - 'nleqslant;': '\u2a7d\u0338', - 'nles;': '\u2a7d\u0338', - 'nless;': '\u226e', - 'nLl;': '\u22d8\u0338', - 'nlsim;': '\u2274', - 'nLt;': '\u226a\u20d2', - 'nlt;': '\u226e', - 'nltri;': '\u22ea', - 'nltrie;': '\u22ec', - 'nLtv;': '\u226a\u0338', - 'nmid;': '\u2224', - 'NoBreak;': '\u2060', - 'NonBreakingSpace;': '\xa0', - 'Nopf;': '\u2115', - 'nopf;': '\U0001d55f', - 'not': '\xac', - 'Not;': '\u2aec', - 'not;': '\xac', - 'NotCongruent;': '\u2262', - 'NotCupCap;': '\u226d', - 'NotDoubleVerticalBar;': '\u2226', - 'NotElement;': '\u2209', - 'NotEqual;': '\u2260', - 'NotEqualTilde;': '\u2242\u0338', - 'NotExists;': '\u2204', - 'NotGreater;': '\u226f', - 'NotGreaterEqual;': '\u2271', - 'NotGreaterFullEqual;': '\u2267\u0338', - 'NotGreaterGreater;': '\u226b\u0338', - 'NotGreaterLess;': '\u2279', - 'NotGreaterSlantEqual;': '\u2a7e\u0338', - 'NotGreaterTilde;': '\u2275', - 'NotHumpDownHump;': '\u224e\u0338', - 'NotHumpEqual;': '\u224f\u0338', - 'notin;': '\u2209', - 'notindot;': '\u22f5\u0338', - 'notinE;': '\u22f9\u0338', - 'notinva;': '\u2209', - 'notinvb;': '\u22f7', - 'notinvc;': '\u22f6', - 'NotLeftTriangle;': '\u22ea', - 'NotLeftTriangleBar;': '\u29cf\u0338', - 'NotLeftTriangleEqual;': '\u22ec', - 'NotLess;': '\u226e', - 'NotLessEqual;': '\u2270', - 'NotLessGreater;': '\u2278', - 'NotLessLess;': '\u226a\u0338', - 'NotLessSlantEqual;': '\u2a7d\u0338', - 'NotLessTilde;': '\u2274', - 'NotNestedGreaterGreater;': '\u2aa2\u0338', - 'NotNestedLessLess;': '\u2aa1\u0338', - 'notni;': '\u220c', - 'notniva;': '\u220c', - 'notnivb;': '\u22fe', - 'notnivc;': '\u22fd', - 'NotPrecedes;': '\u2280', - 'NotPrecedesEqual;': '\u2aaf\u0338', - 'NotPrecedesSlantEqual;': '\u22e0', - 'NotReverseElement;': '\u220c', - 'NotRightTriangle;': '\u22eb', - 
'NotRightTriangleBar;': '\u29d0\u0338', - 'NotRightTriangleEqual;': '\u22ed', - 'NotSquareSubset;': '\u228f\u0338', - 'NotSquareSubsetEqual;': '\u22e2', - 'NotSquareSuperset;': '\u2290\u0338', - 'NotSquareSupersetEqual;': '\u22e3', - 'NotSubset;': '\u2282\u20d2', - 'NotSubsetEqual;': '\u2288', - 'NotSucceeds;': '\u2281', - 'NotSucceedsEqual;': '\u2ab0\u0338', - 'NotSucceedsSlantEqual;': '\u22e1', - 'NotSucceedsTilde;': '\u227f\u0338', - 'NotSuperset;': '\u2283\u20d2', - 'NotSupersetEqual;': '\u2289', - 'NotTilde;': '\u2241', - 'NotTildeEqual;': '\u2244', - 'NotTildeFullEqual;': '\u2247', - 'NotTildeTilde;': '\u2249', - 'NotVerticalBar;': '\u2224', - 'npar;': '\u2226', - 'nparallel;': '\u2226', - 'nparsl;': '\u2afd\u20e5', - 'npart;': '\u2202\u0338', - 'npolint;': '\u2a14', - 'npr;': '\u2280', - 'nprcue;': '\u22e0', - 'npre;': '\u2aaf\u0338', - 'nprec;': '\u2280', - 'npreceq;': '\u2aaf\u0338', - 'nrArr;': '\u21cf', - 'nrarr;': '\u219b', - 'nrarrc;': '\u2933\u0338', - 'nrarrw;': '\u219d\u0338', - 'nRightarrow;': '\u21cf', - 'nrightarrow;': '\u219b', - 'nrtri;': '\u22eb', - 'nrtrie;': '\u22ed', - 'nsc;': '\u2281', - 'nsccue;': '\u22e1', - 'nsce;': '\u2ab0\u0338', - 'Nscr;': '\U0001d4a9', - 'nscr;': '\U0001d4c3', - 'nshortmid;': '\u2224', - 'nshortparallel;': '\u2226', - 'nsim;': '\u2241', - 'nsime;': '\u2244', - 'nsimeq;': '\u2244', - 'nsmid;': '\u2224', - 'nspar;': '\u2226', - 'nsqsube;': '\u22e2', - 'nsqsupe;': '\u22e3', - 'nsub;': '\u2284', - 'nsubE;': '\u2ac5\u0338', - 'nsube;': '\u2288', - 'nsubset;': '\u2282\u20d2', - 'nsubseteq;': '\u2288', - 'nsubseteqq;': '\u2ac5\u0338', - 'nsucc;': '\u2281', - 'nsucceq;': '\u2ab0\u0338', - 'nsup;': '\u2285', - 'nsupE;': '\u2ac6\u0338', - 'nsupe;': '\u2289', - 'nsupset;': '\u2283\u20d2', - 'nsupseteq;': '\u2289', - 'nsupseteqq;': '\u2ac6\u0338', - 'ntgl;': '\u2279', - 'Ntilde': '\xd1', - 'ntilde': '\xf1', - 'Ntilde;': '\xd1', - 'ntilde;': '\xf1', - 'ntlg;': '\u2278', - 'ntriangleleft;': '\u22ea', - 'ntrianglelefteq;': 
'\u22ec', - 'ntriangleright;': '\u22eb', - 'ntrianglerighteq;': '\u22ed', - 'Nu;': '\u039d', - 'nu;': '\u03bd', - 'num;': '#', - 'numero;': '\u2116', - 'numsp;': '\u2007', - 'nvap;': '\u224d\u20d2', - 'nVDash;': '\u22af', - 'nVdash;': '\u22ae', - 'nvDash;': '\u22ad', - 'nvdash;': '\u22ac', - 'nvge;': '\u2265\u20d2', - 'nvgt;': '>\u20d2', - 'nvHarr;': '\u2904', - 'nvinfin;': '\u29de', - 'nvlArr;': '\u2902', - 'nvle;': '\u2264\u20d2', - 'nvlt;': '<\u20d2', - 'nvltrie;': '\u22b4\u20d2', - 'nvrArr;': '\u2903', - 'nvrtrie;': '\u22b5\u20d2', - 'nvsim;': '\u223c\u20d2', - 'nwarhk;': '\u2923', - 'nwArr;': '\u21d6', - 'nwarr;': '\u2196', - 'nwarrow;': '\u2196', - 'nwnear;': '\u2927', - 'Oacute': '\xd3', - 'oacute': '\xf3', - 'Oacute;': '\xd3', - 'oacute;': '\xf3', - 'oast;': '\u229b', - 'ocir;': '\u229a', - 'Ocirc': '\xd4', - 'ocirc': '\xf4', - 'Ocirc;': '\xd4', - 'ocirc;': '\xf4', - 'Ocy;': '\u041e', - 'ocy;': '\u043e', - 'odash;': '\u229d', - 'Odblac;': '\u0150', - 'odblac;': '\u0151', - 'odiv;': '\u2a38', - 'odot;': '\u2299', - 'odsold;': '\u29bc', - 'OElig;': '\u0152', - 'oelig;': '\u0153', - 'ofcir;': '\u29bf', - 'Ofr;': '\U0001d512', - 'ofr;': '\U0001d52c', - 'ogon;': '\u02db', - 'Ograve': '\xd2', - 'ograve': '\xf2', - 'Ograve;': '\xd2', - 'ograve;': '\xf2', - 'ogt;': '\u29c1', - 'ohbar;': '\u29b5', - 'ohm;': '\u03a9', - 'oint;': '\u222e', - 'olarr;': '\u21ba', - 'olcir;': '\u29be', - 'olcross;': '\u29bb', - 'oline;': '\u203e', - 'olt;': '\u29c0', - 'Omacr;': '\u014c', - 'omacr;': '\u014d', - 'Omega;': '\u03a9', - 'omega;': '\u03c9', - 'Omicron;': '\u039f', - 'omicron;': '\u03bf', - 'omid;': '\u29b6', - 'ominus;': '\u2296', - 'Oopf;': '\U0001d546', - 'oopf;': '\U0001d560', - 'opar;': '\u29b7', - 'OpenCurlyDoubleQuote;': '\u201c', - 'OpenCurlyQuote;': '\u2018', - 'operp;': '\u29b9', - 'oplus;': '\u2295', - 'Or;': '\u2a54', - 'or;': '\u2228', - 'orarr;': '\u21bb', - 'ord;': '\u2a5d', - 'order;': '\u2134', - 'orderof;': '\u2134', - 'ordf': '\xaa', - 'ordf;': '\xaa', - 
'ordm': '\xba', - 'ordm;': '\xba', - 'origof;': '\u22b6', - 'oror;': '\u2a56', - 'orslope;': '\u2a57', - 'orv;': '\u2a5b', - 'oS;': '\u24c8', - 'Oscr;': '\U0001d4aa', - 'oscr;': '\u2134', - 'Oslash': '\xd8', - 'oslash': '\xf8', - 'Oslash;': '\xd8', - 'oslash;': '\xf8', - 'osol;': '\u2298', - 'Otilde': '\xd5', - 'otilde': '\xf5', - 'Otilde;': '\xd5', - 'otilde;': '\xf5', - 'Otimes;': '\u2a37', - 'otimes;': '\u2297', - 'otimesas;': '\u2a36', - 'Ouml': '\xd6', - 'ouml': '\xf6', - 'Ouml;': '\xd6', - 'ouml;': '\xf6', - 'ovbar;': '\u233d', - 'OverBar;': '\u203e', - 'OverBrace;': '\u23de', - 'OverBracket;': '\u23b4', - 'OverParenthesis;': '\u23dc', - 'par;': '\u2225', - 'para': '\xb6', - 'para;': '\xb6', - 'parallel;': '\u2225', - 'parsim;': '\u2af3', - 'parsl;': '\u2afd', - 'part;': '\u2202', - 'PartialD;': '\u2202', - 'Pcy;': '\u041f', - 'pcy;': '\u043f', - 'percnt;': '%', - 'period;': '.', - 'permil;': '\u2030', - 'perp;': '\u22a5', - 'pertenk;': '\u2031', - 'Pfr;': '\U0001d513', - 'pfr;': '\U0001d52d', - 'Phi;': '\u03a6', - 'phi;': '\u03c6', - 'phiv;': '\u03d5', - 'phmmat;': '\u2133', - 'phone;': '\u260e', - 'Pi;': '\u03a0', - 'pi;': '\u03c0', - 'pitchfork;': '\u22d4', - 'piv;': '\u03d6', - 'planck;': '\u210f', - 'planckh;': '\u210e', - 'plankv;': '\u210f', - 'plus;': '+', - 'plusacir;': '\u2a23', - 'plusb;': '\u229e', - 'pluscir;': '\u2a22', - 'plusdo;': '\u2214', - 'plusdu;': '\u2a25', - 'pluse;': '\u2a72', - 'PlusMinus;': '\xb1', - 'plusmn': '\xb1', - 'plusmn;': '\xb1', - 'plussim;': '\u2a26', - 'plustwo;': '\u2a27', - 'pm;': '\xb1', - 'Poincareplane;': '\u210c', - 'pointint;': '\u2a15', - 'Popf;': '\u2119', - 'popf;': '\U0001d561', - 'pound': '\xa3', - 'pound;': '\xa3', - 'Pr;': '\u2abb', - 'pr;': '\u227a', - 'prap;': '\u2ab7', - 'prcue;': '\u227c', - 'prE;': '\u2ab3', - 'pre;': '\u2aaf', - 'prec;': '\u227a', - 'precapprox;': '\u2ab7', - 'preccurlyeq;': '\u227c', - 'Precedes;': '\u227a', - 'PrecedesEqual;': '\u2aaf', - 'PrecedesSlantEqual;': '\u227c', - 
'PrecedesTilde;': '\u227e', - 'preceq;': '\u2aaf', - 'precnapprox;': '\u2ab9', - 'precneqq;': '\u2ab5', - 'precnsim;': '\u22e8', - 'precsim;': '\u227e', - 'Prime;': '\u2033', - 'prime;': '\u2032', - 'primes;': '\u2119', - 'prnap;': '\u2ab9', - 'prnE;': '\u2ab5', - 'prnsim;': '\u22e8', - 'prod;': '\u220f', - 'Product;': '\u220f', - 'profalar;': '\u232e', - 'profline;': '\u2312', - 'profsurf;': '\u2313', - 'prop;': '\u221d', - 'Proportion;': '\u2237', - 'Proportional;': '\u221d', - 'propto;': '\u221d', - 'prsim;': '\u227e', - 'prurel;': '\u22b0', - 'Pscr;': '\U0001d4ab', - 'pscr;': '\U0001d4c5', - 'Psi;': '\u03a8', - 'psi;': '\u03c8', - 'puncsp;': '\u2008', - 'Qfr;': '\U0001d514', - 'qfr;': '\U0001d52e', - 'qint;': '\u2a0c', - 'Qopf;': '\u211a', - 'qopf;': '\U0001d562', - 'qprime;': '\u2057', - 'Qscr;': '\U0001d4ac', - 'qscr;': '\U0001d4c6', - 'quaternions;': '\u210d', - 'quatint;': '\u2a16', - 'quest;': '?', - 'questeq;': '\u225f', - 'QUOT': '"', - 'quot': '"', - 'QUOT;': '"', - 'quot;': '"', - 'rAarr;': '\u21db', - 'race;': '\u223d\u0331', - 'Racute;': '\u0154', - 'racute;': '\u0155', - 'radic;': '\u221a', - 'raemptyv;': '\u29b3', - 'Rang;': '\u27eb', - 'rang;': '\u27e9', - 'rangd;': '\u2992', - 'range;': '\u29a5', - 'rangle;': '\u27e9', - 'raquo': '\xbb', - 'raquo;': '\xbb', - 'Rarr;': '\u21a0', - 'rArr;': '\u21d2', - 'rarr;': '\u2192', - 'rarrap;': '\u2975', - 'rarrb;': '\u21e5', - 'rarrbfs;': '\u2920', - 'rarrc;': '\u2933', - 'rarrfs;': '\u291e', - 'rarrhk;': '\u21aa', - 'rarrlp;': '\u21ac', - 'rarrpl;': '\u2945', - 'rarrsim;': '\u2974', - 'Rarrtl;': '\u2916', - 'rarrtl;': '\u21a3', - 'rarrw;': '\u219d', - 'rAtail;': '\u291c', - 'ratail;': '\u291a', - 'ratio;': '\u2236', - 'rationals;': '\u211a', - 'RBarr;': '\u2910', - 'rBarr;': '\u290f', - 'rbarr;': '\u290d', - 'rbbrk;': '\u2773', - 'rbrace;': '}', - 'rbrack;': ']', - 'rbrke;': '\u298c', - 'rbrksld;': '\u298e', - 'rbrkslu;': '\u2990', - 'Rcaron;': '\u0158', - 'rcaron;': '\u0159', - 'Rcedil;': '\u0156', - 
'rcedil;': '\u0157', - 'rceil;': '\u2309', - 'rcub;': '}', - 'Rcy;': '\u0420', - 'rcy;': '\u0440', - 'rdca;': '\u2937', - 'rdldhar;': '\u2969', - 'rdquo;': '\u201d', - 'rdquor;': '\u201d', - 'rdsh;': '\u21b3', - 'Re;': '\u211c', - 'real;': '\u211c', - 'realine;': '\u211b', - 'realpart;': '\u211c', - 'reals;': '\u211d', - 'rect;': '\u25ad', - 'REG': '\xae', - 'reg': '\xae', - 'REG;': '\xae', - 'reg;': '\xae', - 'ReverseElement;': '\u220b', - 'ReverseEquilibrium;': '\u21cb', - 'ReverseUpEquilibrium;': '\u296f', - 'rfisht;': '\u297d', - 'rfloor;': '\u230b', - 'Rfr;': '\u211c', - 'rfr;': '\U0001d52f', - 'rHar;': '\u2964', - 'rhard;': '\u21c1', - 'rharu;': '\u21c0', - 'rharul;': '\u296c', - 'Rho;': '\u03a1', - 'rho;': '\u03c1', - 'rhov;': '\u03f1', - 'RightAngleBracket;': '\u27e9', - 'RightArrow;': '\u2192', - 'Rightarrow;': '\u21d2', - 'rightarrow;': '\u2192', - 'RightArrowBar;': '\u21e5', - 'RightArrowLeftArrow;': '\u21c4', - 'rightarrowtail;': '\u21a3', - 'RightCeiling;': '\u2309', - 'RightDoubleBracket;': '\u27e7', - 'RightDownTeeVector;': '\u295d', - 'RightDownVector;': '\u21c2', - 'RightDownVectorBar;': '\u2955', - 'RightFloor;': '\u230b', - 'rightharpoondown;': '\u21c1', - 'rightharpoonup;': '\u21c0', - 'rightleftarrows;': '\u21c4', - 'rightleftharpoons;': '\u21cc', - 'rightrightarrows;': '\u21c9', - 'rightsquigarrow;': '\u219d', - 'RightTee;': '\u22a2', - 'RightTeeArrow;': '\u21a6', - 'RightTeeVector;': '\u295b', - 'rightthreetimes;': '\u22cc', - 'RightTriangle;': '\u22b3', - 'RightTriangleBar;': '\u29d0', - 'RightTriangleEqual;': '\u22b5', - 'RightUpDownVector;': '\u294f', - 'RightUpTeeVector;': '\u295c', - 'RightUpVector;': '\u21be', - 'RightUpVectorBar;': '\u2954', - 'RightVector;': '\u21c0', - 'RightVectorBar;': '\u2953', - 'ring;': '\u02da', - 'risingdotseq;': '\u2253', - 'rlarr;': '\u21c4', - 'rlhar;': '\u21cc', - 'rlm;': '\u200f', - 'rmoust;': '\u23b1', - 'rmoustache;': '\u23b1', - 'rnmid;': '\u2aee', - 'roang;': '\u27ed', - 'roarr;': '\u21fe', - 
'robrk;': '\u27e7', - 'ropar;': '\u2986', - 'Ropf;': '\u211d', - 'ropf;': '\U0001d563', - 'roplus;': '\u2a2e', - 'rotimes;': '\u2a35', - 'RoundImplies;': '\u2970', - 'rpar;': ')', - 'rpargt;': '\u2994', - 'rppolint;': '\u2a12', - 'rrarr;': '\u21c9', - 'Rrightarrow;': '\u21db', - 'rsaquo;': '\u203a', - 'Rscr;': '\u211b', - 'rscr;': '\U0001d4c7', - 'Rsh;': '\u21b1', - 'rsh;': '\u21b1', - 'rsqb;': ']', - 'rsquo;': '\u2019', - 'rsquor;': '\u2019', - 'rthree;': '\u22cc', - 'rtimes;': '\u22ca', - 'rtri;': '\u25b9', - 'rtrie;': '\u22b5', - 'rtrif;': '\u25b8', - 'rtriltri;': '\u29ce', - 'RuleDelayed;': '\u29f4', - 'ruluhar;': '\u2968', - 'rx;': '\u211e', - 'Sacute;': '\u015a', - 'sacute;': '\u015b', - 'sbquo;': '\u201a', - 'Sc;': '\u2abc', - 'sc;': '\u227b', - 'scap;': '\u2ab8', - 'Scaron;': '\u0160', - 'scaron;': '\u0161', - 'sccue;': '\u227d', - 'scE;': '\u2ab4', - 'sce;': '\u2ab0', - 'Scedil;': '\u015e', - 'scedil;': '\u015f', - 'Scirc;': '\u015c', - 'scirc;': '\u015d', - 'scnap;': '\u2aba', - 'scnE;': '\u2ab6', - 'scnsim;': '\u22e9', - 'scpolint;': '\u2a13', - 'scsim;': '\u227f', - 'Scy;': '\u0421', - 'scy;': '\u0441', - 'sdot;': '\u22c5', - 'sdotb;': '\u22a1', - 'sdote;': '\u2a66', - 'searhk;': '\u2925', - 'seArr;': '\u21d8', - 'searr;': '\u2198', - 'searrow;': '\u2198', - 'sect': '\xa7', - 'sect;': '\xa7', - 'semi;': ';', - 'seswar;': '\u2929', - 'setminus;': '\u2216', - 'setmn;': '\u2216', - 'sext;': '\u2736', - 'Sfr;': '\U0001d516', - 'sfr;': '\U0001d530', - 'sfrown;': '\u2322', - 'sharp;': '\u266f', - 'SHCHcy;': '\u0429', - 'shchcy;': '\u0449', - 'SHcy;': '\u0428', - 'shcy;': '\u0448', - 'ShortDownArrow;': '\u2193', - 'ShortLeftArrow;': '\u2190', - 'shortmid;': '\u2223', - 'shortparallel;': '\u2225', - 'ShortRightArrow;': '\u2192', - 'ShortUpArrow;': '\u2191', - 'shy': '\xad', - 'shy;': '\xad', - 'Sigma;': '\u03a3', - 'sigma;': '\u03c3', - 'sigmaf;': '\u03c2', - 'sigmav;': '\u03c2', - 'sim;': '\u223c', - 'simdot;': '\u2a6a', - 'sime;': '\u2243', - 'simeq;': 
'\u2243', - 'simg;': '\u2a9e', - 'simgE;': '\u2aa0', - 'siml;': '\u2a9d', - 'simlE;': '\u2a9f', - 'simne;': '\u2246', - 'simplus;': '\u2a24', - 'simrarr;': '\u2972', - 'slarr;': '\u2190', - 'SmallCircle;': '\u2218', - 'smallsetminus;': '\u2216', - 'smashp;': '\u2a33', - 'smeparsl;': '\u29e4', - 'smid;': '\u2223', - 'smile;': '\u2323', - 'smt;': '\u2aaa', - 'smte;': '\u2aac', - 'smtes;': '\u2aac\ufe00', - 'SOFTcy;': '\u042c', - 'softcy;': '\u044c', - 'sol;': '/', - 'solb;': '\u29c4', - 'solbar;': '\u233f', - 'Sopf;': '\U0001d54a', - 'sopf;': '\U0001d564', - 'spades;': '\u2660', - 'spadesuit;': '\u2660', - 'spar;': '\u2225', - 'sqcap;': '\u2293', - 'sqcaps;': '\u2293\ufe00', - 'sqcup;': '\u2294', - 'sqcups;': '\u2294\ufe00', - 'Sqrt;': '\u221a', - 'sqsub;': '\u228f', - 'sqsube;': '\u2291', - 'sqsubset;': '\u228f', - 'sqsubseteq;': '\u2291', - 'sqsup;': '\u2290', - 'sqsupe;': '\u2292', - 'sqsupset;': '\u2290', - 'sqsupseteq;': '\u2292', - 'squ;': '\u25a1', - 'Square;': '\u25a1', - 'square;': '\u25a1', - 'SquareIntersection;': '\u2293', - 'SquareSubset;': '\u228f', - 'SquareSubsetEqual;': '\u2291', - 'SquareSuperset;': '\u2290', - 'SquareSupersetEqual;': '\u2292', - 'SquareUnion;': '\u2294', - 'squarf;': '\u25aa', - 'squf;': '\u25aa', - 'srarr;': '\u2192', - 'Sscr;': '\U0001d4ae', - 'sscr;': '\U0001d4c8', - 'ssetmn;': '\u2216', - 'ssmile;': '\u2323', - 'sstarf;': '\u22c6', - 'Star;': '\u22c6', - 'star;': '\u2606', - 'starf;': '\u2605', - 'straightepsilon;': '\u03f5', - 'straightphi;': '\u03d5', - 'strns;': '\xaf', - 'Sub;': '\u22d0', - 'sub;': '\u2282', - 'subdot;': '\u2abd', - 'subE;': '\u2ac5', - 'sube;': '\u2286', - 'subedot;': '\u2ac3', - 'submult;': '\u2ac1', - 'subnE;': '\u2acb', - 'subne;': '\u228a', - 'subplus;': '\u2abf', - 'subrarr;': '\u2979', - 'Subset;': '\u22d0', - 'subset;': '\u2282', - 'subseteq;': '\u2286', - 'subseteqq;': '\u2ac5', - 'SubsetEqual;': '\u2286', - 'subsetneq;': '\u228a', - 'subsetneqq;': '\u2acb', - 'subsim;': '\u2ac7', - 'subsub;': 
'\u2ad5', - 'subsup;': '\u2ad3', - 'succ;': '\u227b', - 'succapprox;': '\u2ab8', - 'succcurlyeq;': '\u227d', - 'Succeeds;': '\u227b', - 'SucceedsEqual;': '\u2ab0', - 'SucceedsSlantEqual;': '\u227d', - 'SucceedsTilde;': '\u227f', - 'succeq;': '\u2ab0', - 'succnapprox;': '\u2aba', - 'succneqq;': '\u2ab6', - 'succnsim;': '\u22e9', - 'succsim;': '\u227f', - 'SuchThat;': '\u220b', - 'Sum;': '\u2211', - 'sum;': '\u2211', - 'sung;': '\u266a', - 'sup1': '\xb9', - 'sup1;': '\xb9', - 'sup2': '\xb2', - 'sup2;': '\xb2', - 'sup3': '\xb3', - 'sup3;': '\xb3', - 'Sup;': '\u22d1', - 'sup;': '\u2283', - 'supdot;': '\u2abe', - 'supdsub;': '\u2ad8', - 'supE;': '\u2ac6', - 'supe;': '\u2287', - 'supedot;': '\u2ac4', - 'Superset;': '\u2283', - 'SupersetEqual;': '\u2287', - 'suphsol;': '\u27c9', - 'suphsub;': '\u2ad7', - 'suplarr;': '\u297b', - 'supmult;': '\u2ac2', - 'supnE;': '\u2acc', - 'supne;': '\u228b', - 'supplus;': '\u2ac0', - 'Supset;': '\u22d1', - 'supset;': '\u2283', - 'supseteq;': '\u2287', - 'supseteqq;': '\u2ac6', - 'supsetneq;': '\u228b', - 'supsetneqq;': '\u2acc', - 'supsim;': '\u2ac8', - 'supsub;': '\u2ad4', - 'supsup;': '\u2ad6', - 'swarhk;': '\u2926', - 'swArr;': '\u21d9', - 'swarr;': '\u2199', - 'swarrow;': '\u2199', - 'swnwar;': '\u292a', - 'szlig': '\xdf', - 'szlig;': '\xdf', - 'Tab;': '\t', - 'target;': '\u2316', - 'Tau;': '\u03a4', - 'tau;': '\u03c4', - 'tbrk;': '\u23b4', - 'Tcaron;': '\u0164', - 'tcaron;': '\u0165', - 'Tcedil;': '\u0162', - 'tcedil;': '\u0163', - 'Tcy;': '\u0422', - 'tcy;': '\u0442', - 'tdot;': '\u20db', - 'telrec;': '\u2315', - 'Tfr;': '\U0001d517', - 'tfr;': '\U0001d531', - 'there4;': '\u2234', - 'Therefore;': '\u2234', - 'therefore;': '\u2234', - 'Theta;': '\u0398', - 'theta;': '\u03b8', - 'thetasym;': '\u03d1', - 'thetav;': '\u03d1', - 'thickapprox;': '\u2248', - 'thicksim;': '\u223c', - 'ThickSpace;': '\u205f\u200a', - 'thinsp;': '\u2009', - 'ThinSpace;': '\u2009', - 'thkap;': '\u2248', - 'thksim;': '\u223c', - 'THORN': '\xde', - 'thorn': 
'\xfe', - 'THORN;': '\xde', - 'thorn;': '\xfe', - 'Tilde;': '\u223c', - 'tilde;': '\u02dc', - 'TildeEqual;': '\u2243', - 'TildeFullEqual;': '\u2245', - 'TildeTilde;': '\u2248', - 'times': '\xd7', - 'times;': '\xd7', - 'timesb;': '\u22a0', - 'timesbar;': '\u2a31', - 'timesd;': '\u2a30', - 'tint;': '\u222d', - 'toea;': '\u2928', - 'top;': '\u22a4', - 'topbot;': '\u2336', - 'topcir;': '\u2af1', - 'Topf;': '\U0001d54b', - 'topf;': '\U0001d565', - 'topfork;': '\u2ada', - 'tosa;': '\u2929', - 'tprime;': '\u2034', - 'TRADE;': '\u2122', - 'trade;': '\u2122', - 'triangle;': '\u25b5', - 'triangledown;': '\u25bf', - 'triangleleft;': '\u25c3', - 'trianglelefteq;': '\u22b4', - 'triangleq;': '\u225c', - 'triangleright;': '\u25b9', - 'trianglerighteq;': '\u22b5', - 'tridot;': '\u25ec', - 'trie;': '\u225c', - 'triminus;': '\u2a3a', - 'TripleDot;': '\u20db', - 'triplus;': '\u2a39', - 'trisb;': '\u29cd', - 'tritime;': '\u2a3b', - 'trpezium;': '\u23e2', - 'Tscr;': '\U0001d4af', - 'tscr;': '\U0001d4c9', - 'TScy;': '\u0426', - 'tscy;': '\u0446', - 'TSHcy;': '\u040b', - 'tshcy;': '\u045b', - 'Tstrok;': '\u0166', - 'tstrok;': '\u0167', - 'twixt;': '\u226c', - 'twoheadleftarrow;': '\u219e', - 'twoheadrightarrow;': '\u21a0', - 'Uacute': '\xda', - 'uacute': '\xfa', - 'Uacute;': '\xda', - 'uacute;': '\xfa', - 'Uarr;': '\u219f', - 'uArr;': '\u21d1', - 'uarr;': '\u2191', - 'Uarrocir;': '\u2949', - 'Ubrcy;': '\u040e', - 'ubrcy;': '\u045e', - 'Ubreve;': '\u016c', - 'ubreve;': '\u016d', - 'Ucirc': '\xdb', - 'ucirc': '\xfb', - 'Ucirc;': '\xdb', - 'ucirc;': '\xfb', - 'Ucy;': '\u0423', - 'ucy;': '\u0443', - 'udarr;': '\u21c5', - 'Udblac;': '\u0170', - 'udblac;': '\u0171', - 'udhar;': '\u296e', - 'ufisht;': '\u297e', - 'Ufr;': '\U0001d518', - 'ufr;': '\U0001d532', - 'Ugrave': '\xd9', - 'ugrave': '\xf9', - 'Ugrave;': '\xd9', - 'ugrave;': '\xf9', - 'uHar;': '\u2963', - 'uharl;': '\u21bf', - 'uharr;': '\u21be', - 'uhblk;': '\u2580', - 'ulcorn;': '\u231c', - 'ulcorner;': '\u231c', - 'ulcrop;': '\u230f', 
- 'ultri;': '\u25f8', - 'Umacr;': '\u016a', - 'umacr;': '\u016b', - 'uml': '\xa8', - 'uml;': '\xa8', - 'UnderBar;': '_', - 'UnderBrace;': '\u23df', - 'UnderBracket;': '\u23b5', - 'UnderParenthesis;': '\u23dd', - 'Union;': '\u22c3', - 'UnionPlus;': '\u228e', - 'Uogon;': '\u0172', - 'uogon;': '\u0173', - 'Uopf;': '\U0001d54c', - 'uopf;': '\U0001d566', - 'UpArrow;': '\u2191', - 'Uparrow;': '\u21d1', - 'uparrow;': '\u2191', - 'UpArrowBar;': '\u2912', - 'UpArrowDownArrow;': '\u21c5', - 'UpDownArrow;': '\u2195', - 'Updownarrow;': '\u21d5', - 'updownarrow;': '\u2195', - 'UpEquilibrium;': '\u296e', - 'upharpoonleft;': '\u21bf', - 'upharpoonright;': '\u21be', - 'uplus;': '\u228e', - 'UpperLeftArrow;': '\u2196', - 'UpperRightArrow;': '\u2197', - 'Upsi;': '\u03d2', - 'upsi;': '\u03c5', - 'upsih;': '\u03d2', - 'Upsilon;': '\u03a5', - 'upsilon;': '\u03c5', - 'UpTee;': '\u22a5', - 'UpTeeArrow;': '\u21a5', - 'upuparrows;': '\u21c8', - 'urcorn;': '\u231d', - 'urcorner;': '\u231d', - 'urcrop;': '\u230e', - 'Uring;': '\u016e', - 'uring;': '\u016f', - 'urtri;': '\u25f9', - 'Uscr;': '\U0001d4b0', - 'uscr;': '\U0001d4ca', - 'utdot;': '\u22f0', - 'Utilde;': '\u0168', - 'utilde;': '\u0169', - 'utri;': '\u25b5', - 'utrif;': '\u25b4', - 'uuarr;': '\u21c8', - 'Uuml': '\xdc', - 'uuml': '\xfc', - 'Uuml;': '\xdc', - 'uuml;': '\xfc', - 'uwangle;': '\u29a7', - 'vangrt;': '\u299c', - 'varepsilon;': '\u03f5', - 'varkappa;': '\u03f0', - 'varnothing;': '\u2205', - 'varphi;': '\u03d5', - 'varpi;': '\u03d6', - 'varpropto;': '\u221d', - 'vArr;': '\u21d5', - 'varr;': '\u2195', - 'varrho;': '\u03f1', - 'varsigma;': '\u03c2', - 'varsubsetneq;': '\u228a\ufe00', - 'varsubsetneqq;': '\u2acb\ufe00', - 'varsupsetneq;': '\u228b\ufe00', - 'varsupsetneqq;': '\u2acc\ufe00', - 'vartheta;': '\u03d1', - 'vartriangleleft;': '\u22b2', - 'vartriangleright;': '\u22b3', - 'Vbar;': '\u2aeb', - 'vBar;': '\u2ae8', - 'vBarv;': '\u2ae9', - 'Vcy;': '\u0412', - 'vcy;': '\u0432', - 'VDash;': '\u22ab', - 'Vdash;': '\u22a9', - 
'vDash;': '\u22a8', - 'vdash;': '\u22a2', - 'Vdashl;': '\u2ae6', - 'Vee;': '\u22c1', - 'vee;': '\u2228', - 'veebar;': '\u22bb', - 'veeeq;': '\u225a', - 'vellip;': '\u22ee', - 'Verbar;': '\u2016', - 'verbar;': '|', - 'Vert;': '\u2016', - 'vert;': '|', - 'VerticalBar;': '\u2223', - 'VerticalLine;': '|', - 'VerticalSeparator;': '\u2758', - 'VerticalTilde;': '\u2240', - 'VeryThinSpace;': '\u200a', - 'Vfr;': '\U0001d519', - 'vfr;': '\U0001d533', - 'vltri;': '\u22b2', - 'vnsub;': '\u2282\u20d2', - 'vnsup;': '\u2283\u20d2', - 'Vopf;': '\U0001d54d', - 'vopf;': '\U0001d567', - 'vprop;': '\u221d', - 'vrtri;': '\u22b3', - 'Vscr;': '\U0001d4b1', - 'vscr;': '\U0001d4cb', - 'vsubnE;': '\u2acb\ufe00', - 'vsubne;': '\u228a\ufe00', - 'vsupnE;': '\u2acc\ufe00', - 'vsupne;': '\u228b\ufe00', - 'Vvdash;': '\u22aa', - 'vzigzag;': '\u299a', - 'Wcirc;': '\u0174', - 'wcirc;': '\u0175', - 'wedbar;': '\u2a5f', - 'Wedge;': '\u22c0', - 'wedge;': '\u2227', - 'wedgeq;': '\u2259', - 'weierp;': '\u2118', - 'Wfr;': '\U0001d51a', - 'wfr;': '\U0001d534', - 'Wopf;': '\U0001d54e', - 'wopf;': '\U0001d568', - 'wp;': '\u2118', - 'wr;': '\u2240', - 'wreath;': '\u2240', - 'Wscr;': '\U0001d4b2', - 'wscr;': '\U0001d4cc', - 'xcap;': '\u22c2', - 'xcirc;': '\u25ef', - 'xcup;': '\u22c3', - 'xdtri;': '\u25bd', - 'Xfr;': '\U0001d51b', - 'xfr;': '\U0001d535', - 'xhArr;': '\u27fa', - 'xharr;': '\u27f7', - 'Xi;': '\u039e', - 'xi;': '\u03be', - 'xlArr;': '\u27f8', - 'xlarr;': '\u27f5', - 'xmap;': '\u27fc', - 'xnis;': '\u22fb', - 'xodot;': '\u2a00', - 'Xopf;': '\U0001d54f', - 'xopf;': '\U0001d569', - 'xoplus;': '\u2a01', - 'xotime;': '\u2a02', - 'xrArr;': '\u27f9', - 'xrarr;': '\u27f6', - 'Xscr;': '\U0001d4b3', - 'xscr;': '\U0001d4cd', - 'xsqcup;': '\u2a06', - 'xuplus;': '\u2a04', - 'xutri;': '\u25b3', - 'xvee;': '\u22c1', - 'xwedge;': '\u22c0', - 'Yacute': '\xdd', - 'yacute': '\xfd', - 'Yacute;': '\xdd', - 'yacute;': '\xfd', - 'YAcy;': '\u042f', - 'yacy;': '\u044f', - 'Ycirc;': '\u0176', - 'ycirc;': '\u0177', - 'Ycy;': 
'\u042b', - 'ycy;': '\u044b', - 'yen': '\xa5', - 'yen;': '\xa5', - 'Yfr;': '\U0001d51c', - 'yfr;': '\U0001d536', - 'YIcy;': '\u0407', - 'yicy;': '\u0457', - 'Yopf;': '\U0001d550', - 'yopf;': '\U0001d56a', - 'Yscr;': '\U0001d4b4', - 'yscr;': '\U0001d4ce', - 'YUcy;': '\u042e', - 'yucy;': '\u044e', - 'yuml': '\xff', - 'Yuml;': '\u0178', - 'yuml;': '\xff', - 'Zacute;': '\u0179', - 'zacute;': '\u017a', - 'Zcaron;': '\u017d', - 'zcaron;': '\u017e', - 'Zcy;': '\u0417', - 'zcy;': '\u0437', - 'Zdot;': '\u017b', - 'zdot;': '\u017c', - 'zeetrf;': '\u2128', - 'ZeroWidthSpace;': '\u200b', - 'Zeta;': '\u0396', - 'zeta;': '\u03b6', - 'Zfr;': '\u2128', - 'zfr;': '\U0001d537', - 'ZHcy;': '\u0416', - 'zhcy;': '\u0436', - 'zigrarr;': '\u21dd', - 'Zopf;': '\u2124', - 'zopf;': '\U0001d56b', - 'Zscr;': '\U0001d4b5', - 'zscr;': '\U0001d4cf', - 'zwj;': '\u200d', - 'zwnj;': '\u200c', - } - -try: - import http.client as compat_http_client -except ImportError: # Python 2 - import httplib as compat_http_client -try: - compat_http_client.HTTPResponse.getcode -except AttributeError: - # Py < 3.1 - compat_http_client.HTTPResponse.getcode = lambda self: self.status - -try: - from urllib.error import HTTPError as compat_HTTPError -except ImportError: # Python 2 - from urllib2 import HTTPError as compat_HTTPError -compat_urllib_HTTPError = compat_HTTPError - -try: - from urllib.request import urlretrieve as compat_urlretrieve -except ImportError: # Python 2 - from urllib import urlretrieve as compat_urlretrieve -compat_urllib_request_urlretrieve = compat_urlretrieve - -try: - from html.parser import HTMLParser as compat_HTMLParser -except ImportError: # Python 2 - from HTMLParser import HTMLParser as compat_HTMLParser -compat_html_parser_HTMLParser = compat_HTMLParser - -try: # Python 2 - from HTMLParser import HTMLParseError as compat_HTMLParseError -except ImportError: # Python <3.4 - try: - from html.parser import HTMLParseError as compat_HTMLParseError - except ImportError: # Python >3.4 - - # 
HTMLParseError has been deprecated in Python 3.3 and removed in - # Python 3.5. Introducing dummy exception for Python >3.5 for compatible - # and uniform cross-version exception handling - class compat_HTMLParseError(Exception): - pass -compat_html_parser_HTMLParseError = compat_HTMLParseError - -try: - from subprocess import DEVNULL - compat_subprocess_get_DEVNULL = lambda: DEVNULL -except ImportError: - compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w') - -try: - import http.server as compat_http_server -except ImportError: - import BaseHTTPServer as compat_http_server - -try: - from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes - from urllib.parse import unquote as compat_urllib_parse_unquote - from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus - from urllib.parse import urlencode as compat_urllib_parse_urlencode - from urllib.parse import parse_qs as compat_parse_qs -except ImportError: # Python 2 - _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire') - else re.compile(r'([\x00-\x7f]+)')) - - # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus - # implementations from cpython 3.4.3's stdlib. Python 2's version - # is apparently broken (see https://github.com/ytdl-org/youtube-dl/pull/6244) - - def compat_urllib_parse_unquote_to_bytes(string): - """unquote_to_bytes('abc%20def') -> b'abc def'.""" - # Note: strings are encoded as UTF-8. This is only an issue if it contains - # unescaped non-ASCII characters, which URIs should not. - if not string: - # Is it a string-like object? 
- string.split - return b'' - if isinstance(string, compat_str): - string = string.encode('utf-8') - bits = string.split(b'%') - if len(bits) == 1: - return string - res = [bits[0]] - append = res.append - for item in bits[1:]: - try: - append(compat_urllib_parse._hextochr[item[:2]]) - append(item[2:]) - except KeyError: - append(b'%') - append(item) - return b''.join(res) - - def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'): - """Replace %xx escapes by their single-character equivalent. The optional - encoding and errors parameters specify how to decode percent-encoded - sequences into Unicode characters, as accepted by the bytes.decode() - method. - By default, percent-encoded sequences are decoded with UTF-8, and invalid - sequences are replaced by a placeholder character. - - unquote('abc%20def') -> 'abc def'. - """ - if '%' not in string: - string.split - return string - if encoding is None: - encoding = 'utf-8' - if errors is None: - errors = 'replace' - bits = _asciire.split(string) - res = [bits[0]] - append = res.append - for i in range(1, len(bits), 2): - append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors)) - append(bits[i + 1]) - return ''.join(res) - - def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'): - """Like unquote(), but also replace plus signs by spaces, as required for - unquoting HTML form values. - - unquote_plus('%7e/abc+def') -> '~/abc def' - """ - string = string.replace('+', ' ') - return compat_urllib_parse_unquote(string, encoding, errors) - - # Python 2 will choke in urlencode on mixture of byte and unicode strings. - # Possible solutions are to either port it from python 3 with all - # the friends or manually ensure input query contains only byte strings. - # We will stick with latter thus recursively encoding the whole query. 
- def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'): - def encode_elem(e): - if isinstance(e, dict): - e = encode_dict(e) - elif isinstance(e, (list, tuple,)): - list_e = encode_list(e) - e = tuple(list_e) if isinstance(e, tuple) else list_e - elif isinstance(e, compat_str): - e = e.encode(encoding) - return e - - def encode_dict(d): - return dict((encode_elem(k), encode_elem(v)) for k, v in d.items()) - - def encode_list(l): - return [encode_elem(e) for e in l] - - return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq) - - # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib. - # Python 2's version is apparently totally broken - def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace'): - qs, _coerce_result = qs, compat_str - pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] - r = [] - for name_value in pairs: - if not name_value and not strict_parsing: - continue - nv = name_value.split('=', 1) - if len(nv) != 2: - if strict_parsing: - raise ValueError('bad query field: %r' % (name_value,)) - # Handle case of a control-name with no equal sign - if keep_blank_values: - nv.append('') - else: - continue - if len(nv[1]) or keep_blank_values: - name = nv[0].replace('+', ' ') - name = compat_urllib_parse_unquote( - name, encoding=encoding, errors=errors) - name = _coerce_result(name) - value = nv[1].replace('+', ' ') - value = compat_urllib_parse_unquote( - value, encoding=encoding, errors=errors) - value = _coerce_result(value) - r.append((name, value)) - return r - - def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace'): - parsed_result = {} - pairs = _parse_qsl(qs, keep_blank_values, strict_parsing, - encoding=encoding, errors=errors) - for name, value in pairs: - if name in parsed_result: - parsed_result[name].append(value) - else: - parsed_result[name] = [value] - return parsed_result - - 
setattr(compat_urllib_parse, '_urlencode', - getattr(compat_urllib_parse, 'urlencode')) - for name, fix in ( - ('unquote_to_bytes', compat_urllib_parse_unquote_to_bytes), - ('parse_unquote', compat_urllib_parse_unquote), - ('unquote_plus', compat_urllib_parse_unquote_plus), - ('urlencode', compat_urllib_parse_urlencode), - ('parse_qs', compat_parse_qs)): - setattr(compat_urllib_parse, name, fix) - -compat_urllib_parse_parse_qs = compat_parse_qs - -try: - from urllib.request import DataHandler as compat_urllib_request_DataHandler -except ImportError: # Python < 3.4 - # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py - class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler): - def data_open(self, req): - # data URLs as specified in RFC 2397. - # - # ignores POSTed data - # - # syntax: - # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data - # mediatype := [ type "/" subtype ] *( ";" parameter ) - # data := *urlchar - # parameter := attribute "=" value - url = req.get_full_url() - - scheme, data = url.split(':', 1) - mediatype, data = data.split(',', 1) - - # even base64 encoded data URLs might be quoted so unquote in any case: - data = compat_urllib_parse_unquote_to_bytes(data) - if mediatype.endswith(';base64'): - data = binascii.a2b_base64(data) - mediatype = mediatype[:-7] - - if not mediatype: - mediatype = 'text/plain;charset=US-ASCII' - - headers = email.message_from_string( - 'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data))) - - return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url) - -try: - from xml.etree.ElementTree import ParseError as compat_xml_parse_error -except ImportError: # Python 2.6 - from xml.parsers.expat import ExpatError as compat_xml_parse_error -compat_xml_etree_ElementTree_ParseError = compat_xml_parse_error - -etree = xml.etree.ElementTree - - -class _TreeBuilder(etree.TreeBuilder): - def doctype(self, name, pubid, system): - pass - - -try: - # 
xml.etree.ElementTree.Element is a method in Python <=2.6 and - # the following will crash with: - # TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types - isinstance(None, etree.Element) - from xml.etree.ElementTree import Element as compat_etree_Element -except TypeError: # Python <=2.6 - from xml.etree.ElementTree import _ElementInterface as compat_etree_Element -compat_xml_etree_ElementTree_Element = compat_etree_Element - -if sys.version_info[0] >= 3: - def compat_etree_fromstring(text): - return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) -else: - # python 2.x tries to encode unicode strings with ascii (see the - # XMLParser._fixtext method) - try: - _etree_iter = etree.Element.iter - except AttributeError: # Python <=2.6 - def _etree_iter(root): - for el in root.findall('*'): - yield el - for sub in _etree_iter(el): - yield sub - - # on 2.6 XML doesn't have a parser argument, function copied from CPython - # 2.7 source - def _XML(text, parser=None): - if not parser: - parser = etree.XMLParser(target=_TreeBuilder()) - parser.feed(text) - return parser.close() - - def _element_factory(*args, **kwargs): - el = etree.Element(*args, **kwargs) - for k, v in el.items(): - if isinstance(v, bytes): - el.set(k, v.decode('utf-8')) - return el - - def compat_etree_fromstring(text): - doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory))) - for el in _etree_iter(doc): - if el.text is not None and isinstance(el.text, bytes): - el.text = el.text.decode('utf-8') - return doc - -if hasattr(etree, 'register_namespace'): - compat_etree_register_namespace = etree.register_namespace -else: - def compat_etree_register_namespace(prefix, uri): - """Register a namespace prefix. - The registry is global, and any existing mapping for either the - given prefix or the namespace URI will be removed. - *prefix* is the namespace prefix, *uri* is a namespace uri. 
Tags and - attributes in this namespace will be serialized with prefix if possible. - ValueError is raised if prefix is reserved or is invalid. - """ - if re.match(r"ns\d+$", prefix): - raise ValueError("Prefix format reserved for internal use") - for k, v in list(etree._namespace_map.items()): - if k == uri or v == prefix: - del etree._namespace_map[k] - etree._namespace_map[uri] = prefix -compat_xml_etree_register_namespace = compat_etree_register_namespace - -if sys.version_info < (2, 7): - # Here comes the crazy part: In 2.6, if the xpath is a unicode, - # .//node does not match if a node is a direct child of . ! - def compat_xpath(xpath): - if isinstance(xpath, compat_str): - xpath = xpath.encode('ascii') - return xpath -else: - compat_xpath = lambda xpath: xpath - - -compat_os_name = os._name if os.name == 'java' else os.name - - -if compat_os_name == 'nt': - def compat_shlex_quote(s): - return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"') -else: - try: - from shlex import quote as compat_shlex_quote - except ImportError: # Python < 3.3 - def compat_shlex_quote(s): - if re.match(r'^[-_\w./]+$', s): - return s - else: - return "'" + s.replace("'", "'\"'\"'") + "'" - - -try: - args = shlex.split('中文') - assert (isinstance(args, list) - and isinstance(args[0], compat_str) - and args[0] == '中文') - compat_shlex_split = shlex.split -except (AssertionError, UnicodeEncodeError): - # Working around shlex issue with unicode strings on some python 2 - # versions (see http://bugs.python.org/issue1548891) - def compat_shlex_split(s, comments=False, posix=True): - if isinstance(s, compat_str): - s = s.encode('utf-8') - return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix))) - - -def compat_ord(c): - if type(c) is int: - return c - else: - return ord(c) - - -if sys.version_info >= (3, 0): - compat_getenv = os.getenv - compat_expanduser = os.path.expanduser - - def compat_setenv(key, value, env=os.environ): - env[key] = value 
-else: - # Environment variables should be decoded with filesystem encoding. - # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918) - - def compat_getenv(key, default=None): - from .utils import get_filesystem_encoding - env = os.getenv(key, default) - if env: - env = env.decode(get_filesystem_encoding()) - return env - - def compat_setenv(key, value, env=os.environ): - def encode(v): - from .utils import get_filesystem_encoding - return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v - env[encode(key)] = encode(value) - - # HACK: The default implementations of os.path.expanduser from cpython do not decode - # environment variables with filesystem encoding. We will work around this by - # providing adjusted implementations. - # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib - # for different platforms with correct environment variables decoding. - - if compat_os_name == 'posix': - def compat_expanduser(path): - """Expand ~ and ~user constructions. If user or $HOME is unknown, - do nothing.""" - if not path.startswith('~'): - return path - i = path.find('/', 1) - if i < 0: - i = len(path) - if i == 1: - if 'HOME' not in os.environ: - import pwd - userhome = pwd.getpwuid(os.getuid()).pw_dir - else: - userhome = compat_getenv('HOME') - else: - import pwd - try: - pwent = pwd.getpwnam(path[1:i]) - except KeyError: - return path - userhome = pwent.pw_dir - userhome = userhome.rstrip('/') - return (userhome + path[i:]) or '/' - elif compat_os_name in ('nt', 'ce'): - def compat_expanduser(path): - """Expand ~ and ~user constructs. 
- - If user or $HOME is unknown, do nothing.""" - if path[:1] != '~': - return path - i, n = 1, len(path) - while i < n and path[i] not in '/\\': - i = i + 1 - - if 'HOME' in os.environ: - userhome = compat_getenv('HOME') - elif 'USERPROFILE' in os.environ: - userhome = compat_getenv('USERPROFILE') - elif 'HOMEPATH' not in os.environ: - return path - else: - try: - drive = compat_getenv('HOMEDRIVE') - except KeyError: - drive = '' - userhome = os.path.join(drive, compat_getenv('HOMEPATH')) - - if i != 1: # ~user - userhome = os.path.join(os.path.dirname(userhome), path[1:i]) - - return userhome + path[i:] - else: - compat_expanduser = os.path.expanduser - -compat_os_path_expanduser = compat_expanduser - - -if compat_os_name == 'nt' and sys.version_info < (3, 8): - # os.path.realpath on Windows does not follow symbolic links - # prior to Python 3.8 (see https://bugs.python.org/issue9949) - def compat_realpath(path): - while os.path.islink(path): - path = os.path.abspath(os.readlink(path)) - return path -else: - compat_realpath = os.path.realpath - -compat_os_path_realpath = compat_realpath - - -if sys.version_info < (3, 0): - def compat_print(s): - from .utils import preferredencoding - print(s.encode(preferredencoding(), 'xmlcharrefreplace')) -else: - def compat_print(s): - assert isinstance(s, compat_str) - print(s) - - -if sys.version_info < (3, 0) and sys.platform == 'win32': - def compat_getpass(prompt, *args, **kwargs): - if isinstance(prompt, compat_str): - from .utils import preferredencoding - prompt = prompt.encode(preferredencoding()) - return getpass.getpass(prompt, *args, **kwargs) -else: - compat_getpass = getpass.getpass - -compat_getpass_getpass = compat_getpass - - -try: - compat_input = raw_input -except NameError: # Python 3 - compat_input = input - - -# Python < 2.6.5 require kwargs to be bytes -try: - def _testfunc(x): - pass - _testfunc(**{'x': 0}) -except TypeError: - def compat_kwargs(kwargs): - return dict((bytes(k), v) for k, v in 
kwargs.items()) -else: - compat_kwargs = lambda kwargs: kwargs - - -try: - compat_numeric_types = (int, float, long, complex) -except NameError: # Python 3 - compat_numeric_types = (int, float, complex) - - -try: - compat_integer_types = (int, long) -except NameError: # Python 3 - compat_integer_types = (int, ) - - -if sys.version_info < (2, 7): - def compat_socket_create_connection(address, timeout, source_address=None): - host, port = address - err = None - for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): - af, socktype, proto, canonname, sa = res - sock = None - try: - sock = socket.socket(af, socktype, proto) - sock.settimeout(timeout) - if source_address: - sock.bind(source_address) - sock.connect(sa) - return sock - except socket.error as _: - err = _ - if sock is not None: - sock.close() - if err is not None: - raise err - else: - raise socket.error('getaddrinfo returns an empty list') -else: - compat_socket_create_connection = socket.create_connection - - -# Fix https://github.com/ytdl-org/youtube-dl/issues/4223 -# See http://bugs.python.org/issue9161 for what is broken -def workaround_optparse_bug9161(): - op = optparse.OptionParser() - og = optparse.OptionGroup(op, 'foo') - try: - og.add_option('-t') - except TypeError: - real_add_option = optparse.OptionGroup.add_option - - def _compat_add_option(self, *args, **kwargs): - enc = lambda v: ( - v.encode('ascii', 'replace') if isinstance(v, compat_str) - else v) - bargs = [enc(a) for a in args] - bkwargs = dict( - (k, enc(v)) for k, v in kwargs.items()) - return real_add_option(self, *bargs, **bkwargs) - optparse.OptionGroup.add_option = _compat_add_option - - -if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3 - compat_get_terminal_size = shutil.get_terminal_size -else: - _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines']) - - def compat_get_terminal_size(fallback=(80, 24)): - from .utils import process_communicate_or_kill - columns = 
compat_getenv('COLUMNS') - if columns: - columns = int(columns) - else: - columns = None - lines = compat_getenv('LINES') - if lines: - lines = int(lines) - else: - lines = None - - if columns is None or lines is None or columns <= 0 or lines <= 0: - try: - sp = subprocess.Popen( - ['stty', 'size'], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = process_communicate_or_kill(sp) - _lines, _columns = map(int, out.split()) - except Exception: - _columns, _lines = _terminal_size(*fallback) - - if columns is None or columns <= 0: - columns = _columns - if lines is None or lines <= 0: - lines = _lines - return _terminal_size(columns, lines) - - -try: - itertools.count(start=0, step=1) - compat_itertools_count = itertools.count -except TypeError: # Python 2.6 - def compat_itertools_count(start=0, step=1): - while True: - yield start - start += step - - -if sys.version_info >= (3, 0): - from tokenize import tokenize as compat_tokenize_tokenize -else: - from tokenize import generate_tokens as compat_tokenize_tokenize - - -try: - struct.pack('!I', 0) -except TypeError: - # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument - # See https://bugs.python.org/issue19099 - def compat_struct_pack(spec, *args): - if isinstance(spec, compat_str): - spec = spec.encode('ascii') - return struct.pack(spec, *args) - - def compat_struct_unpack(spec, *args): - if isinstance(spec, compat_str): - spec = spec.encode('ascii') - return struct.unpack(spec, *args) - - class compat_Struct(struct.Struct): - def __init__(self, fmt): - if isinstance(fmt, compat_str): - fmt = fmt.encode('ascii') - super(compat_Struct, self).__init__(fmt) -else: - compat_struct_pack = struct.pack - compat_struct_unpack = struct.unpack - if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8): - class compat_Struct(struct.Struct): - def unpack(self, string): - if not isinstance(string, buffer): # noqa: F821 - string = buffer(string) # noqa: F821 - return 
super(compat_Struct, self).unpack(string) - else: - compat_Struct = struct.Struct - - -# compat_map/filter() returning an iterator, supposedly the -# same versioning as for zip below -try: - from future_builtins import map as compat_map -except ImportError: - try: - from itertools import imap as compat_map - except ImportError: - compat_map = map - -try: - from future_builtins import filter as compat_filter -except ImportError: - try: - from itertools import ifilter as compat_filter - except ImportError: - compat_filter = filter - -try: - from future_builtins import zip as compat_zip -except ImportError: # not 2.6+ or is 3.x - try: - from itertools import izip as compat_zip # < 2.5 or 3.x - except ImportError: - compat_zip = zip - - -# method renamed between Py2/3 -try: - from itertools import zip_longest as compat_itertools_zip_longest -except ImportError: - from itertools import izip_longest as compat_itertools_zip_longest - - -# new class in collections -try: - from collections import ChainMap as compat_collections_chain_map - # Py3.3's ChainMap is deficient - if sys.version_info < (3, 4): - raise ImportError -except ImportError: - # Py <= 3.3 - class compat_collections_chain_map(compat_collections_abc.MutableMapping): - - maps = [{}] - - def __init__(self, *maps): - self.maps = list(maps) or [{}] - - def __getitem__(self, k): - for m in self.maps: - if k in m: - return m[k] - raise KeyError(k) - - def __setitem__(self, k, v): - self.maps[0].__setitem__(k, v) - return - - def __contains__(self, k): - return any((k in m) for m in self.maps) - - def __delitem(self, k): - if k in self.maps[0]: - del self.maps[0][k] - return - raise KeyError(k) - - def __delitem__(self, k): - self.__delitem(k) - - def __iter__(self): - return itertools.chain(*reversed(self.maps)) - - def __len__(self): - return len(iter(self)) - - # to match Py3, don't del directly - def pop(self, k, *args): - if self.__contains__(k): - off = self.__getitem__(k) - self.__delitem(k) - return off - 
elif len(args) > 0: - return args[0] - raise KeyError(k) - - def new_child(self, m=None, **kwargs): - m = m or {} - m.update(kwargs) - return compat_collections_chain_map(m, *self.maps) - - @property - def parents(self): - return compat_collections_chain_map(*(self.maps[1:])) - - -# Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?) -compat_re_Pattern = type(re.compile('')) -# and on the type of a match -compat_re_Match = type(re.match('a', 'a')) - - -if sys.version_info < (3, 3): - def compat_b64decode(s, *args, **kwargs): - if isinstance(s, compat_str): - s = s.encode('ascii') - return base64.b64decode(s, *args, **kwargs) -else: - compat_b64decode = base64.b64decode - -compat_base64_b64decode = compat_b64decode - - -if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0): - # PyPy2 prior to version 5.4.0 expects byte strings as Windows function - # names, see the original PyPy issue [1] and the youtube-dl one [2]. - # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name - # 2. 
https://github.com/ytdl-org/youtube-dl/pull/4392 - def compat_ctypes_WINFUNCTYPE(*args, **kwargs): - real = ctypes.WINFUNCTYPE(*args, **kwargs) - - def resf(tpl, *args, **kwargs): - funcname, dll = tpl - return real((str(funcname), dll), *args, **kwargs) - - return resf -else: - def compat_ctypes_WINFUNCTYPE(*args, **kwargs): - return ctypes.WINFUNCTYPE(*args, **kwargs) - - -if sys.version_info < (3, 0): - # open(file, mode='r', buffering=- 1, encoding=None, errors=None, newline=None, closefd=True) not: opener=None - def compat_open(file_, *args, **kwargs): - if len(args) > 6 or 'opener' in kwargs: - raise ValueError('open: unsupported argument "opener"') - return io.open(file_, *args, **kwargs) -else: - compat_open = open - - -# compat_register_utf8 -def compat_register_utf8(): - if sys.platform == 'win32': - # https://github.com/ytdl-org/youtube-dl/issues/820 - from codecs import register, lookup - register( - lambda name: lookup('utf-8') if name == 'cp65001' else None) - - -# compat_datetime_timedelta_total_seconds -try: - compat_datetime_timedelta_total_seconds = datetime.timedelta.total_seconds -except AttributeError: - # Py 2.6 - def compat_datetime_timedelta_total_seconds(td): - return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6 - -# optional decompression packages -# PyPi brotli package implements 'br' Content-Encoding -try: - import brotli as compat_brotli -except ImportError: - compat_brotli = None -# PyPi ncompress package implements 'compress' Content-Encoding -try: - import ncompress as compat_ncompress -except ImportError: - compat_ncompress = None - - -legacy = [ - 'compat_HTMLParseError', - 'compat_HTMLParser', - 'compat_HTTPError', - 'compat_b64decode', - 'compat_cookiejar', - 'compat_cookiejar_Cookie', - 'compat_cookies', - 'compat_cookies_SimpleCookie', - 'compat_etree_Element', - 'compat_etree_register_namespace', - 'compat_expanduser', - 'compat_getpass', - 'compat_parse_qs', - 'compat_realpath', - 
'compat_urllib_parse_parse_qs', - 'compat_urllib_parse_unquote', - 'compat_urllib_parse_unquote_plus', - 'compat_urllib_parse_unquote_to_bytes', - 'compat_urllib_parse_urlencode', - 'compat_urllib_parse_urlparse', - 'compat_urlparse', - 'compat_urlretrieve', - 'compat_xml_parse_error', -] - - -__all__ = [ - 'compat_html_parser_HTMLParseError', - 'compat_html_parser_HTMLParser', - 'compat_Struct', - 'compat_base64_b64decode', - 'compat_basestring', - 'compat_brotli', - 'compat_casefold', - 'compat_chr', - 'compat_collections_abc', - 'compat_collections_chain_map', - 'compat_datetime_timedelta_total_seconds', - 'compat_http_cookiejar', - 'compat_http_cookiejar_Cookie', - 'compat_http_cookies', - 'compat_http_cookies_SimpleCookie', - 'compat_ctypes_WINFUNCTYPE', - 'compat_etree_fromstring', - 'compat_filter', - 'compat_get_terminal_size', - 'compat_getenv', - 'compat_getpass_getpass', - 'compat_html_entities', - 'compat_html_entities_html5', - 'compat_http_client', - 'compat_http_server', - 'compat_input', - 'compat_integer_types', - 'compat_itertools_count', - 'compat_itertools_zip_longest', - 'compat_kwargs', - 'compat_map', - 'compat_ncompress', - 'compat_numeric_types', - 'compat_open', - 'compat_ord', - 'compat_os_name', - 'compat_os_path_expanduser', - 'compat_os_path_realpath', - 'compat_print', - 'compat_re_Match', - 'compat_re_Pattern', - 'compat_register_utf8', - 'compat_setenv', - 'compat_shlex_quote', - 'compat_shlex_split', - 'compat_socket_create_connection', - 'compat_str', - 'compat_struct_pack', - 'compat_struct_unpack', - 'compat_subprocess_get_DEVNULL', - 'compat_tokenize_tokenize', - 'compat_urllib_error', - 'compat_urllib_parse', - 'compat_urllib_request', - 'compat_urllib_request_DataHandler', - 'compat_urllib_response', - 'compat_urllib_request_urlretrieve', - 'compat_urllib_HTTPError', - 'compat_xml_etree_ElementTree_Element', - 'compat_xml_etree_ElementTree_ParseError', - 'compat_xml_etree_register_namespace', - 'compat_xpath', - 'compat_zip', 
- 'workaround_optparse_bug9161', -] diff --git a/src/music_kraken/pages/youtube_music/yt_utils/jsinterp.py b/src/music_kraken/pages/youtube_music/yt_utils/jsinterp.py deleted file mode 100644 index 86d9022..0000000 --- a/src/music_kraken/pages/youtube_music/yt_utils/jsinterp.py +++ /dev/null @@ -1,1054 +0,0 @@ -from __future__ import unicode_literals - -import itertools -import json -import operator -import re - -from functools import update_wrapper - -from .utils import ( - error_to_compat_str, - ExtractorError, - js_to_json, - remove_quotes, - unified_timestamp, - variadic, -) -from .compat import ( - compat_basestring, - compat_chr, - compat_collections_chain_map as ChainMap, - compat_itertools_zip_longest as zip_longest, - compat_str, -) - - -# name JS functions -class function_with_repr(object): - # from yt_dlp/utils.py, but in this module - # repr_ is always set - def __init__(self, func, repr_): - update_wrapper(self, func) - self.func, self.__repr = func, repr_ - - def __call__(self, *args, **kwargs): - return self.func(*args, **kwargs) - - def __repr__(self): - return self.__repr - - -# name JS operators -def wraps_op(op): - - def update_and_rename_wrapper(w): - f = update_wrapper(w, op) - # fn names are str in both Py 2/3 - f.__name__ = str('JS_') + f.__name__ - return f - - return update_and_rename_wrapper - - -# NB In principle NaN cannot be checked by membership. -# Here all NaN values are actually this one, so _NaN is _NaN, -# although _NaN != _NaN. 
- -_NaN = float('nan') - - -def _js_bit_op(op): - - def zeroise(x): - return 0 if x in (None, JS_Undefined, _NaN) else x - - @wraps_op(op) - def wrapped(a, b): - return op(zeroise(a), zeroise(b)) & 0xffffffff - - return wrapped - - -def _js_arith_op(op): - - @wraps_op(op) - def wrapped(a, b): - if JS_Undefined in (a, b): - return _NaN - return op(a or 0, b or 0) - - return wrapped - - -def _js_div(a, b): - if JS_Undefined in (a, b) or not (a or b): - return _NaN - return operator.truediv(a or 0, b) if b else float('inf') - - -def _js_mod(a, b): - if JS_Undefined in (a, b) or not b: - return _NaN - return (a or 0) % b - - -def _js_exp(a, b): - if not b: - return 1 # even 0 ** 0 !! - elif JS_Undefined in (a, b): - return _NaN - return (a or 0) ** b - - -def _js_eq_op(op): - - @wraps_op(op) - def wrapped(a, b): - if set((a, b)) <= set((None, JS_Undefined)): - return op(a, a) - return op(a, b) - - return wrapped - - -def _js_comp_op(op): - - @wraps_op(op) - def wrapped(a, b): - if JS_Undefined in (a, b): - return False - if isinstance(a, compat_basestring): - b = compat_str(b or 0) - elif isinstance(b, compat_basestring): - a = compat_str(a or 0) - return op(a or 0, b or 0) - - return wrapped - - -def _js_ternary(cndn, if_true=True, if_false=False): - """Simulate JS's ternary operator (cndn?if_true:if_false)""" - if cndn in (False, None, 0, '', JS_Undefined, _NaN): - return if_false - return if_true - - -# (op, definition) in order of binding priority, tightest first -# avoid dict to maintain order -# definition None => Defined in JSInterpreter._operator -_OPERATORS = ( - ('>>', _js_bit_op(operator.rshift)), - ('<<', _js_bit_op(operator.lshift)), - ('+', _js_arith_op(operator.add)), - ('-', _js_arith_op(operator.sub)), - ('*', _js_arith_op(operator.mul)), - ('%', _js_mod), - ('/', _js_div), - ('**', _js_exp), -) - -_COMP_OPERATORS = ( - ('===', operator.is_), - ('!==', operator.is_not), - ('==', _js_eq_op(operator.eq)), - ('!=', _js_eq_op(operator.ne)), - ('<=', 
_js_comp_op(operator.le)), - ('>=', _js_comp_op(operator.ge)), - ('<', _js_comp_op(operator.lt)), - ('>', _js_comp_op(operator.gt)), -) - -_LOG_OPERATORS = ( - ('|', _js_bit_op(operator.or_)), - ('^', _js_bit_op(operator.xor)), - ('&', _js_bit_op(operator.and_)), -) - -_SC_OPERATORS = ( - ('?', None), - ('??', None), - ('||', None), - ('&&', None), -) - -_OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS)) - -_NAME_RE = r'[a-zA-Z_$][\w$]*' -_MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]'))) -_QUOTES = '\'"/' - - -class JS_Undefined(object): - pass - - -class JS_Break(ExtractorError): - def __init__(self): - ExtractorError.__init__(self, 'Invalid break') - - -class JS_Continue(ExtractorError): - def __init__(self): - ExtractorError.__init__(self, 'Invalid continue') - - -class JS_Throw(ExtractorError): - def __init__(self, e): - self.error = e - ExtractorError.__init__(self, 'Uncaught exception ' + error_to_compat_str(e)) - - -class LocalNameSpace(ChainMap): - def __getitem__(self, key): - try: - return super(LocalNameSpace, self).__getitem__(key) - except KeyError: - return JS_Undefined - - def __setitem__(self, key, value): - for scope in self.maps: - if key in scope: - scope[key] = value - return - self.maps[0][key] = value - - def __delitem__(self, key): - raise NotImplementedError('Deleting is not supported') - - def __repr__(self): - return 'LocalNameSpace%s' % (self.maps, ) - - -class JSInterpreter(object): - __named_object_counter = 0 - - _OBJ_NAME = '__youtube_dl_jsinterp_obj' - - OP_CHARS = None - - def __init__(self, code, objects=None): - self.code, self._functions = code, {} - self._objects = {} if objects is None else objects - if type(self).OP_CHARS is None: - type(self).OP_CHARS = self.OP_CHARS = self.__op_chars() - - class Exception(ExtractorError): - def __init__(self, msg, *args, **kwargs): - expr = kwargs.pop('expr', None) - if expr is not None: - msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr) - 
super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs) - - class JS_RegExp(object): - RE_FLAGS = { - # special knowledge: Python's re flags are bitmask values, current max 128 - # invent new bitmask values well above that for literal parsing - # TODO: execute matches with these flags (remaining: d, y) - 'd': 1024, # Generate indices for substring matches - 'g': 2048, # Global search - 'i': re.I, # Case-insensitive search - 'm': re.M, # Multi-line search - 's': re.S, # Allows . to match newline characters - 'u': re.U, # Treat a pattern as a sequence of unicode code points - 'y': 4096, # Perform a "sticky" search that matches starting at the current position in the target string - } - - def __init__(self, pattern_txt, flags=0): - if isinstance(flags, compat_str): - flags, _ = self.regex_flags(flags) - # First, avoid https://github.com/python/cpython/issues/74534 - self.__self = None - self.__pattern_txt = pattern_txt.replace('[[', r'[\[') - self.__flags = flags - - def __instantiate(self): - if self.__self: - return - self.__self = re.compile(self.__pattern_txt, self.__flags) - # Thx: https://stackoverflow.com/questions/44773522/setattr-on-python2-sre-sre-pattern - for name in dir(self.__self): - # Only these? Obviously __class__, __init__. - # PyPy creates a __weakref__ attribute with value None - # that can't be setattr'd but also can't need to be copied. 
- if name in ('__class__', '__init__', '__weakref__'): - continue - setattr(self, name, getattr(self.__self, name)) - - def __getattr__(self, name): - self.__instantiate() - # make Py 2.6 conform to its lying documentation - if name == 'flags': - self.flags = self.__flags - return self.flags - elif name == 'pattern': - self.pattern = self.__pattern_txt - return self.pattern - elif hasattr(self.__self, name): - v = getattr(self.__self, name) - setattr(self, name, v) - return v - elif name in ('groupindex', 'groups'): - return 0 if name == 'groupindex' else {} - raise AttributeError('{0} has no attribute named {1}'.format(self, name)) - - @classmethod - def regex_flags(cls, expr): - flags = 0 - if not expr: - return flags, expr - for idx, ch in enumerate(expr): - if ch not in cls.RE_FLAGS: - break - flags |= cls.RE_FLAGS[ch] - return flags, expr[idx + 1:] - - @classmethod - def __op_chars(cls): - op_chars = set(';,[') - for op in cls._all_operators(): - for c in op[0]: - op_chars.add(c) - return op_chars - - def _named_object(self, namespace, obj): - self.__named_object_counter += 1 - name = '%s%d' % (self._OBJ_NAME, self.__named_object_counter) - if callable(obj) and not isinstance(obj, function_with_repr): - obj = function_with_repr(obj, 'F<%s>' % (self.__named_object_counter, )) - namespace[name] = obj - return name - - @classmethod - def _separate(cls, expr, delim=',', max_split=None, skip_delims=None): - if not expr: - return - # collections.Counter() is ~10% slower in both 2.7 and 3.9 - counters = dict((k, 0) for k in _MATCHING_PARENS.values()) - start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1 - in_quote, escaping, skipping = None, False, 0 - after_op, in_regex_char_group = True, False - - for idx, char in enumerate(expr): - paren_delta = 0 - if not in_quote: - if char in _MATCHING_PARENS: - counters[_MATCHING_PARENS[char]] += 1 - paren_delta = 1 - elif char in counters: - counters[char] -= 1 - paren_delta = -1 - if not escaping: - if char in _QUOTES 
and in_quote in (char, None): - if in_quote or after_op or char != '/': - in_quote = None if in_quote and not in_regex_char_group else char - elif in_quote == '/' and char in '[]': - in_regex_char_group = char == '[' - escaping = not escaping and in_quote and char == '\\' - after_op = not in_quote and (char in cls.OP_CHARS or paren_delta > 0 or (after_op and char.isspace())) - - if char != delim[pos] or any(counters.values()) or in_quote: - pos = skipping = 0 - continue - elif skipping > 0: - skipping -= 1 - continue - elif pos == 0 and skip_delims: - here = expr[idx:] - for s in variadic(skip_delims): - if here.startswith(s) and s: - skipping = len(s) - 1 - break - if skipping > 0: - continue - if pos < delim_len: - pos += 1 - continue - yield expr[start: idx - delim_len] - start, pos = idx + 1, 0 - splits += 1 - if max_split and splits >= max_split: - break - yield expr[start:] - - @classmethod - def _separate_at_paren(cls, expr, delim=None): - if delim is None: - delim = expr and _MATCHING_PARENS[expr[0]] - separated = list(cls._separate(expr, delim, 1)) - - if len(separated) < 2: - raise cls.Exception('No terminating paren {delim} in {expr!r:.5500}'.format(**locals())) - return separated[0][1:].strip(), separated[1].strip() - - @staticmethod - def _all_operators(): - return itertools.chain( - # Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence - _SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS) - - def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion): - if op in ('||', '&&'): - if (op == '&&') ^ _js_ternary(left_val): - return left_val # short circuiting - elif op == '??': - if left_val not in (None, JS_Undefined): - return left_val - elif op == '?': - right_expr = _js_ternary(left_val, *self._separate(right_expr, ':', 1)) - - right_val = self.interpret_expression(right_expr, local_vars, allow_recursion) - opfunc = op and next((v for k, v in self._all_operators() if k == 
op), None) - if not opfunc: - return right_val - - try: - # print('Eval:', opfunc.__name__, left_val, right_val) - return opfunc(left_val, right_val) - except Exception as e: - raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e) - - def _index(self, obj, idx, allow_undefined=False): - if idx == 'length': - return len(obj) - try: - return obj[int(idx)] if isinstance(obj, list) else obj[idx] - except Exception as e: - if allow_undefined: - return JS_Undefined - raise self.Exception('Cannot get index {idx:.100}'.format(**locals()), expr=repr(obj), cause=e) - - def _dump(self, obj, namespace): - try: - return json.dumps(obj) - except TypeError: - return self._named_object(namespace, obj) - - # used below - _VAR_RET_THROW_RE = re.compile(r'''(?x) - (?P(?:var|const|let)\s)|return(?:\s+|(?=["'])|$)|(?Pthrow\s+) - ''') - _COMPOUND_RE = re.compile(r'''(?x) - (?Ptry)\s*\{| - (?Pif)\s*\(| - (?Pswitch)\s*\(| - (?Pfor)\s*\(| - (?Pwhile)\s*\( - ''') - _FINALLY_RE = re.compile(r'finally\s*\{') - _SWITCH_RE = re.compile(r'switch\s*\(') - - def interpret_statement(self, stmt, local_vars, allow_recursion=100): - if allow_recursion < 0: - raise self.Exception('Recursion limit reached') - allow_recursion -= 1 - - # print('At: ' + stmt[:60]) - should_return = False - # fails on (eg) if (...) 
stmt1; else stmt2; - sub_statements = list(self._separate(stmt, ';')) or [''] - expr = stmt = sub_statements.pop().strip() - for sub_stmt in sub_statements: - ret, should_return = self.interpret_statement(sub_stmt, local_vars, allow_recursion) - if should_return: - return ret, should_return - - m = self._VAR_RET_THROW_RE.match(stmt) - if m: - expr = stmt[len(m.group(0)):].strip() - if m.group('throw'): - raise JS_Throw(self.interpret_expression(expr, local_vars, allow_recursion)) - should_return = not m.group('var') - if not expr: - return None, should_return - - if expr[0] in _QUOTES: - inner, outer = self._separate(expr, expr[0], 1) - if expr[0] == '/': - flags, outer = self.JS_RegExp.regex_flags(outer) - inner = self.JS_RegExp(inner[1:], flags=flags) - else: - inner = json.loads(js_to_json(inner + expr[0])) # , strict=True)) - if not outer: - return inner, should_return - expr = self._named_object(local_vars, inner) + outer - - new_kw, _, obj = expr.partition('new ') - if not new_kw: - for klass, konstr in (('Date', lambda x: int(unified_timestamp(x, False) * 1000)), - ('RegExp', self.JS_RegExp), - ('Error', self.Exception)): - if not obj.startswith(klass + '('): - continue - left, right = self._separate_at_paren(obj[len(klass):]) - argvals = self.interpret_iter(left, local_vars, allow_recursion) - expr = konstr(*argvals) - if expr is None: - raise self.Exception('Failed to parse {klass} {left!r:.100}'.format(**locals()), expr=expr) - expr = self._dump(expr, local_vars) + right - break - else: - raise self.Exception('Unsupported object {obj:.100}'.format(**locals()), expr=expr) - - if expr.startswith('void '): - left = self.interpret_expression(expr[5:], local_vars, allow_recursion) - return None, should_return - - if expr.startswith('{'): - inner, outer = self._separate_at_paren(expr) - # try for object expression (Map) - sub_expressions = [list(self._separate(sub_expr.strip(), ':', 1)) for sub_expr in self._separate(inner)] - if all(len(sub_expr) == 2 for 
sub_expr in sub_expressions): - return dict( - (key_expr if re.match(_NAME_RE, key_expr) else key_expr, - self.interpret_expression(val_expr, local_vars, allow_recursion)) - for key_expr, val_expr in sub_expressions), should_return - # or statement list - inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion) - if not outer or should_abort: - return inner, should_abort or should_return - else: - expr = self._dump(inner, local_vars) + outer - - if expr.startswith('('): - - m = re.match(r'\((?P[a-z])%(?P[a-z])\.length\+(?P=e)\.length\)%(?P=e)\.length', expr) - if m: - # short-cut eval of frequently used `(d%e.length+e.length)%e.length`, worth ~6% on `pytest -k test_nsig` - outer = None - inner, should_abort = self._offset_e_by_d(m.group('d'), m.group('e'), local_vars) - else: - inner, outer = self._separate_at_paren(expr) - inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion) - if not outer or should_abort: - return inner, should_abort or should_return - else: - expr = self._dump(inner, local_vars) + outer - - if expr.startswith('['): - inner, outer = self._separate_at_paren(expr) - name = self._named_object(local_vars, [ - self.interpret_expression(item, local_vars, allow_recursion) - for item in self._separate(inner)]) - expr = name + outer - - m = self._COMPOUND_RE.match(expr) - md = m.groupdict() if m else {} - if md.get('if'): - cndn, expr = self._separate_at_paren(expr[m.end() - 1:]) - if expr.startswith('{'): - if_expr, expr = self._separate_at_paren(expr) - else: - # may lose ... else ... because of ll.368-374 - if_expr, expr = self._separate_at_paren(expr, delim=';') - else_expr = None - m = re.match(r'else\s*(?P\{)?', expr) - if m: - if m.group('block'): - else_expr, expr = self._separate_at_paren(expr[m.end() - 1:]) - else: - # handle subset ... else if (...) {...} else ... 
- # TODO: make interpret_statement do this properly, if possible - exprs = list(self._separate(expr[m.end():], delim='}', max_split=2)) - if len(exprs) > 1: - if re.match(r'\s*if\s*\(', exprs[0]) and re.match(r'\s*else\b', exprs[1]): - else_expr = exprs[0] + '}' + exprs[1] - expr = (exprs[2] + '}') if len(exprs) == 3 else None - else: - else_expr = exprs[0] - exprs.append('') - expr = '}'.join(exprs[1:]) - else: - else_expr = exprs[0] - expr = None - else_expr = else_expr.lstrip() + '}' - cndn = _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)) - ret, should_abort = self.interpret_statement( - if_expr if cndn else else_expr, local_vars, allow_recursion) - if should_abort: - return ret, True - - elif md.get('try'): - try_expr, expr = self._separate_at_paren(expr[m.end() - 1:]) - err = None - try: - ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion) - if should_abort: - return ret, True - except Exception as e: - # XXX: This works for now, but makes debugging future issues very hard - err = e - - pending = (None, False) - m = re.match(r'catch\s*(?P\(\s*{_NAME_RE}\s*\))?\{{'.format(**globals()), expr) - if m: - sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:]) - if err: - catch_vars = {} - if m.group('err'): - catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err - catch_vars = local_vars.new_child(m=catch_vars) - err = None - pending = self.interpret_statement(sub_expr, catch_vars, allow_recursion) - - m = self._FINALLY_RE.match(expr) - if m: - sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:]) - ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion) - if should_abort: - return ret, True - - ret, should_abort = pending - if should_abort: - return ret, True - - if err: - raise err - - elif md.get('for') or md.get('while'): - init_or_cond, remaining = self._separate_at_paren(expr[m.end() - 1:]) - if remaining.startswith('{'): - body, expr = 
self._separate_at_paren(remaining) - else: - switch_m = self._SWITCH_RE.match(remaining) # FIXME - if switch_m: - switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:]) - body, expr = self._separate_at_paren(remaining, '}') - body = 'switch(%s){%s}' % (switch_val, body) - else: - body, expr = remaining, '' - if md.get('for'): - start, cndn, increment = self._separate(init_or_cond, ';') - self.interpret_expression(start, local_vars, allow_recursion) - else: - cndn, increment = init_or_cond, None - while _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)): - try: - ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion) - if should_abort: - return ret, True - except JS_Break: - break - except JS_Continue: - pass - if increment: - self.interpret_expression(increment, local_vars, allow_recursion) - - elif md.get('switch'): - switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:]) - switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion) - body, expr = self._separate_at_paren(remaining, '}') - items = body.replace('default:', 'case default:').split('case ')[1:] - for default in (False, True): - matched = False - for item in items: - case, stmt = (i.strip() for i in self._separate(item, ':', 1)) - if default: - matched = matched or case == 'default' - elif not matched: - matched = (case != 'default' - and switch_val == self.interpret_expression(case, local_vars, allow_recursion)) - if not matched: - continue - try: - ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion) - if should_abort: - return ret - except JS_Break: - break - if matched: - break - - if md: - ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion) - return ret, should_abort or should_return - - # Comma separated statements - sub_expressions = list(self._separate(expr)) - if len(sub_expressions) > 1: - for sub_expr in sub_expressions: - ret, 
should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion) - if should_abort: - return ret, True - return ret, False - - for m in re.finditer(r'''(?x) - (?P\+\+|--)(?P{_NAME_RE})| - (?P{_NAME_RE})(?P\+\+|--)'''.format(**globals()), expr): - var = m.group('var1') or m.group('var2') - start, end = m.span() - sign = m.group('pre_sign') or m.group('post_sign') - ret = local_vars[var] - local_vars[var] += 1 if sign[0] == '+' else -1 - if m.group('pre_sign'): - ret = local_vars[var] - expr = expr[:start] + self._dump(ret, local_vars) + expr[end:] - - if not expr: - return None, should_return - - m = re.match(r'''(?x) - (?P - (?P{_NAME_RE})(?:\[(?P[^\]]+?)\])?\s* - (?P{_OPERATOR_RE})? - =(?!=)(?P.*)$ - )|(?P - (?!if|return|true|false|null|undefined)(?P{_NAME_RE})$ - )|(?P - (?P{_NAME_RE})\[(?P.+)\]$ - )|(?P - (?P{_NAME_RE})(?:(?P\?)?\.(?P[^(]+)|\[(?P[^\]]+)\])\s* - )|(?P - (?P{_NAME_RE})\((?P.*)\)$ - )'''.format(**globals()), expr) - md = m.groupdict() if m else {} - if md.get('assign'): - left_val = local_vars.get(m.group('out')) - - if not m.group('index'): - local_vars[m.group('out')] = self._operator( - m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion) - return local_vars[m.group('out')], should_return - elif left_val in (None, JS_Undefined): - raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr) - - idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion) - if not isinstance(idx, (int, float)): - raise self.Exception('List index %s must be integer' % (idx, ), expr=expr) - idx = int(idx) - left_val[idx] = self._operator( - m.group('op'), self._index(left_val, idx), m.group('expr'), expr, local_vars, allow_recursion) - return left_val[idx], should_return - - elif expr.isdigit(): - return int(expr), should_return - - elif expr == 'break': - raise JS_Break() - elif expr == 'continue': - raise JS_Continue() - - elif expr == 'undefined': - return JS_Undefined, should_return 
- elif expr == 'NaN': - return _NaN, should_return - - elif md.get('return'): - return local_vars[m.group('name')], should_return - - try: - ret = json.loads(js_to_json(expr)) # strict=True) - if not md.get('attribute'): - return ret, should_return - except ValueError: - pass - - if md.get('indexing'): - val = local_vars[m.group('in')] - idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion) - return self._index(val, idx), should_return - - for op, _ in self._all_operators(): - # hackety: have higher priority than <>, but don't confuse them - skip_delim = (op + op) if op in '<>*?' else None - if op == '?': - skip_delim = (skip_delim, '?.') - separated = list(self._separate(expr, op, skip_delims=skip_delim)) - if len(separated) < 2: - continue - - right_expr = separated.pop() - # handle operators that are both unary and binary, minimal BODMAS - if op in ('+', '-'): - undone = 0 - while len(separated) > 1 and not separated[-1].strip(): - undone += 1 - separated.pop() - if op == '-' and undone % 2 != 0: - right_expr = op + right_expr - left_val = separated[-1] - for dm_op in ('*', '%', '/', '**'): - bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim)) - if len(bodmas) > 1 and not bodmas[-1].strip(): - expr = op.join(separated) + op + right_expr - right_expr = None - break - if right_expr is None: - continue - - left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion) - return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return - - if md.get('attribute'): - variable, member, nullish = m.group('var', 'member', 'nullish') - if not member: - member = self.interpret_expression(m.group('member2'), local_vars, allow_recursion) - arg_str = expr[m.end():] - if arg_str.startswith('('): - arg_str, remaining = self._separate_at_paren(arg_str) - else: - arg_str, remaining = None, arg_str - - def assertion(cndn, msg): - """ assert, but without risk of getting optimized out 
""" - if not cndn: - memb = member - raise self.Exception('{memb} {msg}'.format(**locals()), expr=expr) - - def eval_method(): - if (variable, member) == ('console', 'debug'): - return - types = { - 'String': compat_str, - 'Math': float, - } - obj = local_vars.get(variable) - if obj in (JS_Undefined, None): - obj = types.get(variable, JS_Undefined) - if obj is JS_Undefined: - try: - if variable not in self._objects: - self._objects[variable] = self.extract_object(variable) - obj = self._objects[variable] - except self.Exception: - if not nullish: - raise - - if nullish and obj is JS_Undefined: - return JS_Undefined - - # Member access - if arg_str is None: - return self._index(obj, member, nullish) - - # Function call - argvals = [ - self.interpret_expression(v, local_vars, allow_recursion) - for v in self._separate(arg_str)] - - if obj == compat_str: - if member == 'fromCharCode': - assertion(argvals, 'takes one or more arguments') - return ''.join(map(compat_chr, argvals)) - raise self.Exception('Unsupported string method ' + member, expr=expr) - elif obj == float: - if member == 'pow': - assertion(len(argvals) == 2, 'takes two arguments') - return argvals[0] ** argvals[1] - raise self.Exception('Unsupported Math method ' + member, expr=expr) - - if member == 'split': - assertion(argvals, 'takes one or more arguments') - assertion(len(argvals) == 1, 'with limit argument is not implemented') - return obj.split(argvals[0]) if argvals[0] else list(obj) - elif member == 'join': - assertion(isinstance(obj, list), 'must be applied on a list') - assertion(len(argvals) == 1, 'takes exactly one argument') - return argvals[0].join(obj) - elif member == 'reverse': - assertion(not argvals, 'does not take any arguments') - obj.reverse() - return obj - elif member == 'slice': - assertion(isinstance(obj, list), 'must be applied on a list') - assertion(len(argvals) == 1, 'takes exactly one argument') - return obj[argvals[0]:] - elif member == 'splice': - 
assertion(isinstance(obj, list), 'must be applied on a list') - assertion(argvals, 'takes one or more arguments') - index, howMany = map(int, (argvals + [len(obj)])[:2]) - if index < 0: - index += len(obj) - add_items = argvals[2:] - res = [] - for i in range(index, min(index + howMany, len(obj))): - res.append(obj.pop(index)) - for i, item in enumerate(add_items): - obj.insert(index + i, item) - return res - elif member == 'unshift': - assertion(isinstance(obj, list), 'must be applied on a list') - assertion(argvals, 'takes one or more arguments') - for item in reversed(argvals): - obj.insert(0, item) - return obj - elif member == 'pop': - assertion(isinstance(obj, list), 'must be applied on a list') - assertion(not argvals, 'does not take any arguments') - if not obj: - return - return obj.pop() - elif member == 'push': - assertion(argvals, 'takes one or more arguments') - obj.extend(argvals) - return obj - elif member == 'forEach': - assertion(argvals, 'takes one or more arguments') - assertion(len(argvals) <= 2, 'takes at-most 2 arguments') - f, this = (argvals + [''])[:2] - return [f((item, idx, obj), {'this': this}, allow_recursion) for idx, item in enumerate(obj)] - elif member == 'indexOf': - assertion(argvals, 'takes one or more arguments') - assertion(len(argvals) <= 2, 'takes at-most 2 arguments') - idx, start = (argvals + [0])[:2] - try: - return obj.index(idx, start) - except ValueError: - return -1 - elif member == 'charCodeAt': - assertion(isinstance(obj, compat_str), 'must be applied on a string') - # assertion(len(argvals) == 1, 'takes exactly one argument') # but not enforced - idx = argvals[0] if isinstance(argvals[0], int) else 0 - if idx >= len(obj): - return None - return ord(obj[idx]) - elif member in ('replace', 'replaceAll'): - assertion(isinstance(obj, compat_str), 'must be applied on a string') - assertion(len(argvals) == 2, 'takes exactly two arguments') - # TODO: argvals[1] callable, other Py vs JS edge cases - if isinstance(argvals[0], 
self.JS_RegExp): - count = 0 if argvals[0].flags & self.JS_RegExp.RE_FLAGS['g'] else 1 - assertion(member != 'replaceAll' or count == 0, - 'replaceAll must be called with a global RegExp') - return argvals[0].sub(argvals[1], obj, count=count) - count = ('replaceAll', 'replace').index(member) - return re.sub(re.escape(argvals[0]), argvals[1], obj, count=count) - - idx = int(member) if isinstance(obj, list) else member - return obj[idx](argvals, allow_recursion=allow_recursion) - - if remaining: - ret, should_abort = self.interpret_statement( - self._named_object(local_vars, eval_method()) + remaining, - local_vars, allow_recursion) - return ret, should_return or should_abort - else: - return eval_method(), should_return - - elif md.get('function'): - fname = m.group('fname') - argvals = [self.interpret_expression(v, local_vars, allow_recursion) - for v in self._separate(m.group('args'))] - if fname in local_vars: - return local_vars[fname](argvals, allow_recursion=allow_recursion), should_return - elif fname not in self._functions: - self._functions[fname] = self.extract_function(fname) - return self._functions[fname](argvals, allow_recursion=allow_recursion), should_return - - raise self.Exception( - 'Unsupported JS expression ' + (expr[:40] if expr != stmt else ''), expr=stmt) - - def interpret_expression(self, expr, local_vars, allow_recursion): - ret, should_return = self.interpret_statement(expr, local_vars, allow_recursion) - if should_return: - raise self.Exception('Cannot return from an expression', expr) - return ret - - def interpret_iter(self, list_txt, local_vars, allow_recursion): - for v in self._separate(list_txt): - yield self.interpret_expression(v, local_vars, allow_recursion) - - def extract_object(self, objname): - _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')''' - obj = {} - fields = None - for obj_m in re.finditer( - r'''(?xs) - {0}\s*\.\s*{1}|{1}\s*=\s*\{{\s* - (?P({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*) - 
}}\s*; - '''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE), - self.code): - fields = obj_m.group('fields') - if fields: - break - else: - raise self.Exception('Could not find object ' + objname) - # Currently, it only supports function definitions - fields_m = re.finditer( - r'''(?x) - (?P%s)\s*:\s*function\s*\((?P(?:%s|,)*)\){(?P[^}]+)} - ''' % (_FUNC_NAME_RE, _NAME_RE), - fields) - for f in fields_m: - argnames = self.build_arglist(f.group('args')) - obj[remove_quotes(f.group('key'))] = self.build_function(argnames, f.group('code')) - - return obj - - @staticmethod - def _offset_e_by_d(d, e, local_vars): - """ Short-cut eval: (d%e.length+e.length)%e.length """ - try: - d = local_vars[d] - e = local_vars[e] - e = len(e) - return _js_mod(_js_mod(d, e) + e, e), False - except Exception: - return None, True - - def extract_function_code(self, funcname): - """ @returns argnames, code """ - func_m = re.search( - r'''(?xs) - (?: - function\s+%(name)s| - [{;,]\s*%(name)s\s*=\s*function| - (?:var|const|let)\s+%(name)s\s*=\s*function - )\s* - \((?P[^)]*)\)\s* - (?P{.+})''' % {'name': re.escape(funcname)}, - self.code) - if func_m is None: - raise self.Exception('Could not find JS function "{funcname}"'.format(**locals())) - code, _ = self._separate_at_paren(func_m.group('code')) # refine the match - return self.build_arglist(func_m.group('args')), code - - def extract_function(self, funcname): - return function_with_repr( - self.extract_function_from_code(*self.extract_function_code(funcname)), - 'F<%s>' % (funcname, )) - - def extract_function_from_code(self, argnames, code, *global_stack): - local_vars = {} - while True: - mobj = re.search(r'function\((?P[^)]*)\)\s*{', code) - if mobj is None: - break - start, body_start = mobj.span() - body, remaining = self._separate_at_paren(code[body_start - 1:], '}') - name = self._named_object(local_vars, self.extract_function_from_code( - [x.strip() for x in mobj.group('args').split(',')], - body, local_vars, *global_stack)) 
- code = code[:start] + name + remaining - return self.build_function(argnames, code, local_vars, *global_stack) - - def call_function(self, funcname, *args): - return self.extract_function(funcname)(args) - - @classmethod - def build_arglist(cls, arg_text): - if not arg_text: - return [] - - def valid_arg(y): - y = y.strip() - if not y: - raise cls.Exception('Missing arg in "%s"' % (arg_text, )) - return y - - return [valid_arg(x) for x in cls._separate(arg_text)] - - def build_function(self, argnames, code, *global_stack): - global_stack = list(global_stack) or [{}] - argnames = tuple(argnames) - - def resf(args, kwargs={}, allow_recursion=100): - global_stack[0].update( - zip_longest(argnames, args, fillvalue=None)) - global_stack[0].update(kwargs) - var_stack = LocalNameSpace(*global_stack) - ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1) - if should_abort: - return ret - return resf diff --git a/src/music_kraken/pages/youtube_music/yt_utils/socks.py b/src/music_kraken/pages/youtube_music/yt_utils/socks.py deleted file mode 100644 index 5d4adbe..0000000 --- a/src/music_kraken/pages/youtube_music/yt_utils/socks.py +++ /dev/null @@ -1,273 +0,0 @@ -# Public Domain SOCKS proxy protocol implementation -# Adapted from https://gist.github.com/bluec0re/cafd3764412967417fd3 - -from __future__ import unicode_literals - -# References: -# SOCKS4 protocol http://www.openssh.com/txt/socks4.protocol -# SOCKS4A protocol http://www.openssh.com/txt/socks4a.protocol -# SOCKS5 protocol https://tools.ietf.org/html/rfc1928 -# SOCKS5 username/password authentication https://tools.ietf.org/html/rfc1929 - -import collections -import socket - -from .compat import ( - compat_ord, - compat_struct_pack, - compat_struct_unpack, -) - -__author__ = 'Timo Schmid ' - -SOCKS4_VERSION = 4 -SOCKS4_REPLY_VERSION = 0x00 -# Excerpt from SOCKS4A protocol: -# if the client cannot resolve the destination host's domain name to find its -# IP address, 
it should set the first three bytes of DSTIP to NULL and the last -# byte to a non-zero value. -SOCKS4_DEFAULT_DSTIP = compat_struct_pack('!BBBB', 0, 0, 0, 0xFF) - -SOCKS5_VERSION = 5 -SOCKS5_USER_AUTH_VERSION = 0x01 -SOCKS5_USER_AUTH_SUCCESS = 0x00 - - -class Socks4Command(object): - CMD_CONNECT = 0x01 - CMD_BIND = 0x02 - - -class Socks5Command(Socks4Command): - CMD_UDP_ASSOCIATE = 0x03 - - -class Socks5Auth(object): - AUTH_NONE = 0x00 - AUTH_GSSAPI = 0x01 - AUTH_USER_PASS = 0x02 - AUTH_NO_ACCEPTABLE = 0xFF # For server response - - -class Socks5AddressType(object): - ATYP_IPV4 = 0x01 - ATYP_DOMAINNAME = 0x03 - ATYP_IPV6 = 0x04 - - -class ProxyError(socket.error): - ERR_SUCCESS = 0x00 - - def __init__(self, code=None, msg=None): - if code is not None and msg is None: - msg = self.CODES.get(code) or 'unknown error' - super(ProxyError, self).__init__(code, msg) - - -class InvalidVersionError(ProxyError): - def __init__(self, expected_version, got_version): - msg = ('Invalid response version from server. 
Expected {0:02x} got ' - '{1:02x}'.format(expected_version, got_version)) - super(InvalidVersionError, self).__init__(0, msg) - - -class Socks4Error(ProxyError): - ERR_SUCCESS = 90 - - CODES = { - 91: 'request rejected or failed', - 92: 'request rejected because SOCKS server cannot connect to identd on the client', - 93: 'request rejected because the client program and identd report different user-ids' - } - - -class Socks5Error(ProxyError): - ERR_GENERAL_FAILURE = 0x01 - - CODES = { - 0x01: 'general SOCKS server failure', - 0x02: 'connection not allowed by ruleset', - 0x03: 'Network unreachable', - 0x04: 'Host unreachable', - 0x05: 'Connection refused', - 0x06: 'TTL expired', - 0x07: 'Command not supported', - 0x08: 'Address type not supported', - 0xFE: 'unknown username or invalid password', - 0xFF: 'all offered authentication methods were rejected' - } - - -class ProxyType(object): - SOCKS4 = 0 - SOCKS4A = 1 - SOCKS5 = 2 - - -Proxy = collections.namedtuple('Proxy', ( - 'type', 'host', 'port', 'username', 'password', 'remote_dns')) - - -class sockssocket(socket.socket): - def __init__(self, *args, **kwargs): - self._proxy = None - super(sockssocket, self).__init__(*args, **kwargs) - - def setproxy(self, proxytype, addr, port, rdns=True, username=None, password=None): - assert proxytype in (ProxyType.SOCKS4, ProxyType.SOCKS4A, ProxyType.SOCKS5) - - self._proxy = Proxy(proxytype, addr, port, username, password, rdns) - - def recvall(self, cnt): - data = b'' - while len(data) < cnt: - cur = self.recv(cnt - len(data)) - if not cur: - raise EOFError('{0} bytes missing'.format(cnt - len(data))) - data += cur - return data - - def _recv_bytes(self, cnt): - data = self.recvall(cnt) - return compat_struct_unpack('!{0}B'.format(cnt), data) - - @staticmethod - def _len_and_data(data): - return compat_struct_pack('!B', len(data)) + data - - def _check_response_version(self, expected_version, got_version): - if got_version != expected_version: - self.close() - raise 
InvalidVersionError(expected_version, got_version) - - def _resolve_address(self, destaddr, default, use_remote_dns): - try: - return socket.inet_aton(destaddr) - except socket.error: - if use_remote_dns and self._proxy.remote_dns: - return default - else: - return socket.inet_aton(socket.gethostbyname(destaddr)) - - def _setup_socks4(self, address, is_4a=False): - destaddr, port = address - - ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a) - - packet = compat_struct_pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr - - username = (self._proxy.username or '').encode('utf-8') - packet += username + b'\x00' - - if is_4a and self._proxy.remote_dns: - packet += destaddr.encode('utf-8') + b'\x00' - - self.sendall(packet) - - version, resp_code, dstport, dsthost = compat_struct_unpack('!BBHI', self.recvall(8)) - - self._check_response_version(SOCKS4_REPLY_VERSION, version) - - if resp_code != Socks4Error.ERR_SUCCESS: - self.close() - raise Socks4Error(resp_code) - - return (dsthost, dstport) - - def _setup_socks4a(self, address): - self._setup_socks4(address, is_4a=True) - - def _socks5_auth(self): - packet = compat_struct_pack('!B', SOCKS5_VERSION) - - auth_methods = [Socks5Auth.AUTH_NONE] - if self._proxy.username and self._proxy.password: - auth_methods.append(Socks5Auth.AUTH_USER_PASS) - - packet += compat_struct_pack('!B', len(auth_methods)) - packet += compat_struct_pack('!{0}B'.format(len(auth_methods)), *auth_methods) - - self.sendall(packet) - - version, method = self._recv_bytes(2) - - self._check_response_version(SOCKS5_VERSION, version) - - if method == Socks5Auth.AUTH_NO_ACCEPTABLE or ( - method == Socks5Auth.AUTH_USER_PASS and (not self._proxy.username or not self._proxy.password)): - self.close() - raise Socks5Error(Socks5Auth.AUTH_NO_ACCEPTABLE) - - if method == Socks5Auth.AUTH_USER_PASS: - username = self._proxy.username.encode('utf-8') - password = self._proxy.password.encode('utf-8') - packet = 
compat_struct_pack('!B', SOCKS5_USER_AUTH_VERSION) - packet += self._len_and_data(username) + self._len_and_data(password) - self.sendall(packet) - - version, status = self._recv_bytes(2) - - self._check_response_version(SOCKS5_USER_AUTH_VERSION, version) - - if status != SOCKS5_USER_AUTH_SUCCESS: - self.close() - raise Socks5Error(Socks5Error.ERR_GENERAL_FAILURE) - - def _setup_socks5(self, address): - destaddr, port = address - - ipaddr = self._resolve_address(destaddr, None, use_remote_dns=True) - - self._socks5_auth() - - reserved = 0 - packet = compat_struct_pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved) - if ipaddr is None: - destaddr = destaddr.encode('utf-8') - packet += compat_struct_pack('!B', Socks5AddressType.ATYP_DOMAINNAME) - packet += self._len_and_data(destaddr) - else: - packet += compat_struct_pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr - packet += compat_struct_pack('!H', port) - - self.sendall(packet) - - version, status, reserved, atype = self._recv_bytes(4) - - self._check_response_version(SOCKS5_VERSION, version) - - if status != Socks5Error.ERR_SUCCESS: - self.close() - raise Socks5Error(status) - - if atype == Socks5AddressType.ATYP_IPV4: - destaddr = self.recvall(4) - elif atype == Socks5AddressType.ATYP_DOMAINNAME: - alen = compat_ord(self.recv(1)) - destaddr = self.recvall(alen) - elif atype == Socks5AddressType.ATYP_IPV6: - destaddr = self.recvall(16) - destport = compat_struct_unpack('!H', self.recvall(2))[0] - - return (destaddr, destport) - - def _make_proxy(self, connect_func, address): - if not self._proxy: - return connect_func(self, address) - - result = connect_func(self, (self._proxy.host, self._proxy.port)) - if result != 0 and result is not None: - return result - setup_funcs = { - ProxyType.SOCKS4: self._setup_socks4, - ProxyType.SOCKS4A: self._setup_socks4a, - ProxyType.SOCKS5: self._setup_socks5, - } - setup_funcs[self._proxy.type](address) - return result - - def connect(self, address): - 
self._make_proxy(socket.socket.connect, address) - - def connect_ex(self, address): - return self._make_proxy(socket.socket.connect_ex, address) diff --git a/src/music_kraken/pages/youtube_music/yt_utils/utils.py b/src/music_kraken/pages/youtube_music/yt_utils/utils.py deleted file mode 100644 index 61b94d8..0000000 --- a/src/music_kraken/pages/youtube_music/yt_utils/utils.py +++ /dev/null @@ -1,6513 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -from __future__ import unicode_literals - -import base64 -import binascii -import calendar -import codecs -import collections -import contextlib -import ctypes -import datetime -import email.utils -import email.header -import errno -import functools -import inspect -import io -import itertools -import json -import locale -import math -import operator -import os -import platform -import random -import re -import socket -import ssl -import subprocess -import sys -import tempfile -import time -import traceback -import unicodedata -import xml.etree.ElementTree -import zlib - -from .compat import ( - compat_HTMLParseError, - compat_HTMLParser, - compat_basestring, - compat_brotli as brotli, - compat_casefold, - compat_chr, - compat_collections_abc, - compat_cookiejar, - compat_ctypes_WINFUNCTYPE, - compat_datetime_timedelta_total_seconds, - compat_etree_fromstring, - compat_expanduser, - compat_html_entities, - compat_html_entities_html5, - compat_http_client, - compat_integer_types, - compat_kwargs, - compat_ncompress as ncompress, - compat_os_name, - compat_re_Match, - compat_re_Pattern, - compat_shlex_quote, - compat_str, - compat_struct_pack, - compat_struct_unpack, - compat_urllib_error, - compat_urllib_HTTPError, - compat_urllib_parse, - compat_urllib_parse_parse_qs as compat_parse_qs, - compat_urllib_parse_urlencode, - compat_urllib_parse_urlparse, - compat_urllib_parse_unquote_plus, - compat_urllib_request, - compat_xpath, -) - -from .socks import ( - ProxyType, - sockssocket, -) - - -def register_socks_protocols(): 
- # "Register" SOCKS protocols - # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904 - # URLs with protocols not in urlparse.uses_netloc are not handled correctly - for scheme in ('socks', 'socks4', 'socks4a', 'socks5'): - if scheme not in compat_urllib_parse.uses_netloc: - compat_urllib_parse.uses_netloc.append(scheme) - - -# Unfavoured alias -compiled_regex_type = compat_re_Pattern - - -def random_user_agent(): - _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36' - _CHROME_VERSIONS = ( - '74.0.3729.129', - '76.0.3780.3', - '76.0.3780.2', - '74.0.3729.128', - '76.0.3780.1', - '76.0.3780.0', - '75.0.3770.15', - '74.0.3729.127', - '74.0.3729.126', - '76.0.3779.1', - '76.0.3779.0', - '75.0.3770.14', - '74.0.3729.125', - '76.0.3778.1', - '76.0.3778.0', - '75.0.3770.13', - '74.0.3729.124', - '74.0.3729.123', - '73.0.3683.121', - '76.0.3777.1', - '76.0.3777.0', - '75.0.3770.12', - '74.0.3729.122', - '76.0.3776.4', - '75.0.3770.11', - '74.0.3729.121', - '76.0.3776.3', - '76.0.3776.2', - '73.0.3683.120', - '74.0.3729.120', - '74.0.3729.119', - '74.0.3729.118', - '76.0.3776.1', - '76.0.3776.0', - '76.0.3775.5', - '75.0.3770.10', - '74.0.3729.117', - '76.0.3775.4', - '76.0.3775.3', - '74.0.3729.116', - '75.0.3770.9', - '76.0.3775.2', - '76.0.3775.1', - '76.0.3775.0', - '75.0.3770.8', - '74.0.3729.115', - '74.0.3729.114', - '76.0.3774.1', - '76.0.3774.0', - '75.0.3770.7', - '74.0.3729.113', - '74.0.3729.112', - '74.0.3729.111', - '76.0.3773.1', - '76.0.3773.0', - '75.0.3770.6', - '74.0.3729.110', - '74.0.3729.109', - '76.0.3772.1', - '76.0.3772.0', - '75.0.3770.5', - '74.0.3729.108', - '74.0.3729.107', - '76.0.3771.1', - '76.0.3771.0', - '75.0.3770.4', - '74.0.3729.106', - '74.0.3729.105', - '75.0.3770.3', - '74.0.3729.104', - '74.0.3729.103', - '74.0.3729.102', - '75.0.3770.2', - '74.0.3729.101', - '75.0.3770.1', - '75.0.3770.0', - '74.0.3729.100', - '75.0.3769.5', - 
'75.0.3769.4', - '74.0.3729.99', - '75.0.3769.3', - '75.0.3769.2', - '75.0.3768.6', - '74.0.3729.98', - '75.0.3769.1', - '75.0.3769.0', - '74.0.3729.97', - '73.0.3683.119', - '73.0.3683.118', - '74.0.3729.96', - '75.0.3768.5', - '75.0.3768.4', - '75.0.3768.3', - '75.0.3768.2', - '74.0.3729.95', - '74.0.3729.94', - '75.0.3768.1', - '75.0.3768.0', - '74.0.3729.93', - '74.0.3729.92', - '73.0.3683.117', - '74.0.3729.91', - '75.0.3766.3', - '74.0.3729.90', - '75.0.3767.2', - '75.0.3767.1', - '75.0.3767.0', - '74.0.3729.89', - '73.0.3683.116', - '75.0.3766.2', - '74.0.3729.88', - '75.0.3766.1', - '75.0.3766.0', - '74.0.3729.87', - '73.0.3683.115', - '74.0.3729.86', - '75.0.3765.1', - '75.0.3765.0', - '74.0.3729.85', - '73.0.3683.114', - '74.0.3729.84', - '75.0.3764.1', - '75.0.3764.0', - '74.0.3729.83', - '73.0.3683.113', - '75.0.3763.2', - '75.0.3761.4', - '74.0.3729.82', - '75.0.3763.1', - '75.0.3763.0', - '74.0.3729.81', - '73.0.3683.112', - '75.0.3762.1', - '75.0.3762.0', - '74.0.3729.80', - '75.0.3761.3', - '74.0.3729.79', - '73.0.3683.111', - '75.0.3761.2', - '74.0.3729.78', - '74.0.3729.77', - '75.0.3761.1', - '75.0.3761.0', - '73.0.3683.110', - '74.0.3729.76', - '74.0.3729.75', - '75.0.3760.0', - '74.0.3729.74', - '75.0.3759.8', - '75.0.3759.7', - '75.0.3759.6', - '74.0.3729.73', - '75.0.3759.5', - '74.0.3729.72', - '73.0.3683.109', - '75.0.3759.4', - '75.0.3759.3', - '74.0.3729.71', - '75.0.3759.2', - '74.0.3729.70', - '73.0.3683.108', - '74.0.3729.69', - '75.0.3759.1', - '75.0.3759.0', - '74.0.3729.68', - '73.0.3683.107', - '74.0.3729.67', - '75.0.3758.1', - '75.0.3758.0', - '74.0.3729.66', - '73.0.3683.106', - '74.0.3729.65', - '75.0.3757.1', - '75.0.3757.0', - '74.0.3729.64', - '73.0.3683.105', - '74.0.3729.63', - '75.0.3756.1', - '75.0.3756.0', - '74.0.3729.62', - '73.0.3683.104', - '75.0.3755.3', - '75.0.3755.2', - '73.0.3683.103', - '75.0.3755.1', - '75.0.3755.0', - '74.0.3729.61', - '73.0.3683.102', - '74.0.3729.60', - '75.0.3754.2', - '74.0.3729.59', - 
'75.0.3753.4', - '74.0.3729.58', - '75.0.3754.1', - '75.0.3754.0', - '74.0.3729.57', - '73.0.3683.101', - '75.0.3753.3', - '75.0.3752.2', - '75.0.3753.2', - '74.0.3729.56', - '75.0.3753.1', - '75.0.3753.0', - '74.0.3729.55', - '73.0.3683.100', - '74.0.3729.54', - '75.0.3752.1', - '75.0.3752.0', - '74.0.3729.53', - '73.0.3683.99', - '74.0.3729.52', - '75.0.3751.1', - '75.0.3751.0', - '74.0.3729.51', - '73.0.3683.98', - '74.0.3729.50', - '75.0.3750.0', - '74.0.3729.49', - '74.0.3729.48', - '74.0.3729.47', - '75.0.3749.3', - '74.0.3729.46', - '73.0.3683.97', - '75.0.3749.2', - '74.0.3729.45', - '75.0.3749.1', - '75.0.3749.0', - '74.0.3729.44', - '73.0.3683.96', - '74.0.3729.43', - '74.0.3729.42', - '75.0.3748.1', - '75.0.3748.0', - '74.0.3729.41', - '75.0.3747.1', - '73.0.3683.95', - '75.0.3746.4', - '74.0.3729.40', - '74.0.3729.39', - '75.0.3747.0', - '75.0.3746.3', - '75.0.3746.2', - '74.0.3729.38', - '75.0.3746.1', - '75.0.3746.0', - '74.0.3729.37', - '73.0.3683.94', - '75.0.3745.5', - '75.0.3745.4', - '75.0.3745.3', - '75.0.3745.2', - '74.0.3729.36', - '75.0.3745.1', - '75.0.3745.0', - '75.0.3744.2', - '74.0.3729.35', - '73.0.3683.93', - '74.0.3729.34', - '75.0.3744.1', - '75.0.3744.0', - '74.0.3729.33', - '73.0.3683.92', - '74.0.3729.32', - '74.0.3729.31', - '73.0.3683.91', - '75.0.3741.2', - '75.0.3740.5', - '74.0.3729.30', - '75.0.3741.1', - '75.0.3741.0', - '74.0.3729.29', - '75.0.3740.4', - '73.0.3683.90', - '74.0.3729.28', - '75.0.3740.3', - '73.0.3683.89', - '75.0.3740.2', - '74.0.3729.27', - '75.0.3740.1', - '75.0.3740.0', - '74.0.3729.26', - '73.0.3683.88', - '73.0.3683.87', - '74.0.3729.25', - '75.0.3739.1', - '75.0.3739.0', - '73.0.3683.86', - '74.0.3729.24', - '73.0.3683.85', - '75.0.3738.4', - '75.0.3738.3', - '75.0.3738.2', - '75.0.3738.1', - '75.0.3738.0', - '74.0.3729.23', - '73.0.3683.84', - '74.0.3729.22', - '74.0.3729.21', - '75.0.3737.1', - '75.0.3737.0', - '74.0.3729.20', - '73.0.3683.83', - '74.0.3729.19', - '75.0.3736.1', - '75.0.3736.0', - 
'74.0.3729.18', - '73.0.3683.82', - '74.0.3729.17', - '75.0.3735.1', - '75.0.3735.0', - '74.0.3729.16', - '73.0.3683.81', - '75.0.3734.1', - '75.0.3734.0', - '74.0.3729.15', - '73.0.3683.80', - '74.0.3729.14', - '75.0.3733.1', - '75.0.3733.0', - '75.0.3732.1', - '74.0.3729.13', - '74.0.3729.12', - '73.0.3683.79', - '74.0.3729.11', - '75.0.3732.0', - '74.0.3729.10', - '73.0.3683.78', - '74.0.3729.9', - '74.0.3729.8', - '74.0.3729.7', - '75.0.3731.3', - '75.0.3731.2', - '75.0.3731.0', - '74.0.3729.6', - '73.0.3683.77', - '73.0.3683.76', - '75.0.3730.5', - '75.0.3730.4', - '73.0.3683.75', - '74.0.3729.5', - '73.0.3683.74', - '75.0.3730.3', - '75.0.3730.2', - '74.0.3729.4', - '73.0.3683.73', - '73.0.3683.72', - '75.0.3730.1', - '75.0.3730.0', - '74.0.3729.3', - '73.0.3683.71', - '74.0.3729.2', - '73.0.3683.70', - '74.0.3729.1', - '74.0.3729.0', - '74.0.3726.4', - '73.0.3683.69', - '74.0.3726.3', - '74.0.3728.0', - '74.0.3726.2', - '73.0.3683.68', - '74.0.3726.1', - '74.0.3726.0', - '74.0.3725.4', - '73.0.3683.67', - '73.0.3683.66', - '74.0.3725.3', - '74.0.3725.2', - '74.0.3725.1', - '74.0.3724.8', - '74.0.3725.0', - '73.0.3683.65', - '74.0.3724.7', - '74.0.3724.6', - '74.0.3724.5', - '74.0.3724.4', - '74.0.3724.3', - '74.0.3724.2', - '74.0.3724.1', - '74.0.3724.0', - '73.0.3683.64', - '74.0.3723.1', - '74.0.3723.0', - '73.0.3683.63', - '74.0.3722.1', - '74.0.3722.0', - '73.0.3683.62', - '74.0.3718.9', - '74.0.3702.3', - '74.0.3721.3', - '74.0.3721.2', - '74.0.3721.1', - '74.0.3721.0', - '74.0.3720.6', - '73.0.3683.61', - '72.0.3626.122', - '73.0.3683.60', - '74.0.3720.5', - '72.0.3626.121', - '74.0.3718.8', - '74.0.3720.4', - '74.0.3720.3', - '74.0.3718.7', - '74.0.3720.2', - '74.0.3720.1', - '74.0.3720.0', - '74.0.3718.6', - '74.0.3719.5', - '73.0.3683.59', - '74.0.3718.5', - '74.0.3718.4', - '74.0.3719.4', - '74.0.3719.3', - '74.0.3719.2', - '74.0.3719.1', - '73.0.3683.58', - '74.0.3719.0', - '73.0.3683.57', - '73.0.3683.56', - '74.0.3718.3', - '73.0.3683.55', - 
'74.0.3718.2', - '74.0.3718.1', - '74.0.3718.0', - '73.0.3683.54', - '74.0.3717.2', - '73.0.3683.53', - '74.0.3717.1', - '74.0.3717.0', - '73.0.3683.52', - '74.0.3716.1', - '74.0.3716.0', - '73.0.3683.51', - '74.0.3715.1', - '74.0.3715.0', - '73.0.3683.50', - '74.0.3711.2', - '74.0.3714.2', - '74.0.3713.3', - '74.0.3714.1', - '74.0.3714.0', - '73.0.3683.49', - '74.0.3713.1', - '74.0.3713.0', - '72.0.3626.120', - '73.0.3683.48', - '74.0.3712.2', - '74.0.3712.1', - '74.0.3712.0', - '73.0.3683.47', - '72.0.3626.119', - '73.0.3683.46', - '74.0.3710.2', - '72.0.3626.118', - '74.0.3711.1', - '74.0.3711.0', - '73.0.3683.45', - '72.0.3626.117', - '74.0.3710.1', - '74.0.3710.0', - '73.0.3683.44', - '72.0.3626.116', - '74.0.3709.1', - '74.0.3709.0', - '74.0.3704.9', - '73.0.3683.43', - '72.0.3626.115', - '74.0.3704.8', - '74.0.3704.7', - '74.0.3708.0', - '74.0.3706.7', - '74.0.3704.6', - '73.0.3683.42', - '72.0.3626.114', - '74.0.3706.6', - '72.0.3626.113', - '74.0.3704.5', - '74.0.3706.5', - '74.0.3706.4', - '74.0.3706.3', - '74.0.3706.2', - '74.0.3706.1', - '74.0.3706.0', - '73.0.3683.41', - '72.0.3626.112', - '74.0.3705.1', - '74.0.3705.0', - '73.0.3683.40', - '72.0.3626.111', - '73.0.3683.39', - '74.0.3704.4', - '73.0.3683.38', - '74.0.3704.3', - '74.0.3704.2', - '74.0.3704.1', - '74.0.3704.0', - '73.0.3683.37', - '72.0.3626.110', - '72.0.3626.109', - '74.0.3703.3', - '74.0.3703.2', - '73.0.3683.36', - '74.0.3703.1', - '74.0.3703.0', - '73.0.3683.35', - '72.0.3626.108', - '74.0.3702.2', - '74.0.3699.3', - '74.0.3702.1', - '74.0.3702.0', - '73.0.3683.34', - '72.0.3626.107', - '73.0.3683.33', - '74.0.3701.1', - '74.0.3701.0', - '73.0.3683.32', - '73.0.3683.31', - '72.0.3626.105', - '74.0.3700.1', - '74.0.3700.0', - '73.0.3683.29', - '72.0.3626.103', - '74.0.3699.2', - '74.0.3699.1', - '74.0.3699.0', - '73.0.3683.28', - '72.0.3626.102', - '73.0.3683.27', - '73.0.3683.26', - '74.0.3698.0', - '74.0.3696.2', - '72.0.3626.101', - '73.0.3683.25', - '74.0.3696.1', - 
'74.0.3696.0', - '74.0.3694.8', - '72.0.3626.100', - '74.0.3694.7', - '74.0.3694.6', - '74.0.3694.5', - '74.0.3694.4', - '72.0.3626.99', - '72.0.3626.98', - '74.0.3694.3', - '73.0.3683.24', - '72.0.3626.97', - '72.0.3626.96', - '72.0.3626.95', - '73.0.3683.23', - '72.0.3626.94', - '73.0.3683.22', - '73.0.3683.21', - '72.0.3626.93', - '74.0.3694.2', - '72.0.3626.92', - '74.0.3694.1', - '74.0.3694.0', - '74.0.3693.6', - '73.0.3683.20', - '72.0.3626.91', - '74.0.3693.5', - '74.0.3693.4', - '74.0.3693.3', - '74.0.3693.2', - '73.0.3683.19', - '74.0.3693.1', - '74.0.3693.0', - '73.0.3683.18', - '72.0.3626.90', - '74.0.3692.1', - '74.0.3692.0', - '73.0.3683.17', - '72.0.3626.89', - '74.0.3687.3', - '74.0.3691.1', - '74.0.3691.0', - '73.0.3683.16', - '72.0.3626.88', - '72.0.3626.87', - '73.0.3683.15', - '74.0.3690.1', - '74.0.3690.0', - '73.0.3683.14', - '72.0.3626.86', - '73.0.3683.13', - '73.0.3683.12', - '74.0.3689.1', - '74.0.3689.0', - '73.0.3683.11', - '72.0.3626.85', - '73.0.3683.10', - '72.0.3626.84', - '73.0.3683.9', - '74.0.3688.1', - '74.0.3688.0', - '73.0.3683.8', - '72.0.3626.83', - '74.0.3687.2', - '74.0.3687.1', - '74.0.3687.0', - '73.0.3683.7', - '72.0.3626.82', - '74.0.3686.4', - '72.0.3626.81', - '74.0.3686.3', - '74.0.3686.2', - '74.0.3686.1', - '74.0.3686.0', - '73.0.3683.6', - '72.0.3626.80', - '74.0.3685.1', - '74.0.3685.0', - '73.0.3683.5', - '72.0.3626.79', - '74.0.3684.1', - '74.0.3684.0', - '73.0.3683.4', - '72.0.3626.78', - '72.0.3626.77', - '73.0.3683.3', - '73.0.3683.2', - '72.0.3626.76', - '73.0.3683.1', - '73.0.3683.0', - '72.0.3626.75', - '71.0.3578.141', - '73.0.3682.1', - '73.0.3682.0', - '72.0.3626.74', - '71.0.3578.140', - '73.0.3681.4', - '73.0.3681.3', - '73.0.3681.2', - '73.0.3681.1', - '73.0.3681.0', - '72.0.3626.73', - '71.0.3578.139', - '72.0.3626.72', - '72.0.3626.71', - '73.0.3680.1', - '73.0.3680.0', - '72.0.3626.70', - '71.0.3578.138', - '73.0.3678.2', - '73.0.3679.1', - '73.0.3679.0', - '72.0.3626.69', - '71.0.3578.137', - 
'73.0.3678.1', - '73.0.3678.0', - '71.0.3578.136', - '73.0.3677.1', - '73.0.3677.0', - '72.0.3626.68', - '72.0.3626.67', - '71.0.3578.135', - '73.0.3676.1', - '73.0.3676.0', - '73.0.3674.2', - '72.0.3626.66', - '71.0.3578.134', - '73.0.3674.1', - '73.0.3674.0', - '72.0.3626.65', - '71.0.3578.133', - '73.0.3673.2', - '73.0.3673.1', - '73.0.3673.0', - '72.0.3626.64', - '71.0.3578.132', - '72.0.3626.63', - '72.0.3626.62', - '72.0.3626.61', - '72.0.3626.60', - '73.0.3672.1', - '73.0.3672.0', - '72.0.3626.59', - '71.0.3578.131', - '73.0.3671.3', - '73.0.3671.2', - '73.0.3671.1', - '73.0.3671.0', - '72.0.3626.58', - '71.0.3578.130', - '73.0.3670.1', - '73.0.3670.0', - '72.0.3626.57', - '71.0.3578.129', - '73.0.3669.1', - '73.0.3669.0', - '72.0.3626.56', - '71.0.3578.128', - '73.0.3668.2', - '73.0.3668.1', - '73.0.3668.0', - '72.0.3626.55', - '71.0.3578.127', - '73.0.3667.2', - '73.0.3667.1', - '73.0.3667.0', - '72.0.3626.54', - '71.0.3578.126', - '73.0.3666.1', - '73.0.3666.0', - '72.0.3626.53', - '71.0.3578.125', - '73.0.3665.4', - '73.0.3665.3', - '72.0.3626.52', - '73.0.3665.2', - '73.0.3664.4', - '73.0.3665.1', - '73.0.3665.0', - '72.0.3626.51', - '71.0.3578.124', - '72.0.3626.50', - '73.0.3664.3', - '73.0.3664.2', - '73.0.3664.1', - '73.0.3664.0', - '73.0.3663.2', - '72.0.3626.49', - '71.0.3578.123', - '73.0.3663.1', - '73.0.3663.0', - '72.0.3626.48', - '71.0.3578.122', - '73.0.3662.1', - '73.0.3662.0', - '72.0.3626.47', - '71.0.3578.121', - '73.0.3661.1', - '72.0.3626.46', - '73.0.3661.0', - '72.0.3626.45', - '71.0.3578.120', - '73.0.3660.2', - '73.0.3660.1', - '73.0.3660.0', - '72.0.3626.44', - '71.0.3578.119', - '73.0.3659.1', - '73.0.3659.0', - '72.0.3626.43', - '71.0.3578.118', - '73.0.3658.1', - '73.0.3658.0', - '72.0.3626.42', - '71.0.3578.117', - '73.0.3657.1', - '73.0.3657.0', - '72.0.3626.41', - '71.0.3578.116', - '73.0.3656.1', - '73.0.3656.0', - '72.0.3626.40', - '71.0.3578.115', - '73.0.3655.1', - '73.0.3655.0', - '72.0.3626.39', - '71.0.3578.114', - 
'73.0.3654.1', - '73.0.3654.0', - '72.0.3626.38', - '71.0.3578.113', - '73.0.3653.1', - '73.0.3653.0', - '72.0.3626.37', - '71.0.3578.112', - '73.0.3652.1', - '73.0.3652.0', - '72.0.3626.36', - '71.0.3578.111', - '73.0.3651.1', - '73.0.3651.0', - '72.0.3626.35', - '71.0.3578.110', - '73.0.3650.1', - '73.0.3650.0', - '72.0.3626.34', - '71.0.3578.109', - '73.0.3649.1', - '73.0.3649.0', - '72.0.3626.33', - '71.0.3578.108', - '73.0.3648.2', - '73.0.3648.1', - '73.0.3648.0', - '72.0.3626.32', - '71.0.3578.107', - '73.0.3647.2', - '73.0.3647.1', - '73.0.3647.0', - '72.0.3626.31', - '71.0.3578.106', - '73.0.3635.3', - '73.0.3646.2', - '73.0.3646.1', - '73.0.3646.0', - '72.0.3626.30', - '71.0.3578.105', - '72.0.3626.29', - '73.0.3645.2', - '73.0.3645.1', - '73.0.3645.0', - '72.0.3626.28', - '71.0.3578.104', - '72.0.3626.27', - '72.0.3626.26', - '72.0.3626.25', - '72.0.3626.24', - '73.0.3644.0', - '73.0.3643.2', - '72.0.3626.23', - '71.0.3578.103', - '73.0.3643.1', - '73.0.3643.0', - '72.0.3626.22', - '71.0.3578.102', - '73.0.3642.1', - '73.0.3642.0', - '72.0.3626.21', - '71.0.3578.101', - '73.0.3641.1', - '73.0.3641.0', - '72.0.3626.20', - '71.0.3578.100', - '72.0.3626.19', - '73.0.3640.1', - '73.0.3640.0', - '72.0.3626.18', - '73.0.3639.1', - '71.0.3578.99', - '73.0.3639.0', - '72.0.3626.17', - '73.0.3638.2', - '72.0.3626.16', - '73.0.3638.1', - '73.0.3638.0', - '72.0.3626.15', - '71.0.3578.98', - '73.0.3635.2', - '71.0.3578.97', - '73.0.3637.1', - '73.0.3637.0', - '72.0.3626.14', - '71.0.3578.96', - '71.0.3578.95', - '72.0.3626.13', - '71.0.3578.94', - '73.0.3636.2', - '71.0.3578.93', - '73.0.3636.1', - '73.0.3636.0', - '72.0.3626.12', - '71.0.3578.92', - '73.0.3635.1', - '73.0.3635.0', - '72.0.3626.11', - '71.0.3578.91', - '73.0.3634.2', - '73.0.3634.1', - '73.0.3634.0', - '72.0.3626.10', - '71.0.3578.90', - '71.0.3578.89', - '73.0.3633.2', - '73.0.3633.1', - '73.0.3633.0', - '72.0.3610.4', - '72.0.3626.9', - '71.0.3578.88', - '73.0.3632.5', - '73.0.3632.4', - 
'73.0.3632.3', - '73.0.3632.2', - '73.0.3632.1', - '73.0.3632.0', - '72.0.3626.8', - '71.0.3578.87', - '73.0.3631.2', - '73.0.3631.1', - '73.0.3631.0', - '72.0.3626.7', - '71.0.3578.86', - '72.0.3626.6', - '73.0.3630.1', - '73.0.3630.0', - '72.0.3626.5', - '71.0.3578.85', - '72.0.3626.4', - '73.0.3628.3', - '73.0.3628.2', - '73.0.3629.1', - '73.0.3629.0', - '72.0.3626.3', - '71.0.3578.84', - '73.0.3628.1', - '73.0.3628.0', - '71.0.3578.83', - '73.0.3627.1', - '73.0.3627.0', - '72.0.3626.2', - '71.0.3578.82', - '71.0.3578.81', - '71.0.3578.80', - '72.0.3626.1', - '72.0.3626.0', - '71.0.3578.79', - '70.0.3538.124', - '71.0.3578.78', - '72.0.3623.4', - '72.0.3625.2', - '72.0.3625.1', - '72.0.3625.0', - '71.0.3578.77', - '70.0.3538.123', - '72.0.3624.4', - '72.0.3624.3', - '72.0.3624.2', - '71.0.3578.76', - '72.0.3624.1', - '72.0.3624.0', - '72.0.3623.3', - '71.0.3578.75', - '70.0.3538.122', - '71.0.3578.74', - '72.0.3623.2', - '72.0.3610.3', - '72.0.3623.1', - '72.0.3623.0', - '72.0.3622.3', - '72.0.3622.2', - '71.0.3578.73', - '70.0.3538.121', - '72.0.3622.1', - '72.0.3622.0', - '71.0.3578.72', - '70.0.3538.120', - '72.0.3621.1', - '72.0.3621.0', - '71.0.3578.71', - '70.0.3538.119', - '72.0.3620.1', - '72.0.3620.0', - '71.0.3578.70', - '70.0.3538.118', - '71.0.3578.69', - '72.0.3619.1', - '72.0.3619.0', - '71.0.3578.68', - '70.0.3538.117', - '71.0.3578.67', - '72.0.3618.1', - '72.0.3618.0', - '71.0.3578.66', - '70.0.3538.116', - '72.0.3617.1', - '72.0.3617.0', - '71.0.3578.65', - '70.0.3538.115', - '72.0.3602.3', - '71.0.3578.64', - '72.0.3616.1', - '72.0.3616.0', - '71.0.3578.63', - '70.0.3538.114', - '71.0.3578.62', - '72.0.3615.1', - '72.0.3615.0', - '71.0.3578.61', - '70.0.3538.113', - '72.0.3614.1', - '72.0.3614.0', - '71.0.3578.60', - '70.0.3538.112', - '72.0.3613.1', - '72.0.3613.0', - '71.0.3578.59', - '70.0.3538.111', - '72.0.3612.2', - '72.0.3612.1', - '72.0.3612.0', - '70.0.3538.110', - '71.0.3578.58', - '70.0.3538.109', - '72.0.3611.2', - '72.0.3611.1', - 
'72.0.3611.0', - '71.0.3578.57', - '70.0.3538.108', - '72.0.3610.2', - '71.0.3578.56', - '71.0.3578.55', - '72.0.3610.1', - '72.0.3610.0', - '71.0.3578.54', - '70.0.3538.107', - '71.0.3578.53', - '72.0.3609.3', - '71.0.3578.52', - '72.0.3609.2', - '71.0.3578.51', - '72.0.3608.5', - '72.0.3609.1', - '72.0.3609.0', - '71.0.3578.50', - '70.0.3538.106', - '72.0.3608.4', - '72.0.3608.3', - '72.0.3608.2', - '71.0.3578.49', - '72.0.3608.1', - '72.0.3608.0', - '70.0.3538.105', - '71.0.3578.48', - '72.0.3607.1', - '72.0.3607.0', - '71.0.3578.47', - '70.0.3538.104', - '72.0.3606.2', - '72.0.3606.1', - '72.0.3606.0', - '71.0.3578.46', - '70.0.3538.103', - '70.0.3538.102', - '72.0.3605.3', - '72.0.3605.2', - '72.0.3605.1', - '72.0.3605.0', - '71.0.3578.45', - '70.0.3538.101', - '71.0.3578.44', - '71.0.3578.43', - '70.0.3538.100', - '70.0.3538.99', - '71.0.3578.42', - '72.0.3604.1', - '72.0.3604.0', - '71.0.3578.41', - '70.0.3538.98', - '71.0.3578.40', - '72.0.3603.2', - '72.0.3603.1', - '72.0.3603.0', - '71.0.3578.39', - '70.0.3538.97', - '72.0.3602.2', - '71.0.3578.38', - '71.0.3578.37', - '72.0.3602.1', - '72.0.3602.0', - '71.0.3578.36', - '70.0.3538.96', - '72.0.3601.1', - '72.0.3601.0', - '71.0.3578.35', - '70.0.3538.95', - '72.0.3600.1', - '72.0.3600.0', - '71.0.3578.34', - '70.0.3538.94', - '72.0.3599.3', - '72.0.3599.2', - '72.0.3599.1', - '72.0.3599.0', - '71.0.3578.33', - '70.0.3538.93', - '72.0.3598.1', - '72.0.3598.0', - '71.0.3578.32', - '70.0.3538.87', - '72.0.3597.1', - '72.0.3597.0', - '72.0.3596.2', - '71.0.3578.31', - '70.0.3538.86', - '71.0.3578.30', - '71.0.3578.29', - '72.0.3596.1', - '72.0.3596.0', - '71.0.3578.28', - '70.0.3538.85', - '72.0.3595.2', - '72.0.3591.3', - '72.0.3595.1', - '72.0.3595.0', - '71.0.3578.27', - '70.0.3538.84', - '72.0.3594.1', - '72.0.3594.0', - '71.0.3578.26', - '70.0.3538.83', - '72.0.3593.2', - '72.0.3593.1', - '72.0.3593.0', - '71.0.3578.25', - '70.0.3538.82', - '72.0.3589.3', - '72.0.3592.2', - '72.0.3592.1', - '72.0.3592.0', 
- '71.0.3578.24', - '72.0.3589.2', - '70.0.3538.81', - '70.0.3538.80', - '72.0.3591.2', - '72.0.3591.1', - '72.0.3591.0', - '71.0.3578.23', - '70.0.3538.79', - '71.0.3578.22', - '72.0.3590.1', - '72.0.3590.0', - '71.0.3578.21', - '70.0.3538.78', - '70.0.3538.77', - '72.0.3589.1', - '72.0.3589.0', - '71.0.3578.20', - '70.0.3538.76', - '71.0.3578.19', - '70.0.3538.75', - '72.0.3588.1', - '72.0.3588.0', - '71.0.3578.18', - '70.0.3538.74', - '72.0.3586.2', - '72.0.3587.0', - '71.0.3578.17', - '70.0.3538.73', - '72.0.3586.1', - '72.0.3586.0', - '71.0.3578.16', - '70.0.3538.72', - '72.0.3585.1', - '72.0.3585.0', - '71.0.3578.15', - '70.0.3538.71', - '71.0.3578.14', - '72.0.3584.1', - '72.0.3584.0', - '71.0.3578.13', - '70.0.3538.70', - '72.0.3583.2', - '71.0.3578.12', - '72.0.3583.1', - '72.0.3583.0', - '71.0.3578.11', - '70.0.3538.69', - '71.0.3578.10', - '72.0.3582.0', - '72.0.3581.4', - '71.0.3578.9', - '70.0.3538.67', - '72.0.3581.3', - '72.0.3581.2', - '72.0.3581.1', - '72.0.3581.0', - '71.0.3578.8', - '70.0.3538.66', - '72.0.3580.1', - '72.0.3580.0', - '71.0.3578.7', - '70.0.3538.65', - '71.0.3578.6', - '72.0.3579.1', - '72.0.3579.0', - '71.0.3578.5', - '70.0.3538.64', - '71.0.3578.4', - '71.0.3578.3', - '71.0.3578.2', - '71.0.3578.1', - '71.0.3578.0', - '70.0.3538.63', - '69.0.3497.128', - '70.0.3538.62', - '70.0.3538.61', - '70.0.3538.60', - '70.0.3538.59', - '71.0.3577.1', - '71.0.3577.0', - '70.0.3538.58', - '69.0.3497.127', - '71.0.3576.2', - '71.0.3576.1', - '71.0.3576.0', - '70.0.3538.57', - '70.0.3538.56', - '71.0.3575.2', - '70.0.3538.55', - '69.0.3497.126', - '70.0.3538.54', - '71.0.3575.1', - '71.0.3575.0', - '71.0.3574.1', - '71.0.3574.0', - '70.0.3538.53', - '69.0.3497.125', - '70.0.3538.52', - '71.0.3573.1', - '71.0.3573.0', - '70.0.3538.51', - '69.0.3497.124', - '71.0.3572.1', - '71.0.3572.0', - '70.0.3538.50', - '69.0.3497.123', - '71.0.3571.2', - '70.0.3538.49', - '69.0.3497.122', - '71.0.3571.1', - '71.0.3571.0', - '70.0.3538.48', - 
'69.0.3497.121', - '71.0.3570.1', - '71.0.3570.0', - '70.0.3538.47', - '69.0.3497.120', - '71.0.3568.2', - '71.0.3569.1', - '71.0.3569.0', - '70.0.3538.46', - '69.0.3497.119', - '70.0.3538.45', - '71.0.3568.1', - '71.0.3568.0', - '70.0.3538.44', - '69.0.3497.118', - '70.0.3538.43', - '70.0.3538.42', - '71.0.3567.1', - '71.0.3567.0', - '70.0.3538.41', - '69.0.3497.117', - '71.0.3566.1', - '71.0.3566.0', - '70.0.3538.40', - '69.0.3497.116', - '71.0.3565.1', - '71.0.3565.0', - '70.0.3538.39', - '69.0.3497.115', - '71.0.3564.1', - '71.0.3564.0', - '70.0.3538.38', - '69.0.3497.114', - '71.0.3563.0', - '71.0.3562.2', - '70.0.3538.37', - '69.0.3497.113', - '70.0.3538.36', - '70.0.3538.35', - '71.0.3562.1', - '71.0.3562.0', - '70.0.3538.34', - '69.0.3497.112', - '70.0.3538.33', - '71.0.3561.1', - '71.0.3561.0', - '70.0.3538.32', - '69.0.3497.111', - '71.0.3559.6', - '71.0.3560.1', - '71.0.3560.0', - '71.0.3559.5', - '71.0.3559.4', - '70.0.3538.31', - '69.0.3497.110', - '71.0.3559.3', - '70.0.3538.30', - '69.0.3497.109', - '71.0.3559.2', - '71.0.3559.1', - '71.0.3559.0', - '70.0.3538.29', - '69.0.3497.108', - '71.0.3558.2', - '71.0.3558.1', - '71.0.3558.0', - '70.0.3538.28', - '69.0.3497.107', - '71.0.3557.2', - '71.0.3557.1', - '71.0.3557.0', - '70.0.3538.27', - '69.0.3497.106', - '71.0.3554.4', - '70.0.3538.26', - '71.0.3556.1', - '71.0.3556.0', - '70.0.3538.25', - '71.0.3554.3', - '69.0.3497.105', - '71.0.3554.2', - '70.0.3538.24', - '69.0.3497.104', - '71.0.3555.2', - '70.0.3538.23', - '71.0.3555.1', - '71.0.3555.0', - '70.0.3538.22', - '69.0.3497.103', - '71.0.3554.1', - '71.0.3554.0', - '70.0.3538.21', - '69.0.3497.102', - '71.0.3553.3', - '70.0.3538.20', - '69.0.3497.101', - '71.0.3553.2', - '69.0.3497.100', - '71.0.3553.1', - '71.0.3553.0', - '70.0.3538.19', - '69.0.3497.99', - '69.0.3497.98', - '69.0.3497.97', - '71.0.3552.6', - '71.0.3552.5', - '71.0.3552.4', - '71.0.3552.3', - '71.0.3552.2', - '71.0.3552.1', - '71.0.3552.0', - '70.0.3538.18', - '69.0.3497.96', - 
'71.0.3551.3', - '71.0.3551.2', - '71.0.3551.1', - '71.0.3551.0', - '70.0.3538.17', - '69.0.3497.95', - '71.0.3550.3', - '71.0.3550.2', - '71.0.3550.1', - '71.0.3550.0', - '70.0.3538.16', - '69.0.3497.94', - '71.0.3549.1', - '71.0.3549.0', - '70.0.3538.15', - '69.0.3497.93', - '69.0.3497.92', - '71.0.3548.1', - '71.0.3548.0', - '70.0.3538.14', - '69.0.3497.91', - '71.0.3547.1', - '71.0.3547.0', - '70.0.3538.13', - '69.0.3497.90', - '71.0.3546.2', - '69.0.3497.89', - '71.0.3546.1', - '71.0.3546.0', - '70.0.3538.12', - '69.0.3497.88', - '71.0.3545.4', - '71.0.3545.3', - '71.0.3545.2', - '71.0.3545.1', - '71.0.3545.0', - '70.0.3538.11', - '69.0.3497.87', - '71.0.3544.5', - '71.0.3544.4', - '71.0.3544.3', - '71.0.3544.2', - '71.0.3544.1', - '71.0.3544.0', - '69.0.3497.86', - '70.0.3538.10', - '69.0.3497.85', - '70.0.3538.9', - '69.0.3497.84', - '71.0.3543.4', - '70.0.3538.8', - '71.0.3543.3', - '71.0.3543.2', - '71.0.3543.1', - '71.0.3543.0', - '70.0.3538.7', - '69.0.3497.83', - '71.0.3542.2', - '71.0.3542.1', - '71.0.3542.0', - '70.0.3538.6', - '69.0.3497.82', - '69.0.3497.81', - '71.0.3541.1', - '71.0.3541.0', - '70.0.3538.5', - '69.0.3497.80', - '71.0.3540.1', - '71.0.3540.0', - '70.0.3538.4', - '69.0.3497.79', - '70.0.3538.3', - '71.0.3539.1', - '71.0.3539.0', - '69.0.3497.78', - '68.0.3440.134', - '69.0.3497.77', - '70.0.3538.2', - '70.0.3538.1', - '70.0.3538.0', - '69.0.3497.76', - '68.0.3440.133', - '69.0.3497.75', - '70.0.3537.2', - '70.0.3537.1', - '70.0.3537.0', - '69.0.3497.74', - '68.0.3440.132', - '70.0.3536.0', - '70.0.3535.5', - '70.0.3535.4', - '70.0.3535.3', - '69.0.3497.73', - '68.0.3440.131', - '70.0.3532.8', - '70.0.3532.7', - '69.0.3497.72', - '69.0.3497.71', - '70.0.3535.2', - '70.0.3535.1', - '70.0.3535.0', - '69.0.3497.70', - '68.0.3440.130', - '69.0.3497.69', - '68.0.3440.129', - '70.0.3534.4', - '70.0.3534.3', - '70.0.3534.2', - '70.0.3534.1', - '70.0.3534.0', - '69.0.3497.68', - '68.0.3440.128', - '70.0.3533.2', - '70.0.3533.1', - 
'70.0.3533.0', - '69.0.3497.67', - '68.0.3440.127', - '70.0.3532.6', - '70.0.3532.5', - '70.0.3532.4', - '69.0.3497.66', - '68.0.3440.126', - '70.0.3532.3', - '70.0.3532.2', - '70.0.3532.1', - '69.0.3497.60', - '69.0.3497.65', - '69.0.3497.64', - '70.0.3532.0', - '70.0.3531.0', - '70.0.3530.4', - '70.0.3530.3', - '70.0.3530.2', - '69.0.3497.58', - '68.0.3440.125', - '69.0.3497.57', - '69.0.3497.56', - '69.0.3497.55', - '69.0.3497.54', - '70.0.3530.1', - '70.0.3530.0', - '69.0.3497.53', - '68.0.3440.124', - '69.0.3497.52', - '70.0.3529.3', - '70.0.3529.2', - '70.0.3529.1', - '70.0.3529.0', - '69.0.3497.51', - '70.0.3528.4', - '68.0.3440.123', - '70.0.3528.3', - '70.0.3528.2', - '70.0.3528.1', - '70.0.3528.0', - '69.0.3497.50', - '68.0.3440.122', - '70.0.3527.1', - '70.0.3527.0', - '69.0.3497.49', - '68.0.3440.121', - '70.0.3526.1', - '70.0.3526.0', - '68.0.3440.120', - '69.0.3497.48', - '69.0.3497.47', - '68.0.3440.119', - '68.0.3440.118', - '70.0.3525.5', - '70.0.3525.4', - '70.0.3525.3', - '68.0.3440.117', - '69.0.3497.46', - '70.0.3525.2', - '70.0.3525.1', - '70.0.3525.0', - '69.0.3497.45', - '68.0.3440.116', - '70.0.3524.4', - '70.0.3524.3', - '69.0.3497.44', - '70.0.3524.2', - '70.0.3524.1', - '70.0.3524.0', - '70.0.3523.2', - '69.0.3497.43', - '68.0.3440.115', - '70.0.3505.9', - '69.0.3497.42', - '70.0.3505.8', - '70.0.3523.1', - '70.0.3523.0', - '69.0.3497.41', - '68.0.3440.114', - '70.0.3505.7', - '69.0.3497.40', - '70.0.3522.1', - '70.0.3522.0', - '70.0.3521.2', - '69.0.3497.39', - '68.0.3440.113', - '70.0.3505.6', - '70.0.3521.1', - '70.0.3521.0', - '69.0.3497.38', - '68.0.3440.112', - '70.0.3520.1', - '70.0.3520.0', - '69.0.3497.37', - '68.0.3440.111', - '70.0.3519.3', - '70.0.3519.2', - '70.0.3519.1', - '70.0.3519.0', - '69.0.3497.36', - '68.0.3440.110', - '70.0.3518.1', - '70.0.3518.0', - '69.0.3497.35', - '69.0.3497.34', - '68.0.3440.109', - '70.0.3517.1', - '70.0.3517.0', - '69.0.3497.33', - '68.0.3440.108', - '69.0.3497.32', - '70.0.3516.3', - 
'70.0.3516.2', - '70.0.3516.1', - '70.0.3516.0', - '69.0.3497.31', - '68.0.3440.107', - '70.0.3515.4', - '68.0.3440.106', - '70.0.3515.3', - '70.0.3515.2', - '70.0.3515.1', - '70.0.3515.0', - '69.0.3497.30', - '68.0.3440.105', - '68.0.3440.104', - '70.0.3514.2', - '70.0.3514.1', - '70.0.3514.0', - '69.0.3497.29', - '68.0.3440.103', - '70.0.3513.1', - '70.0.3513.0', - '69.0.3497.28', - ) - return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS) - - -std_headers = { - 'User-Agent': random_user_agent(), - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'Accept-Language': 'en-us,en;q=0.5', -} - - -USER_AGENTS = { - 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27', -} - - -NO_DEFAULT = object() -IDENTITY = lambda x: x - -ENGLISH_MONTH_NAMES = [ - 'January', 'February', 'March', 'April', 'May', 'June', - 'July', 'August', 'September', 'October', 'November', 'December'] - -MONTH_NAMES = { - 'en': ENGLISH_MONTH_NAMES, - 'fr': [ - 'janvier', 'février', 'mars', 'avril', 'mai', 'juin', - 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'], -} - -# Timezone names for RFC2822 obs-zone -# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42 -TIMEZONE_NAMES = { - 'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0, - 'AST': -4, 'ADT': -3, # Atlantic (used in Canada) - 'EST': -5, 'EDT': -4, # Eastern - 'CST': -6, 'CDT': -5, # Central - 'MST': -7, 'MDT': -6, # Mountain - 'PST': -8, 'PDT': -7 # Pacific -} - -KNOWN_EXTENSIONS = ( - 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', - 'flv', 'f4v', 'f4a', 'f4b', - 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus', - 'mkv', 'mka', 'mk3d', - 'avi', 'divx', - 'mov', - 'asf', 'wmv', 'wma', - '3gp', '3g2', - 'mp3', - 'flac', - 'ape', - 'wav', - 'f4f', 'f4m', 'm3u8', 'smil') - -# needed for sanitizing filenames in restricted mode -ACCENT_CHARS = 
dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', - itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'], - 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y'))) - -DATE_FORMATS = ( - '%d %B %Y', - '%d %b %Y', - '%B %d %Y', - '%B %dst %Y', - '%B %dnd %Y', - '%B %drd %Y', - '%B %dth %Y', - '%b %d %Y', - '%b %dst %Y', - '%b %dnd %Y', - '%b %drd %Y', - '%b %dth %Y', - '%b %dst %Y %I:%M', - '%b %dnd %Y %I:%M', - '%b %drd %Y %I:%M', - '%b %dth %Y %I:%M', - '%Y %m %d', - '%Y-%m-%d', - '%Y.%m.%d.', - '%Y/%m/%d', - '%Y/%m/%d %H:%M', - '%Y/%m/%d %H:%M:%S', - '%Y%m%d%H%M', - '%Y%m%d%H%M%S', - '%Y%m%d', - '%Y-%m-%d %H:%M', - '%Y-%m-%d %H:%M:%S', - '%Y-%m-%d %H:%M:%S.%f', - '%Y-%m-%d %H:%M:%S:%f', - '%d.%m.%Y %H:%M', - '%d.%m.%Y %H.%M', - '%Y-%m-%dT%H:%M:%SZ', - '%Y-%m-%dT%H:%M:%S.%fZ', - '%Y-%m-%dT%H:%M:%S.%f0Z', - '%Y-%m-%dT%H:%M:%S', - '%Y-%m-%dT%H:%M:%S.%f', - '%Y-%m-%dT%H:%M', - '%b %d %Y at %H:%M', - '%b %d %Y at %H:%M:%S', - '%B %d %Y at %H:%M', - '%B %d %Y at %H:%M:%S', - '%H:%M %d-%b-%Y', -) - -DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) -DATE_FORMATS_DAY_FIRST.extend([ - '%d-%m-%Y', - '%d.%m.%Y', - '%d.%m.%y', - '%d/%m/%Y', - '%d/%m/%y', - '%d/%m/%Y %H:%M:%S', - '%d-%m-%Y %H:%M', -]) - -DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) -DATE_FORMATS_MONTH_FIRST.extend([ - '%m-%d-%Y', - '%m.%d.%Y', - '%m/%d/%Y', - '%m/%d/%y', - '%m/%d/%Y %H:%M:%S', -]) - -PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)" -JSON_LD_RE = r'(?is)]+type=(["\']?)application/ld\+json\1[^>]*>(?P.+?)' - - -def preferredencoding(): - """Get preferred encoding. - - Returns the best encoding scheme for the system, based on - locale.getpreferredencoding() and some further tweaks. 
- """ - try: - pref = locale.getpreferredencoding() - 'TEST'.encode(pref) - except Exception: - pref = 'UTF-8' - - return pref - - -def write_json_file(obj, fn): - """ Encode obj as JSON and write it to fn, atomically if possible """ - - fn = encodeFilename(fn) - if sys.version_info < (3, 0) and sys.platform != 'win32': - encoding = get_filesystem_encoding() - # os.path.basename returns a bytes object, but NamedTemporaryFile - # will fail if the filename contains non-ascii characters unless we - # use a unicode object - path_basename = lambda f: os.path.basename(f).decode(encoding) - # the same for os.path.dirname - path_dirname = lambda f: os.path.dirname(f).decode(encoding) - else: - path_basename = os.path.basename - path_dirname = os.path.dirname - - args = { - 'suffix': '.tmp', - 'prefix': path_basename(fn) + '.', - 'dir': path_dirname(fn), - 'delete': False, - } - - # In Python 2.x, json.dump expects a bytestream. - # In Python 3.x, it writes to a character stream - if sys.version_info < (3, 0): - args['mode'] = 'wb' - else: - args.update({ - 'mode': 'w', - 'encoding': 'utf-8', - }) - - tf = tempfile.NamedTemporaryFile(**compat_kwargs(args)) - - try: - with tf: - json.dump(obj, tf) - if sys.platform == 'win32': - # Need to remove existing file on Windows, else os.rename raises - # WindowsError or FileExistsError. 
- try: - os.unlink(fn) - except OSError: - pass - try: - mask = os.umask(0) - os.umask(mask) - os.chmod(tf.name, 0o666 & ~mask) - except OSError: - pass - os.rename(tf.name, fn) - except Exception: - try: - os.remove(tf.name) - except OSError: - pass - raise - - -if sys.version_info >= (2, 7): - def find_xpath_attr(node, xpath, key, val=None): - """ Find the xpath xpath[@key=val] """ - assert re.match(r'^[a-zA-Z_-]+$', key) - expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val)) - return node.find(expr) -else: - def find_xpath_attr(node, xpath, key, val=None): - for f in node.findall(compat_xpath(xpath)): - if key not in f.attrib: - continue - if val is None or f.attrib.get(key) == val: - return f - return None - - -# On python2.6 the xml.etree.ElementTree.Element methods don't support -# the namespace parameter - -def xpath_with_ns(path, ns_map): - components = [c.split(':') for c in path.split('/')] - replaced = [] - for c in components: - if len(c) == 1: - replaced.append(c[0]) - else: - ns, tag = c - replaced.append('{%s}%s' % (ns_map[ns], tag)) - return '/'.join(replaced) - - -def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT): - def _find_xpath(xpath): - return node.find(compat_xpath(xpath)) - - if isinstance(xpath, compat_basestring): - n = _find_xpath(xpath) - else: - for xp in xpath: - n = _find_xpath(xp) - if n is not None: - break - - if n is None: - if default is not NO_DEFAULT: - return default - elif fatal: - name = xpath if name is None else name - raise ExtractorError('Could not find XML element %s' % name) - else: - return None - return n - - -def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT): - n = xpath_element(node, xpath, name, fatal=fatal, default=default) - if n is None or n == default: - return n - if n.text is None: - if default is not NO_DEFAULT: - return default - elif fatal: - name = xpath if name is None else name - raise ExtractorError('Could not find XML element\'s 
text %s' % name) - else: - return None - return n.text - - -def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT): - n = find_xpath_attr(node, xpath, key) - if n is None: - if default is not NO_DEFAULT: - return default - elif fatal: - name = '%s[@%s]' % (xpath, key) if name is None else name - raise ExtractorError('Could not find XML attribute %s' % name) - else: - return None - return n.attrib[key] - - -def get_element_by_id(id, html): - """Return the content of the tag with the specified ID in the passed HTML document""" - return get_element_by_attribute('id', id, html) - - -def get_element_by_class(class_name, html): - """Return the content of the first tag with the specified class in the passed HTML document""" - retval = get_elements_by_class(class_name, html) - return retval[0] if retval else None - - -def get_element_by_attribute(attribute, value, html, escape_value=True): - retval = get_elements_by_attribute(attribute, value, html, escape_value) - return retval[0] if retval else None - - -def get_elements_by_class(class_name, html): - """Return the content of all tags with the specified class in the passed HTML document as a list""" - return get_elements_by_attribute( - 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name), - html, escape_value=False) - - -def get_elements_by_attribute(attribute, value, html, escape_value=True): - """Return the content of the tag with the specified attribute in the passed HTML document""" - - value = re.escape(value) if escape_value else value - - retlist = [] - for m in re.finditer(r'''(?xs) - <([a-zA-Z0-9:._-]+) - (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*? - \s+%s=['"]?%s['"]? - (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*? - \s*> - (?P.*?) 
- - ''' % (re.escape(attribute), value), html): - res = m.group('content') - - if res.startswith('"') or res.startswith("'"): - res = res[1:-1] - - retlist.append(unescapeHTML(res)) - - return retlist - - -class HTMLAttributeParser(compat_HTMLParser): - """Trivial HTML parser to gather the attributes for a single element""" - def __init__(self): - self.attrs = {} - compat_HTMLParser.__init__(self) - - def handle_starttag(self, tag, attrs): - self.attrs = dict(attrs) - - -def extract_attributes(html_element): - """Given a string for an HTML element such as - - Decode and return a dictionary of attributes. - { - 'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz', - 'empty': '', 'noval': None, 'entity': '&', - 'sq': '"', 'dq': '\'' - }. - NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions, - but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. - """ - parser = HTMLAttributeParser() - try: - parser.feed(html_element) - parser.close() - # Older Python may throw HTMLParseError in case of malformed HTML - except compat_HTMLParseError: - pass - return parser.attrs - - -def clean_html(html): - """Clean an HTML snippet into a readable string""" - - if html is None: # Convenience for sanitizing descriptions etc. - return html - - # Newline vs
- html = html.replace('\n', ' ') - html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html) - html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html) - # Strip html tags - html = re.sub('<.*?>', '', html) - # Replace html entities - html = unescapeHTML(html) - return html.strip() - - -def sanitize_open(filename, open_mode): - """Try to open the given filename, and slightly tweak it if this fails. - - Attempts to open the given filename. If this fails, it tries to change - the filename slightly, step by step, until it's either able to open it - or it fails and raises a final exception, like the standard open() - function. - - It returns the tuple (stream, definitive_file_name). - """ - try: - if filename == '-': - if sys.platform == 'win32': - import msvcrt - msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) - return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename) - stream = open(encodeFilename(filename), open_mode) - return (stream, filename) - except (IOError, OSError) as err: - if err.errno in (errno.EACCES,): - raise - - # In case of error, try to remove win32 forbidden chars - alt_filename = sanitize_path(filename) - if alt_filename == filename: - raise - else: - # An exception here should be caught in the caller - stream = open(encodeFilename(alt_filename), open_mode) - return (stream, alt_filename) - - -def timeconvert(timestr): - """Convert RFC 2822 defined time string into system timestamp""" - timestamp = None - timetuple = email.utils.parsedate_tz(timestr) - if timetuple is not None: - timestamp = email.utils.mktime_tz(timetuple) - return timestamp - - -def sanitize_filename(s, restricted=False, is_id=False): - """Sanitizes a string so it could be used as part of a filename. - If restricted is set, use a stricter subset of allowed characters. - Set is_id if this is not an arbitrary string, but an ID that should be kept - if possible. 
- """ - def replace_insane(char): - if restricted and char in ACCENT_CHARS: - return ACCENT_CHARS[char] - if char == '?' or ord(char) < 32 or ord(char) == 127: - return '' - elif char == '"': - return '' if restricted else '\'' - elif char == ':': - return '_-' if restricted else ' -' - elif char in '\\/|*<>': - return '_' - if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()): - return '_' - if restricted and ord(char) > 127: - return '' if unicodedata.category(char)[0] in 'CM' else '_' - - return char - - # Replace look-alike Unicode glyphs - if restricted and not is_id: - s = unicodedata.normalize('NFKC', s) - # Handle timestamps - s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) - result = ''.join(map(replace_insane, s)) - if not is_id: - while '__' in result: - result = result.replace('__', '_') - result = result.strip('_') - # Common case of "Foreign band name - English song title" - if restricted and result.startswith('-_'): - result = result[2:] - if result.startswith('-'): - result = '_' + result[len('-'):] - result = result.lstrip('.') - if not result: - result = '_' - return result - - -def sanitize_path(s): - """Sanitizes and normalizes path on Windows""" - if sys.platform != 'win32': - return s - drive_or_unc, _ = os.path.splitdrive(s) - if sys.version_info < (2, 7) and not drive_or_unc: - drive_or_unc, _ = os.path.splitunc(s) - norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep) - if drive_or_unc: - norm_path.pop(0) - sanitized_path = [ - path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part) - for path_part in norm_path] - if drive_or_unc: - sanitized_path.insert(0, drive_or_unc + os.path.sep) - return os.path.join(*sanitized_path) - - -def sanitize_url(url): - # Prepend protocol-less URLs with `http:` scheme in order to mitigate - # the number of unwanted failures due to missing protocol - if url.startswith('//'): - return 'http:%s' % url - # 
Fix some common typos seen so far - COMMON_TYPOS = ( - # https://github.com/ytdl-org/youtube-dl/issues/15649 - (r'^httpss://', r'https://'), - # https://bx1.be/lives/direct-tv/ - (r'^rmtp([es]?)://', r'rtmp\1://'), - ) - for mistake, fixup in COMMON_TYPOS: - if re.match(mistake, url): - return re.sub(mistake, fixup, url) - return url - - -def sanitized_Request(url, *args, **kwargs): - return compat_urllib_request.Request(escape_url(sanitize_url(url)), *args, **kwargs) - - -def expand_path(s): - """Expand shell variables and ~""" - return os.path.expandvars(compat_expanduser(s)) - - -def orderedSet(iterable): - """ Remove all duplicates from the input iterable """ - res = [] - for el in iterable: - if el not in res: - res.append(el) - return res - - -def _htmlentity_transform(entity_with_semicolon): - """Transforms an HTML entity to a character.""" - entity = entity_with_semicolon[:-1] - - # Known non-numeric HTML entity - if entity in compat_html_entities.name2codepoint: - return compat_chr(compat_html_entities.name2codepoint[entity]) - - # TODO: HTML5 allows entities without a semicolon. For example, - # 'Éric' should be decoded as 'Éric'. 
- if entity_with_semicolon in compat_html_entities_html5: - return compat_html_entities_html5[entity_with_semicolon] - - mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity) - if mobj is not None: - numstr = mobj.group(1) - if numstr.startswith('x'): - base = 16 - numstr = '0%s' % numstr - else: - base = 10 - # See https://github.com/ytdl-org/youtube-dl/issues/7518 - try: - return compat_chr(int(numstr, base)) - except ValueError: - pass - - # Unknown entity in name, return its literal representation - return '&%s;' % entity - - -def unescapeHTML(s): - if s is None: - return None - assert isinstance(s, compat_str) - - return re.sub( - r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s) - - -def process_communicate_or_kill(p, *args, **kwargs): - try: - return p.communicate(*args, **kwargs) - except BaseException: # Including KeyboardInterrupt - p.kill() - p.wait() - raise - - -def get_subprocess_encoding(): - if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5: - # For subprocess calls, encode with locale encoding - # Refer to http://stackoverflow.com/a/9951851/35070 - encoding = preferredencoding() - else: - encoding = sys.getfilesystemencoding() - if encoding is None: - encoding = 'utf-8' - return encoding - - -# Jython assumes filenames are Unicode strings though reported as Python 2.x compatible -if sys.version_info < (3, 0) and not sys.platform.startswith('java'): - - def encodeFilename(s, for_subprocess=False): - """ - @param s The name of the file - """ - - # Pass '' directly to use Unicode APIs on Windows 2000 and up - # (Detecting Windows NT 4 is tricky because 'major >= 4' would - # match Windows 9x series as well. Besides, NT 4 is obsolete.) 
- if (not for_subprocess - and sys.platform == 'win32' - and sys.getwindowsversion()[0] >= 5 - and isinstance(s, compat_str)): - return s - - return _encode_compat_str(s, get_subprocess_encoding(), 'ignore') - - def decodeFilename(b, for_subprocess=False): - return _decode_compat_str(b, get_subprocess_encoding(), 'ignore') - -else: - - # Python 3 has a Unicode API - encodeFilename = decodeFilename = lambda *s, **k: s[0] - - -def encodeArgument(s): - if not isinstance(s, compat_str): - # Legacy code that uses byte strings - # Uncomment the following line after fixing all post processors - # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s)) - s = s.decode('ascii') - return encodeFilename(s, True) - - -def decodeArgument(b): - return decodeFilename(b, True) - - -def decodeOption(optval): - if optval is None: - return optval - return _decode_compat_str(optval) - - -def formatSeconds(secs): - if secs > 3600: - return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60) - elif secs > 60: - return '%d:%02d' % (secs // 60, secs % 60) - else: - return '%d' % secs - - -def make_HTTPS_handler(params, **kwargs): - - # https://www.rfc-editor.org/info/rfc7301 - ALPN_PROTOCOLS = ['http/1.1'] - - def set_alpn_protocols(ctx): - # From https://github.com/yt-dlp/yt-dlp/commit/2c6dcb65fb612fc5bc5c61937bf438d3c473d8d0 - # Thanks @coletdjnz - # Some servers may (wrongly) reject requests if ALPN extension is not sent. 
See: - # https://github.com/python/cpython/issues/85140 - # https://github.com/yt-dlp/yt-dlp/issues/3878 - try: - ctx.set_alpn_protocols(ALPN_PROTOCOLS) - except (AttributeError, NotImplementedError): - # Python < 2.7.10, not ssl.HAS_ALPN - pass - - opts_no_check_certificate = params.get('nocheckcertificate', False) - if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9 - context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH) - set_alpn_protocols(context) - if opts_no_check_certificate: - context.check_hostname = False - context.verify_mode = ssl.CERT_NONE - - try: - return YoutubeDLHTTPSHandler(params, context=context, **kwargs) - except TypeError: - # Python 2.7.8 - # (create_default_context present but HTTPSHandler has no context=) - pass - - if sys.version_info < (3, 2): - return YoutubeDLHTTPSHandler(params, **kwargs) - else: # Python3 < 3.4 - context = ssl.SSLContext(ssl.PROTOCOL_TLSv1) - context.verify_mode = (ssl.CERT_NONE - if opts_no_check_certificate - else ssl.CERT_REQUIRED) - context.set_default_verify_paths() - set_alpn_protocols(context) - return YoutubeDLHTTPSHandler(params, context=context, **kwargs) - - -def bug_reports_message(): - if ytdl_is_updateable(): - update_cmd = 'type youtube-dl -U to update' - else: - update_cmd = 'see https://yt-dl.org/update on how to update' - msg = '; please report this issue on https://yt-dl.org/bug .' - msg += ' Make sure you are using the latest version; %s.' % update_cmd - msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.' - return msg - - -class YoutubeDLError(Exception): - """Base exception for YoutubeDL errors.""" - pass - - -class ExtractorError(YoutubeDLError): - """Error during info extraction.""" - - def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None): - """ tb, if given, is the original traceback (so that it can be printed out). 
- If expected is set, this is a normal error message and most likely not a bug in youtube-dl. - """ - - if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): - expected = True - if video_id is not None: - msg = video_id + ': ' + msg - if cause: - msg += ' (caused by %r)' % cause - if not expected: - msg += bug_reports_message() - super(ExtractorError, self).__init__(msg) - - self.traceback = tb - self.exc_info = sys.exc_info() # preserve original exception - self.cause = cause - self.video_id = video_id - - def format_traceback(self): - if self.traceback is None: - return None - return ''.join(traceback.format_tb(self.traceback)) - - -class UnsupportedError(ExtractorError): - def __init__(self, url): - super(UnsupportedError, self).__init__( - 'Unsupported URL: %s' % url, expected=True) - self.url = url - - -class RegexNotFoundError(ExtractorError): - """Error when a regex didn't match""" - pass - - -class GeoRestrictedError(ExtractorError): - """Geographic restriction Error exception. - - This exception may be thrown when a video is not available from your - geographic location due to geographic restrictions imposed by a website. - """ - def __init__(self, msg, countries=None): - super(GeoRestrictedError, self).__init__(msg, expected=True) - self.msg = msg - self.countries = countries - - -class DownloadError(YoutubeDLError): - """Download Error exception. - - This exception may be thrown by FileDownloader objects if they are not - configured to continue on errors. They will contain the appropriate - error message. - """ - - def __init__(self, msg, exc_info=None): - """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """ - super(DownloadError, self).__init__(msg) - self.exc_info = exc_info - - -class SameFileError(YoutubeDLError): - """Same File exception. 
- - This exception will be thrown by FileDownloader objects if they detect - multiple files would have to be downloaded to the same file on disk. - """ - pass - - -class PostProcessingError(YoutubeDLError): - """Post Processing exception. - - This exception may be raised by PostProcessor's .run() method to - indicate an error in the postprocessing task. - """ - - def __init__(self, msg): - super(PostProcessingError, self).__init__(msg) - self.msg = msg - - -class MaxDownloadsReached(YoutubeDLError): - """ --max-downloads limit has been reached. """ - pass - - -class UnavailableVideoError(YoutubeDLError): - """Unavailable Format exception. - - This exception will be thrown when a video is requested - in a format that is not available for that video. - """ - pass - - -class ContentTooShortError(YoutubeDLError): - """Content Too Short exception. - - This exception may be raised by FileDownloader objects when a file they - download is too small for what the server announced first, indicating - the connection was probably interrupted. 
- """ - - def __init__(self, downloaded, expected): - super(ContentTooShortError, self).__init__( - 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected) - ) - # Both in bytes - self.downloaded = downloaded - self.expected = expected - - -class XAttrMetadataError(YoutubeDLError): - def __init__(self, code=None, msg='Unknown error'): - super(XAttrMetadataError, self).__init__(msg) - self.code = code - self.msg = msg - - # Parsing code and msg - if (self.code in (errno.ENOSPC, errno.EDQUOT) - or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg): - self.reason = 'NO_SPACE' - elif self.code == errno.E2BIG or 'Argument list too long' in self.msg: - self.reason = 'VALUE_TOO_LONG' - else: - self.reason = 'NOT_SUPPORTED' - - -class XAttrUnavailableError(YoutubeDLError): - pass - - -def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): - # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting - # expected HTTP responses to meet HTTP/1.0 or later (see also - # https://github.com/ytdl-org/youtube-dl/issues/6727) - if sys.version_info < (3, 0): - kwargs['strict'] = True - hc = http_class(*args, **compat_kwargs(kwargs)) - source_address = ydl_handler._params.get('source_address') - - if source_address is not None: - # This is to workaround _create_connection() from socket where it will try all - # address data from getaddrinfo() including IPv6. This filters the result from - # getaddrinfo() based on the source_address value. - # This is based on the cpython socket.create_connection() function. - # https://github.com/python/cpython/blob/master/Lib/socket.py#L691 - def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None): - host, port = address - err = None - addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM) - af = socket.AF_INET if '.' 
in source_address[0] else socket.AF_INET6 - ip_addrs = [addr for addr in addrs if addr[0] == af] - if addrs and not ip_addrs: - ip_version = 'v4' if af == socket.AF_INET else 'v6' - raise socket.error( - "No remote IP%s addresses available for connect, can't use '%s' as source address" - % (ip_version, source_address[0])) - for res in ip_addrs: - af, socktype, proto, canonname, sa = res - sock = None - try: - sock = socket.socket(af, socktype, proto) - if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: - sock.settimeout(timeout) - sock.bind(source_address) - sock.connect(sa) - err = None # Explicitly break reference cycle - return sock - except socket.error as _: - err = _ - if sock is not None: - sock.close() - if err is not None: - raise err - else: - raise socket.error('getaddrinfo returns an empty list') - if hasattr(hc, '_create_connection'): - hc._create_connection = _create_connection - sa = (source_address, 0) - if hasattr(hc, 'source_address'): # Python 2.7+ - hc.source_address = sa - else: # Python 2.6 - def _hc_connect(self, *args, **kwargs): - sock = _create_connection( - (self.host, self.port), self.timeout, sa) - if is_https: - self.sock = ssl.wrap_socket( - sock, self.key_file, self.cert_file, - ssl_version=ssl.PROTOCOL_TLSv1) - else: - self.sock = sock - hc.connect = functools.partial(_hc_connect, hc) - - return hc - - -def handle_youtubedl_headers(headers): - filtered_headers = headers - - if 'Youtubedl-no-compression' in filtered_headers: - filtered_headers = filter_dict(filtered_headers, cndn=lambda k, _: k.lower() != 'accept-encoding') - del filtered_headers['Youtubedl-no-compression'] - - return filtered_headers - - -class YoutubeDLHandler(compat_urllib_request.HTTPHandler): - """Handler for HTTP requests and responses. - - This class, when installed with an OpenerDirector, automatically adds - the standard headers to every HTTP request and handles gzipped and - deflated responses from web servers. 
If compression is to be avoided in - a particular request, the original request in the program code only has - to include the HTTP header "Youtubedl-no-compression", which will be - removed before making the real request. - - Part of this code was copied from: - - http://techknack.net/python-urllib2-handlers/, archived at - https://web.archive.org/web/20130527205558/http://techknack.net/python-urllib2-handlers/ - - Andrew Rowls, the author of that code, agreed to release it to the - public domain. - """ - - def __init__(self, params, *args, **kwargs): - compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs) - self._params = params - - def http_open(self, req): - conn_class = compat_http_client.HTTPConnection - - socks_proxy = req.headers.get('Ytdl-socks-proxy') - if socks_proxy: - conn_class = make_socks_conn_class(conn_class, socks_proxy) - del req.headers['Ytdl-socks-proxy'] - - return self.do_open(functools.partial( - _create_http_connection, self, conn_class, False), - req) - - @staticmethod - def deflate_gz(data): - try: - # format:zlib,gzip + windowsize:32768 - return data and zlib.decompress(data, 32 + zlib.MAX_WBITS) - except zlib.error: - # raw zlib * windowsize:32768 (RFC 9110: "non-conformant") - return zlib.decompress(data, -zlib.MAX_WBITS) - - @staticmethod - def gzip(data): - - from gzip import GzipFile - - def _gzip(data): - with io.BytesIO(data) as data_buf: - gz = GzipFile(fileobj=data_buf, mode='rb') - return gz.read() - - try: - return _gzip(data) - except IOError as original_ioerror: - # There may be junk at the end of the file - # See http://stackoverflow.com/q/4928560/35070 for details - for i in range(1, 1024): - try: - return _gzip(data[:-i]) - except IOError: - continue - else: - raise original_ioerror - - @staticmethod - def brotli(data): - return data and brotli.decompress(data) - - @staticmethod - def compress(data): - return data and ncompress.decompress(data) - - @staticmethod - def _fix_path(url): - # an embedded /../ or 
/./ sequence is not automatically handled by urllib2 - # see https://github.com/yt-dlp/yt-dlp/issues/3355 - parsed_url = compat_urllib_parse.urlsplit(url) - path = parsed_url.path - if not path.endswith('/'): - path += '/' - parts = path.partition('/./') - if not parts[1]: - parts = path.partition('/../') - if parts[1]: - path = compat_urllib_parse.urljoin( - parts[0] + parts[1][:1], - parts[1][1:] + (parts[2] if parsed_url.path.endswith('/') else parts[2][:-1])) - url = parsed_url._replace(path=path).geturl() - if '/.' in url: - # worse, URL path may have initial /../ against RFCs: work-around - # by stripping such prefixes, like eg Firefox - path = parsed_url.path + '/' - while path.startswith('/.'): - if path.startswith('/../'): - path = path[3:] - elif path.startswith('/./'): - path = path[2:] - else: - break - path = path[:-1] - if not path.startswith('/') and parsed_url.path.startswith('/'): - path = '/' + path - url = parsed_url._replace(path=path).geturl() - return url - - def http_request(self, req): - url = req.get_full_url() - # resolve embedded . and .. - url_fixed = self._fix_path(url) - # According to RFC 3986, URLs can not contain non-ASCII characters; however this is not - # always respected by websites: some tend to give out URLs with non percent-encoded - # non-ASCII characters (see telemb.py, ard.py [#3412]) - # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) - # To work around aforementioned issue we will replace request's original URL with - # percent-encoded one - # Since redirects are also affected (e.g. 
http://www.southpark.de/alle-episoden/s18e09) - # the code of this workaround has been moved here from YoutubeDL.urlopen() - url_escaped = escape_url(url_fixed) - - # Substitute URL if any change after escaping - if url != url_escaped: - req = update_Request(req, url=url_escaped) - - for h, v in std_headers.items(): - # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 - # The dict keys are capitalized because of this bug by urllib - if h.capitalize() not in req.headers: - req.add_header(h, v) - - # Similarly, 'Accept-encoding' - if 'Accept-encoding' not in req.headers: - req.add_header( - 'Accept-Encoding', join_nonempty( - 'gzip', 'deflate', brotli and 'br', ncompress and 'compress', - delim=', ')) - - req.headers = handle_youtubedl_headers(req.headers) - - if sys.version_info < (2, 7): - # avoid possible race where __r_type may be unset - req.get_type() - if '#' in req.get_full_url(): - # Python 2.6 is brain-dead when it comes to fragments - req._Request__original = req._Request__original.partition('#')[0] - req._Request__r_type = req._Request__r_type.partition('#')[0] - - # Use the totally undocumented AbstractHTTPHandler per - # https://github.com/yt-dlp/yt-dlp/pull/4158 - return compat_urllib_request.AbstractHTTPHandler.do_request_(self, req) - - def http_response(self, req, resp): - old_resp = resp - - # Content-Encoding header lists the encodings in order that they were applied [1]. - # To decompress, we simply do the reverse. - # [1]: https://datatracker.ietf.org/doc/html/rfc9110#name-content-encoding - decoded_response = None - decoders = { - 'gzip': self.deflate_gz, - 'deflate': self.deflate_gz, - } - if brotli: - decoders['br'] = self.brotli - if ncompress: - decoders['compress'] = self.compress - if sys.platform.startswith('java'): - # Jython zlib implementation misses gzip - decoders['gzip'] = self.gzip - - def encodings(hdrs): - # A header field that allows multiple values can have multiple instances [2]. 
- # [2]: https://datatracker.ietf.org/doc/html/rfc9110#name-fields - for e in reversed(','.join(hdrs).split(',')): - if e: - yield e.strip() - - encodings_left = [] - try: - resp.headers.get_all - hdrs = resp.headers - except AttributeError: - # Py2 has no get_all() method: headers are rfc822.Message - from email.message import Message - hdrs = Message() - for k, v in resp.headers.items(): - hdrs[k] = v - - decoder, decoded_response = True, None - for encoding in encodings(hdrs.get_all('Content-Encoding', [])): - # "SHOULD consider" x-compress, x-gzip as compress, gzip - decoder = decoder and decoders.get(remove_start(encoding, 'x-')) - if not decoder: - encodings_left.insert(0, encoding) - continue - decoded_response = decoder(decoded_response or resp.read()) - if decoded_response is not None: - resp = compat_urllib_request.addinfourl( - io.BytesIO(decoded_response), old_resp.headers, old_resp.url, old_resp.code) - resp.msg = old_resp.msg - del resp.headers['Content-Length'] - resp.headers['Content-Length'] = '%d' % len(decoded_response) - del resp.headers['Content-Encoding'] - if encodings_left: - resp.headers['Content-Encoding'] = ', '.join(encodings_left) - - # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see - # https://github.com/ytdl-org/youtube-dl/issues/6457). - if 300 <= resp.code < 400: - location = resp.headers.get('Location') - if location: - # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 - if sys.version_info >= (3, 0): - location = location.encode('iso-8859-1') - location = location.decode('utf-8') - # resolve embedded . and .. 
- location_fixed = self._fix_path(location) - location_escaped = escape_url(location_fixed) - if location != location_escaped: - del resp.headers['Location'] - if not isinstance(location_escaped, str): # Py 2 case - location_escaped = location_escaped.encode('utf-8') - resp.headers['Location'] = location_escaped - return resp - - https_request = http_request - https_response = http_response - - -def make_socks_conn_class(base_class, socks_proxy): - assert issubclass(base_class, ( - compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection)) - - url_components = compat_urllib_parse.urlparse(socks_proxy) - if url_components.scheme.lower() == 'socks5': - socks_type = ProxyType.SOCKS5 - elif url_components.scheme.lower() in ('socks', 'socks4'): - socks_type = ProxyType.SOCKS4 - elif url_components.scheme.lower() == 'socks4a': - socks_type = ProxyType.SOCKS4A - - def unquote_if_non_empty(s): - if not s: - return s - return compat_urllib_parse_unquote_plus(s) - - proxy_args = ( - socks_type, - url_components.hostname, url_components.port or 1080, - True, # Remote DNS - unquote_if_non_empty(url_components.username), - unquote_if_non_empty(url_components.password), - ) - - class SocksConnection(base_class): - def connect(self): - self.sock = sockssocket() - self.sock.setproxy(*proxy_args) - if type(self.timeout) in (int, float): - self.sock.settimeout(self.timeout) - self.sock.connect((self.host, self.port)) - - if isinstance(self, compat_http_client.HTTPSConnection): - if hasattr(self, '_context'): # Python > 2.6 - self.sock = self._context.wrap_socket( - self.sock, server_hostname=self.host) - else: - self.sock = ssl.wrap_socket(self.sock) - - return SocksConnection - - -class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): - def __init__(self, params, https_conn_class=None, *args, **kwargs): - compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs) - self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection - 
self._params = params - - def https_open(self, req): - kwargs = {} - conn_class = self._https_conn_class - - if hasattr(self, '_context'): # python > 2.6 - kwargs['context'] = self._context - if hasattr(self, '_check_hostname'): # python 3.x - kwargs['check_hostname'] = self._check_hostname - - socks_proxy = req.headers.get('Ytdl-socks-proxy') - if socks_proxy: - conn_class = make_socks_conn_class(conn_class, socks_proxy) - del req.headers['Ytdl-socks-proxy'] - - return self.do_open(functools.partial( - _create_http_connection, self, conn_class, True), - req, **kwargs) - - -class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): - """ - See [1] for cookie file format. - - 1. https://curl.haxx.se/docs/http-cookies.html - """ - _HTTPONLY_PREFIX = '#HttpOnly_' - _ENTRY_LEN = 7 - _HEADER = '''# Netscape HTTP Cookie File -# This file is generated by youtube-dl. Do not edit. - -''' - _CookieFileEntry = collections.namedtuple( - 'CookieFileEntry', - ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value')) - - def save(self, filename=None, ignore_discard=False, ignore_expires=False): - """ - Save cookies to a file. - - Most of the code is taken from CPython 3.8 and slightly adapted - to support cookie files with UTF-8 in both python 2 and 3. 
- """ - if filename is None: - if self.filename is not None: - filename = self.filename - else: - raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT) - - # Store session cookies with `expires` set to 0 instead of an empty - # string - for cookie in self: - if cookie.expires is None: - cookie.expires = 0 - - with io.open(filename, 'w', encoding='utf-8') as f: - f.write(self._HEADER) - now = time.time() - for cookie in self: - if not ignore_discard and cookie.discard: - continue - if not ignore_expires and cookie.is_expired(now): - continue - if cookie.secure: - secure = 'TRUE' - else: - secure = 'FALSE' - if cookie.domain.startswith('.'): - initial_dot = 'TRUE' - else: - initial_dot = 'FALSE' - if cookie.expires is not None: - expires = compat_str(cookie.expires) - else: - expires = '' - if cookie.value is None: - # cookies.txt regards 'Set-Cookie: foo' as a cookie - # with no name, whereas http.cookiejar regards it as a - # cookie with no value. - name = '' - value = cookie.name - else: - name = cookie.name - value = cookie.value - f.write( - '\t'.join([cookie.domain, initial_dot, cookie.path, - secure, expires, name, value]) + '\n') - - def load(self, filename=None, ignore_discard=False, ignore_expires=False): - """Load cookies from a file.""" - if filename is None: - if self.filename is not None: - filename = self.filename - else: - raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT) - - def prepare_line(line): - if line.startswith(self._HTTPONLY_PREFIX): - line = line[len(self._HTTPONLY_PREFIX):] - # comments and empty lines are fine - if line.startswith('#') or not line.strip(): - return line - cookie_list = line.split('\t') - if len(cookie_list) != self._ENTRY_LEN: - raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list)) - cookie = self._CookieFileEntry(*cookie_list) - if cookie.expires_at and not cookie.expires_at.isdigit(): - raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at) - return line - - cf = 
io.StringIO() - with io.open(filename, encoding='utf-8') as f: - for line in f: - try: - cf.write(prepare_line(line)) - except compat_cookiejar.LoadError as e: - write_string( - 'WARNING: skipping cookie file entry due to %s: %r\n' - % (e, line), sys.stderr) - continue - cf.seek(0) - self._really_load(cf, filename, ignore_discard, ignore_expires) - # Session cookies are denoted by either `expires` field set to - # an empty string or 0. MozillaCookieJar only recognizes the former - # (see [1]). So we need force the latter to be recognized as session - # cookies on our own. - # Session cookies may be important for cookies-based authentication, - # e.g. usually, when user does not check 'Remember me' check box while - # logging in on a site, some important cookies are stored as session - # cookies so that not recognizing them will result in failed login. - # 1. https://bugs.python.org/issue17164 - for cookie in self: - # Treat `expires=0` cookies as session cookies - if cookie.expires == 0: - cookie.expires = None - cookie.discard = True - - def get_cookie_header(self, url): - """Generate a Cookie HTTP header for a given url""" - cookie_req = sanitized_Request(url) - self.add_cookie_header(cookie_req) - return cookie_req.get_header('Cookie') - - def get_cookies_for_url(self, url): - """Generate a list of Cookie objects for a given url""" - # Policy `_now` attribute must be set before calling `_cookies_for_request` - # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360 - self._policy._now = self._now = int(time.time()) - return self._cookies_for_request(sanitized_Request(url)) - - -class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor): - def __init__(self, cookiejar=None): - compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar) - - def http_response(self, request, response): - # Python 2 will choke on next HTTP request in row if there are non-ASCII - # characters in Set-Cookie HTTP header of last response (see 
- # https://github.com/ytdl-org/youtube-dl/issues/6769). - # In order to at least prevent crashing we will percent encode Set-Cookie - # header before HTTPCookieProcessor starts processing it. - # if sys.version_info < (3, 0) and response.headers: - # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'): - # set_cookie = response.headers.get(set_cookie_header) - # if set_cookie: - # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ") - # if set_cookie != set_cookie_escaped: - # del response.headers[set_cookie_header] - # response.headers[set_cookie_header] = set_cookie_escaped - return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response) - - https_request = compat_urllib_request.HTTPCookieProcessor.http_request - https_response = http_response - - -class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler): - """YoutubeDL redirect handler - - The code is based on HTTPRedirectHandler implementation from CPython [1]. - - This redirect handler fixes and improves the logic to better align with RFC7261 - and what browsers tend to do [2][3] - - 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py - 2. https://datatracker.ietf.org/doc/html/rfc7231 - 3. https://github.com/python/cpython/issues/91306 - """ - - # Supply possibly missing alias - http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302 - - def redirect_request(self, req, fp, code, msg, headers, newurl): - """Return a Request or None in response to a redirect. - - This is called by the http_error_30x methods when a - redirection response is received. If a redirection should - take place, return a new Request to allow http_error_30x to - perform the redirect. Otherwise, raise HTTPError if no-one - else should try to handle this url. Return None if you can't - but another Handler might. 
- """ - if code not in (301, 302, 303, 307, 308): - raise compat_urllib_HTTPError(req.full_url, code, msg, headers, fp) - - new_method = req.get_method() - new_data = req.data - - # On python 2 urlh.geturl() may sometimes return redirect URL - # as a byte string instead of unicode. This workaround forces - # it to return unicode. - newurl = _decode_compat_str(newurl) - - # Be conciliant with URIs containing a space. This is mainly - # redundant with the more complete encoding done in http_error_302(), - # but it is kept for compatibility with other callers. - newurl = newurl.replace(' ', '%20') - - # Technically the Cookie header should be in unredirected_hdrs; - # however in practice some may set it in normal headers anyway. - # We will remove it here to prevent any leaks. - remove_headers = ['Cookie'] - - # A 303 must either use GET or HEAD for subsequent request - # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4 - if code == 303 and req.get_method() != 'HEAD': - new_method = 'GET' - # 301 and 302 redirects are commonly turned into a GET from a POST - # for subsequent requests by browsers, so we'll do the same. - # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2 - # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3 - elif code in (301, 302) and req.get_method() == 'POST': - new_method = 'GET' - - # only remove payload if method changed (e.g. POST to GET) - if new_method != req.get_method(): - new_data = None - remove_headers.extend(['Content-Length', 'Content-Type']) - - new_headers = filter_dict(req.headers, cndn=lambda k, _: k.title() not in remove_headers) - - return compat_urllib_request.Request( - newurl, headers=new_headers, origin_req_host=req.origin_req_host, - unverifiable=True, method=new_method, data=new_data) - - -def extract_timezone(date_str): - m = re.search( - r'''(?x) - ^.{8,}? 
# >=8 char non-TZ prefix, if present - (?PZ| # just the UTC Z, or - (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or - (?= 4 alpha or 2 digits - [ ]? # optional space - (?P\+|-) # +/- - (?P[0-9]{2}):?(?P[0-9]{2}) # hh[:]mm - $) - ''', date_str) - if not m: - m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P\s*[A-Z]+)$', date_str) - timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip()) - if timezone is not None: - date_str = date_str[:-len(m.group('tz'))] - timezone = datetime.timedelta(hours=timezone or 0) - else: - date_str = date_str[:-len(m.group('tz'))] - if not m.group('sign'): - timezone = datetime.timedelta() - else: - sign = 1 if m.group('sign') == '+' else -1 - timezone = datetime.timedelta( - hours=sign * int(m.group('hours')), - minutes=sign * int(m.group('minutes'))) - return timezone, date_str - - -def parse_iso8601(date_str, delimiter='T', timezone=None): - """ Return a UNIX timestamp from the given date """ - - if date_str is None: - return None - - date_str = re.sub(r'\.[0-9]+', '', date_str) - - if timezone is None: - timezone, date_str = extract_timezone(date_str) - - try: - date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter) - dt = datetime.datetime.strptime(date_str, date_format) - timezone - return calendar.timegm(dt.timetuple()) - except ValueError: - pass - - -def date_formats(day_first=True): - return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST - - -def unified_strdate(date_str, day_first=True): - """Return a string with the date in the format YYYYMMDD""" - - if date_str is None: - return None - upload_date = None - # Replace commas - date_str = date_str.replace(',', ' ') - # Remove AM/PM + timezone - date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str) - _, date_str = extract_timezone(date_str) - - for expression in date_formats(day_first): - try: - upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') - except ValueError: - pass - if upload_date is None: - 
timetuple = email.utils.parsedate_tz(date_str) - if timetuple: - try: - upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') - except ValueError: - pass - if upload_date is not None: - return compat_str(upload_date) - - -def unified_timestamp(date_str, day_first=True): - if date_str is None: - return None - - date_str = re.sub(r'\s+', ' ', re.sub( - r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str)) - - pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0 - timezone, date_str = extract_timezone(date_str) - - # Remove AM/PM + timezone - date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str) - - # Remove unrecognized timezones from ISO 8601 alike timestamps - m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P\s*[A-Z]+)$', date_str) - if m: - date_str = date_str[:-len(m.group('tz'))] - - # Python only supports microseconds, so remove nanoseconds - m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str) - if m: - date_str = m.group(1) - - for expression in date_formats(day_first): - try: - dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta) - return calendar.timegm(dt.timetuple()) - except ValueError: - pass - timetuple = email.utils.parsedate_tz(date_str) - if timetuple: - return calendar.timegm(timetuple) + pm_delta * 3600 - compat_datetime_timedelta_total_seconds(timezone) - - -def determine_ext(url, default_ext='unknown_video'): - if url is None or '.' not in url: - return default_ext - guess = url.partition('?')[0].rpartition('.')[2] - if re.match(r'^[A-Za-z0-9]+$', guess): - return guess - # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download - elif guess.rstrip('/') in KNOWN_EXTENSIONS: - return guess.rstrip('/') - else: - return default_ext - - -def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None): - return replace_extension(filename, sub_lang + '.' 
+ sub_format, expected_real_ext) - - -def date_from_str(date_str): - """ - Return a datetime object from a string in the format YYYYMMDD or - (now|today)[+-][0-9](day|week|month|year)(s)?""" - today = datetime.date.today() - if date_str in ('now', 'today'): - return today - if date_str == 'yesterday': - return today - datetime.timedelta(days=1) - match = re.match(r'(now|today)(?P[+-])(?P