diff --git a/.vscode/settings.json b/.vscode/settings.json index 3f0bf4f..d33da75 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -16,6 +16,7 @@ }, "python.formatting.provider": "none", "cSpell.words": [ - "Bandcamp" + "Bandcamp", + "levenshtein" ] } \ No newline at end of file diff --git a/music_kraken/cli/main_downloader.py b/music_kraken/cli/main_downloader.py index be53252..36541fe 100644 --- a/music_kraken/cli/main_downloader.py +++ b/music_kraken/cli/main_downloader.py @@ -7,7 +7,7 @@ from .utils import cli_function from .options.first_config import initial_config from ..utils.config import write_config, main_settings -from ..utils.regex import URL_PATTERN +from ..utils.shared import URL_PATTERN from ..utils.string_processing import fit_to_file_system from ..utils.support_classes.query import Query from ..utils.support_classes.download_result import DownloadResult diff --git a/music_kraken/utils/phonetic_compares.py b/music_kraken/utils/phonetic_compares.py deleted file mode 100644 index 65f5deb..0000000 --- a/music_kraken/utils/phonetic_compares.py +++ /dev/null @@ -1,57 +0,0 @@ -import jellyfish -import string - -TITLE_THRESHOLD_LEVENSHTEIN = 1 -UNIFY_TO = " " - -ALLOWED_LENGTH_DISTANCE = 20 - - -def unify_punctuation(to_unify: str) -> str: - for char in string.punctuation: - to_unify = to_unify.replace(char, UNIFY_TO) - return to_unify - - -def remove_feature_part_from_track(title: str) -> str: - if ")" != title[-1]: - return title - if "(" not in title: - return title - - return title[:title.index("(")] - - -def modify_title(to_modify: str) -> str: - to_modify = to_modify.strip() - to_modify = to_modify.lower() - to_modify = remove_feature_part_from_track(to_modify) - to_modify = unify_punctuation(to_modify) - return to_modify - - -def match_titles(title_1: str, title_2: str): - title_1, title_2 = modify_title(title_1), modify_title(title_2) - distance = jellyfish.levenshtein_distance(title_1, title_2) - return distance > TITLE_THRESHOLD_LEVENSHTEIN, distance - - -def match_artists(artist_1, artist_2: str): - if type(artist_1) == list: - distances = [] - - for artist_1_ in artist_1: - match, distance = match_titles(artist_1_, artist_2) - if not match: - return match, distance - - distances.append(distance) - return True, min(distances) - return match_titles(artist_1, artist_2) - -def match_length(length_1: int | None, length_2: int | None) -> bool: - # returning true if either one is Null, because if one value is not known, - # then it shouldn't be an attribute which could reject an audio source - if length_1 is None or length_2 is None: - return True - return abs(length_1 - length_2) <= ALLOWED_LENGTH_DISTANCE diff --git a/music_kraken/utils/regex.py b/music_kraken/utils/regex.py deleted file mode 100644 index d8f58f5..0000000 --- a/music_kraken/utils/regex.py +++ /dev/null @@ -1,3 +0,0 @@ -URL_PATTERN = r"https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+" -INT_PATTERN = r"^\d*$" -FLOAT_PATTERN = r"^[\d|\,|\.]*$" diff --git a/music_kraken/utils/shared.py b/music_kraken/utils/shared.py index 31cadec..d7be692 100644 --- a/music_kraken/utils/shared.py +++ b/music_kraken/utils/shared.py @@ -3,7 +3,7 @@ import random from .path_manager import LOCATIONS from .config import main_settings -DEBUG = True +DEBUG = False DEBUG_LOGGING = DEBUG and True DEBUG_YOUTUBE_INITIALIZING = DEBUG and False DEBUG_PAGES = DEBUG and False @@ -32,3 +32,8 @@ to download: > d: https://musify.club/release/some-random-release-183028492 have fun :3""".strip() + +# regex pattern +URL_PATTERN = r"https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+" +INT_PATTERN = r"^\d*$" +FLOAT_PATTERN = r"^[\d|\,|\.]*$" diff --git a/music_kraken/utils/string_processing.py b/music_kraken/utils/string_processing.py index 3c1cedc..39963e9 100644 --- a/music_kraken/utils/string_processing.py +++ b/music_kraken/utils/string_processing.py @@ -1,5 +1,6 @@ from typing import Tuple, Union from pathlib import Path +import string from transliterate.exceptions import LanguageDetectionError from transliterate import translit @@ -82,3 +83,60 @@ def comment(uncommented_string: str) -> str: _fragments = ["# " + frag for frag in _fragments] return "\n".join(_fragments) + +# comparisons +TITLE_THRESHOLD_LEVENSHTEIN = 1 +UNIFY_TO = " " + +ALLOWED_LENGTH_DISTANCE = 20 + + +def unify_punctuation(to_unify: str) -> str: + for char in string.punctuation: + to_unify = to_unify.replace(char, UNIFY_TO) + return to_unify + + +def remove_feature_part_from_track(title: str) -> str: + if ")" != title[-1]: + return title + if "(" not in title: + return title + + return title[:title.index("(")] + + +def modify_title(to_modify: str) -> str: + to_modify = to_modify.strip() + to_modify = to_modify.lower() + to_modify = remove_feature_part_from_track(to_modify) + to_modify = unify_punctuation(to_modify) + return to_modify + + +def match_titles(title_1: str, title_2: str): + title_1, title_2 = modify_title(title_1), modify_title(title_2) + distance = jellyfish.levenshtein_distance(title_1, title_2) + return distance > TITLE_THRESHOLD_LEVENSHTEIN, distance + + +def match_artists(artist_1, artist_2: str): + if type(artist_1) == list: + distances = [] + + for artist_1_ in artist_1: + match, distance = match_titles(artist_1_, artist_2) + if not match: + return match, distance + + distances.append(distance) + return True, min(distances) + return match_titles(artist_1, artist_2) + +def match_length(length_1: int | None, length_2: int | None) -> bool: + # returning true if either one is Null, because if one value is not known, + # then it shouldn't be an attribute which could reject an audio source + if length_1 is None or length_2 is None: + return True + return abs(length_1 - length_2) <= ALLOWED_LENGTH_DISTANCE + diff --git a/music_kraken/utils/support_classes/__init__.py b/music_kraken/utils/support_classes/__init__.py index 84edf54..e69de29 100644 --- a/music_kraken/utils/support_classes/__init__.py +++ b/music_kraken/utils/support_classes/__init__.py @@ -1 +0,0 @@ -from .thread_classes import EndThread, FinishedSearch diff --git a/music_kraken/utils/support_classes/hacking.py b/music_kraken/utils/support_classes/hacking.py deleted file mode 100644 index 1125768..0000000 --- a/music_kraken/utils/support_classes/hacking.py +++ /dev/null @@ -1,104 +0,0 @@ -import weakref -from types import FunctionType -from functools import wraps - -from typing import Dict, Set - -class Lake: - def __init__(self): - self.redirects: Dict[int, int] = {} - self.id_to_object: Dict[int, object] = {} - - def get_real_object(self, db_object: object) -> object: - _id = id(db_object) - while _id in self.redirects: - _id = self.redirects[_id] - - try: - return self.id_to_object[_id] - except KeyError: - self.add(db_object) - return db_object - - def add(self, db_object: object): - self.id_to_object[id(db_object)] = db_object - - def override(self, to_override: object, new_db_object: object): - _id = id(to_override) - while _id in self.redirects: - _id = self.redirects[_id] - - if id(new_db_object) in self.id_to_object: - print("!!!!!") - - self.add(new_db_object) - self.redirects[_id] = id(new_db_object) - # if _id in self.id_to_object: - # del self.id_to_object[_id] - - def is_same(self, __object: object, other: object) -> bool: - _self_id = id(__object) - while _self_id in self.redirects: - _self_id = self.redirects[_self_id] - - _other_id = id(other) - while _other_id in self.redirects: - _other_id = self.redirects[_other_id] - - return _self_id == _other_id - - -lake = Lake() - - -def wrapper(method): - @wraps(method) - def wrapped(*args, **kwargs): - return method(*(lake.get_real_object(args[0]), *args[1:]), **kwargs) - - return wrapped - - -class BaseClass: - def __new__(cls, *args, **kwargs): - instance = cls(*args, **kwargs) - print("new") - lake.add(instance) - return instance - - def __eq__(self, other): - return lake.is_same(self, other) - - def _risky_merge(self, to_replace): - lake.override(to_replace, self) - - -class MetaClass(type): - def __new__(meta, classname, bases, classDict): - bases = (*bases, BaseClass) - newClassDict = {} - - ignore_functions: Set[str] = {"__new__", "__init__"} - - for attributeName, attribute in classDict.items(): - if isinstance(attribute, FunctionType) and (attributeName not in ignore_functions): - """ - The funktion new and init shouldn't be accounted for because we can assume the class is - independent on initialization. - """ - attribute = wrapper(attribute) - - newClassDict[attributeName] = attribute - - print() - - for key, value in object.__dict__.items(): - # hasattr( value, '__call__' ) and - if hasattr(value, '__call__') and value not in newClassDict and key not in ("__new__", "__init__"): - newClassDict[key] = wrapper(value) - - new_instance = type.__new__(meta, classname, bases, newClassDict) - - lake.add(new_instance) - - return new_instance diff --git a/music_kraken/utils/support_classes/thread_classes.py b/music_kraken/utils/support_classes/thread_classes.py deleted file mode 100644 index 1a17e57..0000000 --- a/music_kraken/utils/support_classes/thread_classes.py +++ /dev/null @@ -1,12 +0,0 @@ -class EndThread: - _has_ended: bool = False - - def __bool__(self): - return self._has_ended - - def exit(self): - self._has_ended - -class FinishedSearch: - pass - \ No newline at end of file