2022-11-09 18:01:13 +00:00
|
|
|
import jellyfish
|
2022-11-10 09:01:26 +00:00
|
|
|
import string
|
2022-11-09 18:01:13 +00:00
|
|
|
|
2022-11-23 22:44:25 +00:00
|
|
|
# Largest Levenshtein distance that match_titles still tolerates; it flags a
# pair only when the distance is strictly greater than this value.
TITLE_THRESHOLD_LEVENSHTEIN = 1
|
2022-11-10 09:01:26 +00:00
|
|
|
# Replacement string that unify_punctuation substitutes for every character
# of string.punctuation.
UNIFY_TO = " "
|
|
|
|
|
2022-11-29 13:49:56 +00:00
|
|
|
# Largest absolute difference between two lengths that match_length accepts
# (inclusive). NOTE(review): the unit is not visible here — confirm with callers.
ALLOWED_LENGTH_DISTANCE = 20
|
|
|
|
|
2022-11-10 09:01:26 +00:00
|
|
|
|
|
|
|
def unify_punctuation(to_unify: str) -> str:
    """Return ``to_unify`` with each ``string.punctuation`` character replaced.

    Every punctuation character is mapped to ``UNIFY_TO``; all other
    characters are kept unchanged.
    """
    # Map each punctuation character to the shared replacement and apply the
    # whole mapping in one pass over the input.
    replacement_table = str.maketrans(dict.fromkeys(string.punctuation, UNIFY_TO))
    return to_unify.translate(replacement_table)
|
2022-11-09 18:01:13 +00:00
|
|
|
|
|
|
|
|
2022-11-13 15:40:44 +00:00
|
|
|
def remove_feature_part_from_track(title: str) -> str:
    """Drop a trailing parenthesised suffix (e.g. ``"(feat. x)"``) from a title.

    The title is only shortened when it ends with ``")"`` and contains a
    ``"("``; in that case everything from the *first* ``"("`` onwards is cut
    off. The content of the parentheses is not inspected. Any other title,
    including the empty string, is returned unchanged.

    Args:
        title: The track title to shorten.

    Returns:
        The title without its parenthesised tail, or the unchanged title.
    """
    # endswith() rather than title[-1]: indexing raised IndexError for "".
    if not title.endswith(")"):
        return title

    head, opening_paren, _ = title.partition("(")
    if not opening_paren:
        # Ends with ")" but has no "(" to cut at.
        return title

    return head
|
|
|
|
|
|
|
|
|
|
|
|
def modify_title(to_modify: str) -> str:
    """Normalise a title so that two titles can be compared fuzzily.

    Steps, in order: strip surrounding whitespace, lowercase, remove a
    trailing parenthesised part (``remove_feature_part_from_track``), replace
    punctuation (``unify_punctuation``), then strip once more.

    Args:
        to_modify: The raw title.

    Returns:
        The normalised title.
    """
    # Strip first so that a closing ")" really is the last character when the
    # parenthesised tail is looked for.
    normalised = to_modify.strip().lower()
    normalised = remove_feature_part_from_track(normalised)
    normalised = unify_punctuation(normalised)
    # The two steps above can leave whitespace at the edges
    # ("song (feat. x)" -> "song "), which would otherwise count as an edit
    # when the result is compared against "song".
    return normalised.strip()
|
|
|
|
|
|
|
|
|
2022-11-14 14:44:32 +00:00
|
|
|
def match_titles(title_1: str, title_2: str):
    """Measure how far apart two titles are after normalisation.

    Both titles are passed through ``modify_title`` and the Levenshtein
    distance between the results is computed with ``jellyfish``.

    Returns:
        A tuple ``(exceeds_threshold, distance)``. The first element is
        ``True`` when the distance is strictly greater than
        ``TITLE_THRESHOLD_LEVENSHTEIN`` and ``False`` otherwise; the second
        element is the distance itself.
    """
    normalised_1 = modify_title(title_1)
    normalised_2 = modify_title(title_2)

    edit_distance = jellyfish.levenshtein_distance(normalised_1, normalised_2)
    exceeds_threshold = edit_distance > TITLE_THRESHOLD_LEVENSHTEIN
    return exceeds_threshold, edit_distance
|
|
|
|
|
|
|
|
|
2022-11-14 14:44:32 +00:00
|
|
|
def match_artists(artist_1, artist_2: str):
    """Compare one artist name, or a list of them, against ``artist_2``.

    The comparison itself is delegated to ``match_titles``, so the returned
    flag has the same meaning: ``True`` when the distance exceeds the
    threshold, ``False`` when it is within it.

    Args:
        artist_1: A single artist name, or a list of artist names.
        artist_2: The artist name to compare against.

    Returns:
        A tuple ``(exceeds_threshold, distance)``. For a list, the result of
        the first entry that is within the threshold is returned immediately;
        if no entry is, the result is ``(True, smallest distance seen)``.

    Raises:
        ValueError: If ``artist_1`` is an empty list.
    """
    # isinstance() instead of type(...) == list so list subclasses are
    # treated as lists as well.
    if not isinstance(artist_1, list):
        return match_titles(artist_1, artist_2)

    if not artist_1:
        # Previously surfaced as an opaque ValueError from min([]).
        raise ValueError("artist_1 must contain at least one artist name")

    distances = []
    for candidate in artist_1:
        exceeds_threshold, distance = match_titles(candidate, artist_2)
        if not exceeds_threshold:
            # One close-enough artist is sufficient; stop at the first hit.
            return exceeds_threshold, distance
        distances.append(distance)

    return True, min(distances)
|
2022-11-29 13:49:56 +00:00
|
|
|
|
2022-11-29 15:55:13 +00:00
|
|
|
def match_length(length_1: int | None, length_2: int | None) -> bool:
|
|
|
|
# returning true if either one is Null, because if one value is not known,
|
|
|
|
# then it shouldn't be an attribute which could reject an audio source
|
|
|
|
if length_1 is None or length_2 is None:
|
|
|
|
return True
|
2022-11-29 13:49:56 +00:00
|
|
|
return abs(length_1 - length_2) <= ALLOWED_LENGTH_DISTANCE
|