feat: improved the song title cleaning function to remove redundant brackets
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful

This commit is contained in:
Hazel 2024-04-19 12:02:54 +02:00
parent 1e62d371cd
commit 06acf22abb

View File

@ -11,6 +11,9 @@ from pathvalidate import sanitize_filename
# Known title suffixes, stored in lowercase.
# NOTE(review): presumably these are stripped from the end of a raw song
# title by clean_song_title -- confirm against the loop that iterates them.
COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = ("(official video)",)

# Opening and closing bracket characters. The two strings are paired by
# position (they are zipped together), so index i of OPEN_BRACKETS must
# correspond to index i of CLOSE_BRACKETS.
OPEN_BRACKETS = "(["
CLOSE_BRACKETS = ")]"

# A bracketed section of a song title is removed when its content contains
# any of these substrings.
DISALLOWED_SUBSTRING_IN_BRACKETS = (
    "official",
    "video",
    "audio",
    "lyrics",
    "prod",
    "remix",
    "ft",
    "feat",
    "ft.",
    "feat.",
)
@lru_cache
def unify(string: str) -> str:
@ -71,6 +74,26 @@ def clean_song_title(raw_song_title: str, artist_name: Optional[str] = None) ->
if raw_song_title.lower().endswith(dirty_appendix):
raw_song_title = raw_song_title[:-len(dirty_appendix)].strip()
# remove brackets and their content if they contain disallowed substrings
for open_bracket, close_bracket in zip(OPEN_BRACKETS, CLOSE_BRACKETS):
start = 0
while True:
try:
open_bracket_index = raw_song_title.index(open_bracket, start)
except ValueError:
break
try:
close_bracket_index = raw_song_title.index(close_bracket, open_bracket_index + 1)
except ValueError:
break
substring = raw_song_title[open_bracket_index + 1:close_bracket_index]
if any(disallowed_substring in substring for disallowed_substring in DISALLOWED_SUBSTRING_IN_BRACKETS):
raw_song_title = raw_song_title[:open_bracket_index] + raw_song_title[close_bracket_index + 1:]
else:
start = close_bracket_index + 1
# everything that requires the artist name
if artist_name is not None:
artist_name = artist_name.strip()