feat: improved the song title cleaning function to remove redundant brackets
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
This commit is contained in:
@@ -11,6 +11,9 @@ from pathvalidate import sanitize_filename
|
||||
COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = (
|
||||
"(official video)",
|
||||
)
|
||||
OPEN_BRACKETS = "(["
|
||||
CLOSE_BRACKETS = ")]"
|
||||
DISALLOWED_SUBSTRING_IN_BRACKETS = ("official", "video", "audio", "lyrics", "prod", "remix", "ft", "feat", "ft.", "feat.")
|
||||
|
||||
@lru_cache
|
||||
def unify(string: str) -> str:
|
||||
@@ -71,6 +74,26 @@ def clean_song_title(raw_song_title: str, artist_name: Optional[str] = None) ->
|
||||
if raw_song_title.lower().endswith(dirty_appendix):
|
||||
raw_song_title = raw_song_title[:-len(dirty_appendix)].strip()
|
||||
|
||||
# remove brackets and their content if they contain disallowed substrings
|
||||
for open_bracket, close_bracket in zip(OPEN_BRACKETS, CLOSE_BRACKETS):
|
||||
start = 0
|
||||
|
||||
while True:
|
||||
try:
|
||||
open_bracket_index = raw_song_title.index(open_bracket, start)
|
||||
except ValueError:
|
||||
break
|
||||
try:
|
||||
close_bracket_index = raw_song_title.index(close_bracket, open_bracket_index + 1)
|
||||
except ValueError:
|
||||
break
|
||||
|
||||
substring = raw_song_title[open_bracket_index + 1:close_bracket_index]
|
||||
if any(disallowed_substring in substring for disallowed_substring in DISALLOWED_SUBSTRING_IN_BRACKETS):
|
||||
raw_song_title = raw_song_title[:open_bracket_index] + raw_song_title[close_bracket_index + 1:]
|
||||
else:
|
||||
start = close_bracket_index + 1
|
||||
|
||||
# everything that requires the artist name
|
||||
if artist_name is not None:
|
||||
artist_name = artist_name.strip()
|
||||
|
Reference in New Issue
Block a user