# music-kraken-core/src/music_kraken/utils/string_processing.py
#
# 85 lines, 2.3 KiB, Python

2023-10-12 22:11:58 +00:00
from typing import Tuple, Union
from pathlib import Path
2023-07-27 18:44:24 +00:00
2023-04-18 20:39:19 +00:00
from transliterate.exceptions import LanguageDetectionError
from transliterate import translit
2023-05-10 14:39:44 +00:00
from pathvalidate import sanitize_filename
2023-04-18 20:39:19 +00:00
2023-07-27 18:44:24 +00:00
# Suffixes that clean_song_title strips from the end of a raw song title.
# Entries are compared against the lowercased title, so they have to be
# written in lowercase to ever match.
COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = (
"(official video)",
)
def unify(string: str) -> str:
    """
    Bring a string into a canonical form so that two strings can be compared easily.

    The string is passed through `translit(..., reversed=True)` and the result is lowercased.
    If `translit` cannot detect a language, the text is lowercased as it is.

    :param string: the text to normalise.
    :return: the transliterated (when possible), lowercase text.
    """
    try:
        transliterated = translit(string, reversed=True)
    except LanguageDetectionError:
        # no language could be detected, so there is nothing to transliterate
        transliterated = string

    return transliterated.lower()
2023-04-03 09:17:55 +00:00
2023-04-18 20:39:19 +00:00
2023-10-12 22:11:58 +00:00
def fit_to_file_system(string: Union[str, Path]) -> Union[str, Path]:
    """
    Clean a file name, or every part of a path, so it can be used on the file system.

    Each name gets surrounding whitespace and all leading dots removed, has the
    separators `/` and `\\` replaced by `_`, and is then passed through
    `sanitize_filename`. A name that is empty after removing whitespace and
    leading dots is returned as an empty string.

    :param string: a single name as `str`, or a `Path` whose parts are cleaned one by one.
    :return: the cleaned value; a `Path` for `Path` input, otherwise a `str`.
    """
    def fit_string(name: str) -> str:
        # A part that is exactly "/" is passed through untouched,
        # otherwise the replace below would turn it into "_".
        if name == "/":
            return "/"

        # Drop every leading dot. lstrip also copes with empty and dot-only
        # names, where indexing name[0] in a loop raised an IndexError.
        name = name.strip().lstrip(".")
        if not name:
            return name

        name = name.replace("/", "_").replace("\\", "_")
        return sanitize_filename(name)

    if isinstance(string, Path):
        return Path(*(fit_string(part) for part in string.parts))

    return fit_string(string)
2023-07-27 18:44:24 +00:00
def clean_song_title(raw_song_title: str, artist_name: str) -> str:
    """
    This function cleans common naming "conventions" for non clean song titles, like the title of youtube videos

    cleans:
    - `artist - song` -> `song`
    - `song (Official Video)` -> `song`
    - ` song` -> `song`

    The artist is only removed if its name ends at a word boundary and something
    is left of the title afterwards. So the artist `Air` does not turn `Airplane`
    into `plane`, and a song named exactly like its artist keeps its title.

    Not handled: a suffix like `(prod. some producer)` is not stripped by this function.

    :param raw_song_title: the uncleaned title.
    :param artist_name: the artist, matched case insensitive against the start of the title.
    :return: the cleaned title without surrounding whitespace.
    """
    title = raw_song_title.strip()
    artist = artist_name.strip()

    # Clean official Video appendix
    for dirty_appendix in COMMON_TITLE_APPENDIX_LIST:
        if title.lower().endswith(dirty_appendix):
            title = title[:-len(dirty_appendix)].strip()

    # Remove artist from the start of the title
    if title.lower().startswith(artist.lower()):
        remainder = title[len(artist):]

        # Only cut if there is a title left, and the artist name is not just
        # the beginning of a longer word.
        if remainder.strip() and not remainder[0].isalnum():
            title = remainder.strip()

            # the separator of `artist - song`
            if title.startswith("-"):
                title = title[1:].strip()

    return title.strip()
2023-08-10 21:01:16 +00:00
def comment(uncommented_string: str) -> str:
    """
    Turn every line of the given text into a `# ` comment line.

    :param uncommented_string: the text to comment out; lines are separated by `\\n`.
    :return: the same text, with `# ` put in front of every line.
    """
    # prefixing the first line and every line break covers each line exactly once
    return "# " + uncommented_string.replace("\n", "\n# ")