# music-kraken-core/music_kraken/utils/string_processing.py

from typing import Tuple, Union
from pathlib import Path

from transliterate.exceptions import LanguageDetectionError
from transliterate import translit
from pathvalidate import sanitize_filename


# Suffixes that get cut off the end of a song title by `clean_song_title`.
# The entries have to be lowercase, because they are compared against the
# lowercased title.
COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = ("(official video)",)


def unify(string: str) -> str:
    """
    returns a unified str, to make comparisons easy.
    a unified string has the following attributes:
     - it went through `translit(string, reversed=True)`, if the language could be detected
     - is lowercase
    """

    try:
        unified = translit(string, reversed=True)
    except LanguageDetectionError:
        # no language detected, so the string is only lowercased
        unified = string

    return unified.lower()


def fit_to_file_system(string: Union[str, Path]) -> Union[str, Path]:
    """
    Makes a file name or a path safe to use on the file system.

    A `str` is treated as one single path component, a `Path` is processed
    part by part and put back together, so the type of the input is kept.

    Every component
     - is stripped of surrounding whitespace
     - loses all of its leading dots
     - gets slashes and backslashes replaced by `_`
     - is passed through `sanitize_filename`

    The component `"/"` is returned unchanged.
    A component that is empty, or that only consists of dots once the whitespace
    is trimmed, results in an empty string instead of raising an `IndexError`.
    """
    def fit_string(string: str) -> str:
        # the root of an absolute path has to stay as it is
        if string == "/":
            return "/"

        # lstrip also works on empty and dot-only strings,
        # indexing string[0] in a loop does not
        string = string.strip().lstrip(".")
        if not string:
            return string

        string = string.replace("/", "_").replace("\\", "_")
        return sanitize_filename(string)

    if isinstance(string, Path):
        return Path(*(fit_string(part) for part in string.parts))

    return fit_string(string)


def clean_song_title(raw_song_title: str, artist_name: str) -> str:
    """
    This function cleans common naming "conventions" for non clean song titles, like the title of youtube videos

    cleans:

    - `artist - song` -> `song`
    - `song (Official Video)` -> `song`
    - ` song` -> `song`

    not cleaned:

    - `song (prod. some producer)`

    The artist is only removed from the start of the title, if
     - the artist name is not empty
     - the name is followed by a non alphanumeric character, so a title that
       merely begins with the same letters (`Ghostbusters` by `Ghost`) stays intact
     - something is left afterwards, so a title equal to the artist name stays intact

    Both comparisons are case insensitive.
    """
    title = raw_song_title.strip()
    artist = artist_name.strip()

    # Clean official Video appendix
    for dirty_appendix in COMMON_TITLE_APPENDIX_LIST:
        if title.lower().endswith(dirty_appendix):
            title = title[:-len(dirty_appendix)].strip()

    # Remove artist from the start of the title
    # an empty artist would match every title, thus it is skipped
    if artist and title.lower().startswith(artist.lower()):
        remainder = title[len(artist):]

        # only cut, if the artist name ends at a word boundary
        if remainder and not remainder[0].isalnum():
            title = remainder.strip()

            if title.startswith("-"):
                title = title[1:].strip()

    return title

    
def comment(uncommented_string: str) -> str:
    """
    Puts "# " in front of every line of the given string.
    The string is split at newline characters only, and empty lines are prefixed as well.
    """
    return "\n".join(f"# {line}" for line in uncommented_string.split("\n"))
fix: sanatizing file names correctly 2023-09-13 16:55:04 +00:00			`from typing import Tuple, Union`
			`from pathlib import Path`
clean up dirty song titles 2023-07-27 18:44:24 +00:00
translit 2023-04-18 20:39:19 +00:00			`from transliterate.exceptions import LanguageDetectionError`
			`from transliterate import translit`
fixed file paths 2023-05-10 14:39:44 +00:00			`from pathvalidate import sanitize_filename`

translit 2023-04-18 20:39:19 +00:00
clean up dirty song titles 2023-07-27 18:44:24 +00:00			`COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = (`
			`"(official video)",`
			`)`


fixed mutable argument bug in the source dict of SourceAttribute 2023-01-31 12:18:52 +00:00			`def unify(string: str) -> str:`
			`"""`
translit 2023-04-18 20:39:19 +00:00			`returns a unified str, to make comparisons easy.`
			`a unified string has the following attributes:`
fixed mutable argument bug in the source dict of SourceAttribute 2023-01-31 12:18:52 +00:00			`- is lowercase`
			`"""`
translit 2023-04-18 20:39:19 +00:00
			`try:`
			`string = translit(string, reversed=True)`
			`except LanguageDetectionError:`
			`pass`

fixed mutable argument bug in the source dict of SourceAttribute 2023-01-31 12:18:52 +00:00			`return string.lower()`
escaping strings 2023-04-03 09:17:55 +00:00
translit 2023-04-18 20:39:19 +00:00
fix: sanatizing file names correctly 2023-09-13 16:55:04 +00:00			`def fit_to_file_system(string: Union[str, Path]) -> Union[str, Path]:`
			`def fit_string(string: str) -> str:`
			`if string == "/":`
			`return "/"`
			`string = string.strip()`
translit 2023-04-18 20:39:19 +00:00
fix: sanatizing file names correctly 2023-09-13 16:55:04 +00:00			`while string[0] == ".":`
			`if len(string) == 0:`
			`return string`
translit 2023-04-18 20:39:19 +00:00
fix: sanatizing file names correctly 2023-09-13 16:55:04 +00:00			`string = string[1:]`
translit 2023-04-18 20:39:19 +00:00
fix: sanatizing file names correctly 2023-09-13 16:55:04 +00:00			`string = string.replace("/", "_").replace("\\", "_")`
			`string = sanitize_filename(string)`
			`return string`
translit 2023-04-18 20:39:19 +00:00
fix: sanatizing file names correctly 2023-09-13 16:55:04 +00:00			`if isinstance(string, Path):`
			`return Path(*(fit_string(part) for part in string.parts))`
			`else:`
			`return fit_string(string)`
clean up dirty song titles 2023-07-27 18:44:24 +00:00

			`def clean_song_title(raw_song_title: str, artist_name: str) -> str:`
			`"""`
			`This function cleans common naming "conventions" for non clean song titles, like the title of youtube videos`

			`cleans:`

			- `artist - song` -> `song`
			- `song (Official Video)` -> `song`
			- ` song` -> `song`
			- `song (prod. some producer)`
			`"""`
			`raw_song_title = raw_song_title.strip()`
			`artist_name = artist_name.strip()`

			`# Clean official Video appendix`
			`for dirty_appendix in COMMON_TITLE_APPENDIX_LIST:`
			`if raw_song_title.lower().endswith(dirty_appendix):`
			`raw_song_title = raw_song_title[:-len(dirty_appendix)].strip()`

			`# Remove artist from the start of the title`
			`if raw_song_title.lower().startswith(artist_name.lower()):`
			`raw_song_title = raw_song_title[len(artist_name):].strip()`

			`if raw_song_title.startswith("-"):`
			`raw_song_title = raw_song_title[1:].strip()`

			`return raw_song_title.strip()`
Merge branch 'youtube_music' into experimental 2023-09-12 09:06:26 +00:00

feat: layed out commit 2023-08-10 21:01:16 +00:00			`def comment(uncommented_string: str) -> str:`
			`_fragments = uncommented_string.split("\n")`
			`_fragments = ["# " + frag for frag in _fragments]`
			`return "\n".join(_fragments)`