clean up dirty song titles
This commit is contained in:
parent
28552f702e
commit
fe1751a7f3
@ -19,6 +19,7 @@ from ..objects import (
|
|||||||
ID3Timestamp
|
ID3Timestamp
|
||||||
)
|
)
|
||||||
from ..connection import Connection
|
from ..connection import Connection
|
||||||
|
from ..utils.string_processing import clean_song_title
|
||||||
from ..utils.support_classes import DownloadResult
|
from ..utils.support_classes import DownloadResult
|
||||||
from ..utils.shared import YOUTUBE_LOGGER, INVIDIOUS_INSTANCE, BITRATE, ENABLE_SPONSOR_BLOCK, PIPED_INSTANCE, SLEEP_AFTER_YOUTUBE_403
|
from ..utils.shared import YOUTUBE_LOGGER, INVIDIOUS_INSTANCE, BITRATE, ENABLE_SPONSOR_BLOCK, PIPED_INSTANCE, SLEEP_AFTER_YOUTUBE_403
|
||||||
|
|
||||||
@ -226,6 +227,10 @@ class YouTube(Page):
|
|||||||
))
|
))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
# If the video does not come from a topic channel, clean the title. Titles from topic channels are already clean.
|
||||||
|
# If the API returns cleaned data, the title is overridden in the next step anyway.
|
||||||
|
title = clean_song_title(title, _author)
|
||||||
|
|
||||||
for music_track in data.get("musicTracks", []):
|
for music_track in data.get("musicTracks", []):
|
||||||
title = music_track["song"]
|
title = music_track["song"]
|
||||||
license_str = music_track["license"]
|
license_str = music_track["license"]
|
||||||
|
@ -1,9 +1,15 @@
|
|||||||
|
from typing import Tuple
|
||||||
|
|
||||||
from transliterate.exceptions import LanguageDetectionError
|
from transliterate.exceptions import LanguageDetectionError
|
||||||
from transliterate import translit
|
from transliterate import translit
|
||||||
|
|
||||||
from pathvalidate import sanitize_filename
|
from pathvalidate import sanitize_filename
|
||||||
|
|
||||||
|
|
||||||
|
# Suffixes that get cut off the end of raw song titles.
# They are compared with the lower-cased title, so entries should be lower-case.
COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = ("(official video)",)
|
||||||
|
|
||||||
|
|
||||||
def unify(string: str) -> str:
|
def unify(string: str) -> str:
|
||||||
"""
|
"""
|
||||||
returns a unified str, to make comparisons easy.
|
returns a unified str, to make comparisons easy.
|
||||||
@ -33,3 +39,32 @@ def fit_to_file_system(string: str) -> str:
|
|||||||
string = sanitize_filename(string)
|
string = sanitize_filename(string)
|
||||||
|
|
||||||
return string
|
return string
|
||||||
|
|
||||||
|
|
||||||
|
def clean_song_title(raw_song_title: str, artist_name: str) -> str:
    """
    This function cleans common naming "conventions" for non clean song titles, like the title of youtube videos

    cleans:

    - `artist - song` -> `song`
    - `song (Official Video)` -> `song`
    - ` song` -> `song`

    not handled (returned unchanged):

    - `song (prod. some producer)`

    The artist is only removed if it is followed by a dash. A title that merely
    starts with the artist's name (`Artistic Song` by `Art`) or that equals the
    artist's name (self-titled song) is left alone.

    :param raw_song_title: the title as delivered by the source
    :param artist_name: the name of the artist the title belongs to
    :return: the cleaned title without surrounding whitespace
    """
    title = raw_song_title.strip()
    artist = artist_name.strip()

    # hyphen, en dash and em dash are all used to separate artist and song
    dash_separators = ("-", "\u2013", "\u2014")

    # Clean official Video appendix
    # Repeat until nothing changes, so the result doesn't depend on the order of the list.
    # Every removal shortens the title by at least one character, so this terminates.
    removed_appendix = True
    while removed_appendix:
        removed_appendix = False
        for dirty_appendix in COMMON_TITLE_APPENDIX_LIST:
            # an empty entry would wipe the whole title, because title[:-0] == ""
            if not dirty_appendix:
                continue

            # compare the slice itself, so the cut always matches what was compared
            if title[-len(dirty_appendix):].lower() == dirty_appendix.lower():
                title = title[:-len(dirty_appendix)].strip()
                removed_appendix = True

    # Remove artist from the start of the title
    # The slice of the original title is compared (not a lowered copy), because
    # changing the case can change the length of a string.
    if artist and title[:len(artist)].casefold() == artist.casefold():
        remainder = title[len(artist):].lstrip()
        # only a following dash proves that the prefix really is the artist
        if remainder.startswith(dash_separators):
            title = remainder

    if title.startswith(dash_separators):
        title = title[1:].strip()

    return title.strip()
|
||||||
|
Loading…
Reference in New Issue
Block a user