Compare commits

...

3 Commits

Author SHA1 Message Date
1e62d371cd feat: cleaned bandcamp songs
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-04-19 11:46:56 +02:00
24a90f1cdf feat: artist name in clean song title is optional 2024-04-19 11:43:21 +02:00
d9c711a2f8 feat: added lru cache to unify function to speed up indexing 2024-04-19 11:40:00 +02:00
5 changed files with 20 additions and 15 deletions

View File

@ -7,7 +7,7 @@ logging.getLogger().setLevel(logging.DEBUG)
if __name__ == "__main__": if __name__ == "__main__":
commands = [ commands = [
"s: #a Ghost Bath", "s: #a Ghost Bath",
"4", "d: 0",
] ]

View File

@ -14,7 +14,7 @@ from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic,
ALL_PAGES: Set[Type[Page]] = { ALL_PAGES: Set[Type[Page]] = {
EncyclopaediaMetallum, # EncyclopaediaMetallum,
Musify, Musify,
YoutubeMusic, YoutubeMusic,
Bandcamp Bandcamp

View File

@ -22,6 +22,7 @@ from ..objects import (
) )
from ..connection import Connection from ..connection import Connection
from ..utils.support_classes.download_result import DownloadResult from ..utils.support_classes.download_result import DownloadResult
from ..utils.string_processing import clean_song_title
from ..utils.config import main_settings, logging_settings from ..utils.config import main_settings, logging_settings
from ..utils.shared import DEBUG from ..utils.shared import DEBUG
@ -114,7 +115,7 @@ class Bandcamp(Page):
if object_type is BandcampTypes.SONG: if object_type is BandcampTypes.SONG:
return Song( return Song(
title=name.strip(), title=clean_song_title(name, artist_name=data["band_name"]),
source_list=source_list, source_list=source_list,
main_artist_list=[ main_artist_list=[
Artist( Artist(
@ -254,7 +255,7 @@ class Bandcamp(Page):
def _parse_track_element(self, track: dict) -> Optional[Song]: def _parse_track_element(self, track: dict) -> Optional[Song]:
return Song( return Song(
title=track["item"]["name"].strip(), title=clean_song_title(track["item"]["name"]),
source_list=[Source(self.SOURCE_TYPE, track["item"]["mainEntityOfPage"])], source_list=[Source(self.SOURCE_TYPE, track["item"]["mainEntityOfPage"])],
tracksort=int(track["position"]) tracksort=int(track["position"])
) )
@ -337,7 +338,7 @@ class Bandcamp(Page):
mp3_url = value mp3_url = value
song = Song( song = Song(
title=data["name"].strip(), title=clean_song_title(data["name"], artist_name=artist_data["name"]),
source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), audio_url=mp3_url)], source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), audio_url=mp3_url)],
album_list=[Album( album_list=[Album(
title=album_data["name"].strip(), title=album_data["name"].strip(),

View File

@ -17,7 +17,7 @@ DEBUG_LOGGING = DEBUG and True
DEBUG_TRACE = DEBUG and True DEBUG_TRACE = DEBUG and True
DEBUG_OBJECT_TRACE = DEBUG and False DEBUG_OBJECT_TRACE = DEBUG and False
DEBUG_YOUTUBE_INITIALIZING = DEBUG and False DEBUG_YOUTUBE_INITIALIZING = DEBUG and False
DEBUG_PAGES = DEBUG and False DEBUG_PAGES = DEBUG and True
DEBUG_DUMP = DEBUG and True DEBUG_DUMP = DEBUG and True
if DEBUG: if DEBUG:

View File

@ -1,6 +1,7 @@
from typing import Tuple, Union from typing import Tuple, Union, Optional
from pathlib import Path from pathlib import Path
import string import string
from functools import lru_cache
from transliterate.exceptions import LanguageDetectionError from transliterate.exceptions import LanguageDetectionError
from transliterate import translit from transliterate import translit
@ -11,7 +12,7 @@ COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = (
"(official video)", "(official video)",
) )
@lru_cache
def unify(string: str) -> str: def unify(string: str) -> str:
""" """
returns a unified str, to make comparisons easy. returns a unified str, to make comparisons easy.
@ -52,7 +53,7 @@ def fit_to_file_system(string: Union[str, Path]) -> Union[str, Path]:
return fit_string(string) return fit_string(string)
def clean_song_title(raw_song_title: str, artist_name: str) -> str: def clean_song_title(raw_song_title: str, artist_name: Optional[str] = None) -> str:
""" """
This function cleans common naming "conventions" for non clean song titles, like the title of youtube videos This function cleans common naming "conventions" for non clean song titles, like the title of youtube videos
@ -64,19 +65,22 @@ def clean_song_title(raw_song_title: str, artist_name: str) -> str:
- `song (prod. some producer)` - `song (prod. some producer)`
""" """
raw_song_title = raw_song_title.strip() raw_song_title = raw_song_title.strip()
artist_name = artist_name.strip()
# Clean official Video appendix # Clean official Video appendix
for dirty_appendix in COMMON_TITLE_APPENDIX_LIST: for dirty_appendix in COMMON_TITLE_APPENDIX_LIST:
if raw_song_title.lower().endswith(dirty_appendix): if raw_song_title.lower().endswith(dirty_appendix):
raw_song_title = raw_song_title[:-len(dirty_appendix)].strip() raw_song_title = raw_song_title[:-len(dirty_appendix)].strip()
# Remove artist from the start of the title # everything that requires the artist name
if raw_song_title.lower().startswith(artist_name.lower()): if artist_name is not None:
raw_song_title = raw_song_title[len(artist_name):].strip() artist_name = artist_name.strip()
if raw_song_title.startswith("-"): # Remove artist from the start of the title
raw_song_title = raw_song_title[1:].strip() if raw_song_title.lower().startswith(artist_name.lower()):
raw_song_title = raw_song_title[len(artist_name):].strip()
if raw_song_title.startswith("-"):
raw_song_title = raw_song_title[1:].strip()
return raw_song_title.strip() return raw_song_title.strip()