Compare commits

...

3 Commits

Author SHA1 Message Date
1e62d371cd feat: cleaned bandcamp songs
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-04-19 11:46:56 +02:00
24a90f1cdf feat: artist name in clean song title is optional 2024-04-19 11:43:21 +02:00
d9c711a2f8 feat: added lru cache to unify function to speed up indexing 2024-04-19 11:40:00 +02:00
5 changed files with 20 additions and 15 deletions

View File

@ -7,7 +7,7 @@ logging.getLogger().setLevel(logging.DEBUG)
if __name__ == "__main__":
commands = [
"s: #a Ghost Bath",
"4",
"d: 0",
]

View File

@ -14,7 +14,7 @@ from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic,
ALL_PAGES: Set[Type[Page]] = {
EncyclopaediaMetallum,
# EncyclopaediaMetallum,
Musify,
YoutubeMusic,
Bandcamp

View File

@ -22,6 +22,7 @@ from ..objects import (
)
from ..connection import Connection
from ..utils.support_classes.download_result import DownloadResult
from ..utils.string_processing import clean_song_title
from ..utils.config import main_settings, logging_settings
from ..utils.shared import DEBUG
@ -114,7 +115,7 @@ class Bandcamp(Page):
if object_type is BandcampTypes.SONG:
return Song(
title=name.strip(),
title=clean_song_title(name, artist_name=data["band_name"]),
source_list=source_list,
main_artist_list=[
Artist(
@ -254,7 +255,7 @@ class Bandcamp(Page):
def _parse_track_element(self, track: dict) -> Optional[Song]:
return Song(
title=track["item"]["name"].strip(),
title=clean_song_title(track["item"]["name"]),
source_list=[Source(self.SOURCE_TYPE, track["item"]["mainEntityOfPage"])],
tracksort=int(track["position"])
)
@ -337,7 +338,7 @@ class Bandcamp(Page):
mp3_url = value
song = Song(
title=data["name"].strip(),
title=clean_song_title(data["name"], artist_name=artist_data["name"]),
source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), audio_url=mp3_url)],
album_list=[Album(
title=album_data["name"].strip(),

View File

@ -17,7 +17,7 @@ DEBUG_LOGGING = DEBUG and True
DEBUG_TRACE = DEBUG and True
DEBUG_OBJECT_TRACE = DEBUG and False
DEBUG_YOUTUBE_INITIALIZING = DEBUG and False
DEBUG_PAGES = DEBUG and False
DEBUG_PAGES = DEBUG and True
DEBUG_DUMP = DEBUG and True
if DEBUG:

View File

@ -1,6 +1,7 @@
from typing import Tuple, Union
from typing import Tuple, Union, Optional
from pathlib import Path
import string
from functools import lru_cache
from transliterate.exceptions import LanguageDetectionError
from transliterate import translit
@ -11,7 +12,7 @@ COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = (
"(official video)",
)
@lru_cache
def unify(string: str) -> str:
"""
returns a unified str, to make comparisons easy.
@ -52,7 +53,7 @@ def fit_to_file_system(string: Union[str, Path]) -> Union[str, Path]:
return fit_string(string)
def clean_song_title(raw_song_title: str, artist_name: str) -> str:
def clean_song_title(raw_song_title: str, artist_name: Optional[str] = None) -> str:
"""
This function cleans up common naming "conventions" found in unclean song titles, such as the titles of YouTube videos
@ -64,19 +65,22 @@ def clean_song_title(raw_song_title: str, artist_name: str) -> str:
- `song (prod. some producer)`
"""
raw_song_title = raw_song_title.strip()
artist_name = artist_name.strip()
# Clean official Video appendix
for dirty_appendix in COMMON_TITLE_APPENDIX_LIST:
if raw_song_title.lower().endswith(dirty_appendix):
raw_song_title = raw_song_title[:-len(dirty_appendix)].strip()
# Remove artist from the start of the title
if raw_song_title.lower().startswith(artist_name.lower()):
raw_song_title = raw_song_title[len(artist_name):].strip()
# everything that requires the artist name
if artist_name is not None:
artist_name = artist_name.strip()
if raw_song_title.startswith("-"):
raw_song_title = raw_song_title[1:].strip()
# Remove artist from the start of the title
if raw_song_title.lower().startswith(artist_name.lower()):
raw_song_title = raw_song_title[len(artist_name):].strip()
if raw_song_title.startswith("-"):
raw_song_title = raw_song_title[1:].strip()
return raw_song_title.strip()