8 Commits

Author SHA1 Message Date
919a99885c feat: disabled debugging pages
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-04-19 12:47:36 +02:00
e20b14a9df feat: added fetching artworks to bandcamp
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-04-19 12:37:14 +02:00
b933c6ac14 feat: improved the lyrics support for bandcamp
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-04-19 12:17:08 +02:00
3c5bbc19af feat: some slight performance improvements
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-04-19 12:04:13 +02:00
06acf22abb feat: improved the cleaning song title function to remove redundant brackets
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-04-19 12:02:54 +02:00
1e62d371cd feat: cleaned bandcamp songs
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-04-19 11:46:56 +02:00
24a90f1cdf feat: artist name in clean song title is optional 2024-04-19 11:43:21 +02:00
d9c711a2f8 feat: added lru cache to unify function to speed up indexing 2024-04-19 11:40:00 +02:00
5 changed files with 88 additions and 19 deletions

View File

@@ -7,7 +7,8 @@ logging.getLogger().setLevel(logging.DEBUG)
if __name__ == "__main__": if __name__ == "__main__":
commands = [ commands = [
"s: #a Ghost Bath", "s: #a Ghost Bath",
"4", "0",
"d: 1",
] ]

View File

@@ -14,7 +14,7 @@ from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic,
ALL_PAGES: Set[Type[Page]] = { ALL_PAGES: Set[Type[Page]] = {
EncyclopaediaMetallum, # EncyclopaediaMetallum,
Musify, Musify,
YoutubeMusic, YoutubeMusic,
Bandcamp Bandcamp

View File

@@ -1,18 +1,26 @@
import mistune import mistune
import html2markdown import html2markdown
def plain_to_markdown(plain: str) -> str:
    """Convert plain text to markdown that keeps every line break.

    Markdown treats a single newline inside a paragraph as a soft break,
    which renders as a space. A hard line break needs at least two trailing
    spaces before the newline, so each newline is prefixed with two spaces.

    :param plain: text whose newlines should survive markdown rendering
    :return: the text with every line ending turned into a hard line break
    """
    # Normalise Windows/old-Mac line endings first; otherwise the "\r" would
    # count as its own line ending and the padded "\n" would become a blank line.
    normalised = plain.replace("\r\n", "\n").replace("\r", "\n")
    return normalised.replace("\n", "  \n")
class FormattedText: class FormattedText:
html = "" html = ""
def __init__( def __init__(
self, self,
markdown: str = None, markdown: str = None,
html: str = None html: str = None,
plain: str = None,
) -> None: ) -> None:
if html is not None: if html is not None:
self.html = html self.html = html
elif markdown is not None: elif markdown is not None:
self.html = mistune.markdown(markdown) self.html = mistune.markdown(markdown)
elif plain is not None:
self.html = mistune.markdown(plain_to_markdown(plain))
@property @property
def is_empty(self) -> bool: def is_empty(self) -> bool:

View File

@@ -18,10 +18,12 @@ from ..objects import (
Contact, Contact,
ID3Timestamp, ID3Timestamp,
Lyrics, Lyrics,
FormattedText FormattedText,
Artwork,
) )
from ..connection import Connection from ..connection import Connection
from ..utils.support_classes.download_result import DownloadResult from ..utils.support_classes.download_result import DownloadResult
from ..utils.string_processing import clean_song_title
from ..utils.config import main_settings, logging_settings from ..utils.config import main_settings, logging_settings
from ..utils.shared import DEBUG from ..utils.shared import DEBUG
@@ -114,7 +116,7 @@ class Bandcamp(Page):
if object_type is BandcampTypes.SONG: if object_type is BandcampTypes.SONG:
return Song( return Song(
title=name.strip(), title=clean_song_title(name, artist_name=data["band_name"]),
source_list=source_list, source_list=source_list,
main_artist_list=[ main_artist_list=[
Artist( Artist(
@@ -252,11 +254,18 @@ class Bandcamp(Page):
artist.source_collection.append(source) artist.source_collection.append(source)
return artist return artist
def _parse_track_element(self, track: dict, artwork: Artwork) -> Optional[Song]:
    """Build a Song from one entry of an album's track list.

    Reads ``track["item"]["name"]``, ``track["item"]["mainEntityOfPage"]`` and
    ``track["position"]``; optional lyrics are looked up under
    ``item -> recordingOf -> lyrics -> text``.

    :param track: one track dictionary of the album's track list
    :param artwork: artwork object attached to the created song
    :return: the parsed song
    :raises KeyError: if the name, the page url or the position is missing
    """
    lyrics_list: List[Lyrics] = []

    # Lyrics are optional, so every level falls back to an empty dict.
    _lyrics: Optional[str] = track.get("item", {}).get("recordingOf", {}).get("lyrics", {}).get("text")
    if _lyrics is not None:
        lyrics_list.append(Lyrics(text=FormattedText(plain=_lyrics)))

    return Song(
        title=clean_song_title(track["item"]["name"]),
        source_list=[Source(self.SOURCE_TYPE, track["item"]["mainEntityOfPage"])],
        tracksort=int(track["position"]),
        artwork=artwork,
        # The parsed lyrics were previously collected but never handed to the song.
        # NOTE(review): assumes Song accepts `lyrics_list`, following the
        # `source_list` / `main_artist_list` / `album_list` naming - confirm.
        lyrics_list=lyrics_list,
    )
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album: def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
@@ -289,12 +298,32 @@ class Bandcamp(Page):
)] )]
) )
artwork: Artwork = Artwork()
def _get_artwork_url(_data: dict) -> Optional[str]:
if "image" in _data:
return _data["image"]
for _property in _data.get("additionalProperty", []):
if _property.get("name") == "art_id":
return f"https://f4.bcbits.com/img/a{_property.get('value')}_2.jpg"
_artwork_url = _get_artwork_url(data)
if _artwork_url is not None:
artwork.append(url=_artwork_url, width=350, height=350)
else:
for album_release in data.get("albumRelease", []):
_artwork_url = _get_artwork_url(album_release)
if _artwork_url is not None:
artwork.append(url=_artwork_url, width=350, height=350)
break
for i, track_json in enumerate(data.get("track", {}).get("itemListElement", [])): for i, track_json in enumerate(data.get("track", {}).get("itemListElement", [])):
if DEBUG: if DEBUG:
dump_to_file(f"album_track_{i}.json", json.dumps(track_json), is_json=True, exit_after_dump=False) dump_to_file(f"album_track_{i}.json", json.dumps(track_json), is_json=True, exit_after_dump=False)
try: try:
album.song_collection.append(self._parse_track_element(track_json)) album.song_collection.append(self._parse_track_element(track_json, artwork=artwork))
except KeyError: except KeyError:
continue continue
@@ -337,7 +366,7 @@ class Bandcamp(Page):
mp3_url = value mp3_url = value
song = Song( song = Song(
title=data["name"].strip(), title=clean_song_title(data["name"], artist_name=artist_data["name"]),
source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), audio_url=mp3_url)], source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), audio_url=mp3_url)],
album_list=[Album( album_list=[Album(
title=album_data["name"].strip(), title=album_data["name"].strip(),

View File

@@ -1,6 +1,7 @@
from typing import Tuple, Union from typing import Tuple, Union, Optional
from pathlib import Path from pathlib import Path
import string import string
from functools import lru_cache
from transliterate.exceptions import LanguageDetectionError from transliterate.exceptions import LanguageDetectionError
from transliterate import translit from transliterate import translit
@@ -10,8 +11,11 @@ from pathvalidate import sanitize_filename
COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = ( COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = (
"(official video)", "(official video)",
) )
OPEN_BRACKETS = "(["
CLOSE_BRACKETS = ")]"
DISALLOWED_SUBSTRING_IN_BRACKETS = ("official", "video", "audio", "lyrics", "prod", "remix", "ft", "feat", "ft.", "feat.")
@lru_cache
def unify(string: str) -> str: def unify(string: str) -> str:
""" """
returns a unified str, to make comparisons easy. returns a unified str, to make comparisons easy.
@@ -52,7 +56,8 @@ def fit_to_file_system(string: Union[str, Path]) -> Union[str, Path]:
return fit_string(string) return fit_string(string)
def clean_song_title(raw_song_title: str, artist_name: str) -> str: @lru_cache(maxsize=128)
def clean_song_title(raw_song_title: str, artist_name: Optional[str] = None) -> str:
""" """
This function cleans common naming "conventions" for non clean song titles, like the title of youtube videos This function cleans common naming "conventions" for non clean song titles, like the title of youtube videos
@@ -64,19 +69,45 @@ def clean_song_title(raw_song_title: str, artist_name: str) -> str:
- `song (prod. some producer)` - `song (prod. some producer)`
""" """
raw_song_title = raw_song_title.strip() raw_song_title = raw_song_title.strip()
artist_name = artist_name.strip()
# Clean official Video appendix # Clean official Video appendix
for dirty_appendix in COMMON_TITLE_APPENDIX_LIST: for dirty_appendix in COMMON_TITLE_APPENDIX_LIST:
if raw_song_title.lower().endswith(dirty_appendix): if raw_song_title.lower().endswith(dirty_appendix):
raw_song_title = raw_song_title[:-len(dirty_appendix)].strip() raw_song_title = raw_song_title[:-len(dirty_appendix)].strip()
# Remove artist from the start of the title # remove brackets and their content if they contain disallowed substrings
if raw_song_title.lower().startswith(artist_name.lower()): for open_bracket, close_bracket in zip(OPEN_BRACKETS, CLOSE_BRACKETS):
raw_song_title = raw_song_title[len(artist_name):].strip() if open_bracket not in raw_song_title or close_bracket not in raw_song_title:
continue
start = 0
if raw_song_title.startswith("-"): while True:
raw_song_title = raw_song_title[1:].strip() try:
open_bracket_index = raw_song_title.index(open_bracket, start)
except ValueError:
break
try:
close_bracket_index = raw_song_title.index(close_bracket, open_bracket_index + 1)
except ValueError:
break
substring = raw_song_title[open_bracket_index + 1:close_bracket_index]
if any(disallowed_substring in substring for disallowed_substring in DISALLOWED_SUBSTRING_IN_BRACKETS):
raw_song_title = raw_song_title[:open_bracket_index] + raw_song_title[close_bracket_index + 1:]
else:
start = close_bracket_index + 1
# everything that requires the artist name
if artist_name is not None:
artist_name = artist_name.strip()
# Remove artist from the start of the title
if raw_song_title.lower().startswith(artist_name.lower()):
raw_song_title = raw_song_title[len(artist_name):].strip()
if raw_song_title.startswith("-"):
raw_song_title = raw_song_title[1:].strip()
return raw_song_title.strip() return raw_song_title.strip()