Compare commits

...

5 Commits

Author SHA1 Message Date
c6bdf724e3 draft: string processing
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-04-29 14:40:49 +02:00
aa50d2cf20 feat: renamed referrer page fixing typo 2024-04-29 13:51:43 +02:00
3eba8e90f4 feat: cleaned data objects 2024-04-29 13:49:41 +02:00
ee1aaa13b0 feat: cleaned data objects 2024-04-29 13:49:16 +02:00
1ad62df0ab feat: default implementation for options that should be sufficient 2024-04-29 13:43:34 +02:00
9 changed files with 104 additions and 131 deletions

View File

@ -16,6 +16,7 @@
}, },
"python.formatting.provider": "none", "python.formatting.provider": "none",
"cSpell.words": [ "cSpell.words": [
"albumsort",
"APIC", "APIC",
"Bandcamp", "Bandcamp",
"dotenv", "dotenv",

View File

@ -9,7 +9,7 @@ from pathlib import Path
import inspect import inspect
from .metadata import Metadata from .metadata import Metadata
from ..utils import get_unix_time, object_trace from ..utils import get_unix_time, object_trace, generate_id
from ..utils.config import logging_settings, main_settings from ..utils.config import logging_settings, main_settings
from ..utils.shared import HIGHEST_ID from ..utils.shared import HIGHEST_ID
from ..utils.hacking import MetaClass from ..utils.hacking import MetaClass
@ -29,6 +29,10 @@ class InnerData:
""" """
_refers_to_instances: set = None _refers_to_instances: set = None
"""
Attribute versions keep track, of if the attribute has been changed.
"""
_attribute_versions: Dict[str, int] = None
def __init__(self, object_type, **kwargs): def __init__(self, object_type, **kwargs):
self._refers_to_instances = set() self._refers_to_instances = set()
@ -84,8 +88,6 @@ class OuterProxy:
DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = tuple() DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = tuple()
UPWARDS_COLLECTION_STRING_ATTRIBUTES = tuple() UPWARDS_COLLECTION_STRING_ATTRIBUTES = tuple()
TITEL = "id"
def __init__(self, _id: int = None, dynamic: bool = False, **kwargs): def __init__(self, _id: int = None, dynamic: bool = False, **kwargs):
_automatic_id: bool = False _automatic_id: bool = False
@ -94,7 +96,7 @@ class OuterProxy:
generates a random integer id generates a random integer id
the range is defined in the config the range is defined in the config
""" """
_id = random.randint(0, HIGHEST_ID) _id = generate_id()
_automatic_id = True _automatic_id = True
kwargs["automatic_id"] = _automatic_id kwargs["automatic_id"] = _automatic_id
@ -235,7 +237,17 @@ class OuterProxy:
@property @property
def options(self) -> List[P]: def options(self) -> List[P]:
return [self] r = []
for collection_string_attribute in self.UPWARDS_COLLECTION_STRING_ATTRIBUTES:
r.extend(self.__getattribute__(collection_string_attribute))
r.append(self)
for collection_string_attribute in self.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
r.extend(self.__getattribute__(collection_string_attribute))
return r
@property @property
def indexing_values(self) -> List[Tuple[str, object]]: def indexing_values(self) -> List[Tuple[str, object]]:
@ -267,6 +279,7 @@ class OuterProxy:
return r return r
TITEL = "id"
@property @property
def title_string(self) -> str: def title_string(self) -> str:
return str(self.__getattribute__(self.TITEL)) return str(self.__getattribute__(self.TITEL))

View File

@ -117,7 +117,7 @@ class Song(Base):
Base.__init__(**locals()) Base.__init__(**locals())
UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("album_collection", "main_artist_collection", "feature_artist_collection") UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("main_artist_collection", "feature_artist_collection", "album_collection")
TITEL = "title" TITEL = "title"
def __init_collections__(self) -> None: def __init_collections__(self) -> None:
@ -269,7 +269,7 @@ class Album(Base):
**kwargs) **kwargs)
DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("song_collection",) DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("song_collection",)
UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("artist_collection", "label_collection") UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection", "artist_collection")
def __init_collections__(self): def __init_collections__(self):
self.song_collection.append_object_to_attribute = { self.song_collection.append_object_to_attribute = {
@ -341,12 +341,6 @@ class Album(Base):
r += f" with {len(self.song_collection)} songs" r += f" with {len(self.song_collection)} songs"
return r return r
@property
def options(self) -> List[P]:
options = [*self.artist_collection, self, *self.song_collection]
return options
def update_tracksort(self): def update_tracksort(self):
""" """
This updates the tracksort attributes, of the songs in This updates the tracksort attributes, of the songs in
@ -372,18 +366,6 @@ class Album(Base):
tracksort_map[i] = existing_list.pop(0) tracksort_map[i] = existing_list.pop(0)
tracksort_map[i].tracksort = i tracksort_map[i].tracksort = i
def compile(self, merge_into: bool = False):
"""
compiles the recursive structures,
and does depending on the object some other stuff.
no need to override if only the recursive structure should be built.
override self.build_recursive_structures() instead
"""
self.update_tracksort()
self._build_recursive_structures(build_version=random.randint(0, 99999), merge=merge_into)
@property @property
def copyright(self) -> str: def copyright(self) -> str:
if self.date is None: if self.date is None:
@ -429,7 +411,7 @@ class Artist(Base):
lyrical_themes: List[str] lyrical_themes: List[str]
general_genre: str general_genre: str
unformated_location: str unformatted_location: str
source_collection: SourceCollection source_collection: SourceCollection
contact_collection: Collection[Contact] contact_collection: Collection[Contact]
@ -442,7 +424,7 @@ class Artist(Base):
"name": str, "name": str,
"unified_name": lambda: None, "unified_name": lambda: None,
"country": lambda: None, "country": lambda: None,
"unformated_location": lambda: None, "unformatted_location": lambda: None,
"formed_in": ID3Timestamp, "formed_in": ID3Timestamp,
"notes": FormattedText, "notes": FormattedText,
@ -461,17 +443,17 @@ class Artist(Base):
# This is automatically generated # This is automatically generated
def __init__(self, name: str = "", unified_name: str = None, country: Country = None, def __init__(self, name: str = "", unified_name: str = None, country: Country = None,
formed_in: ID3Timestamp = None, notes: FormattedText = None, lyrical_themes: List[str] = None, formed_in: ID3Timestamp = None, notes: FormattedText = None, lyrical_themes: List[str] = None,
general_genre: str = None, unformated_location: str = None, source_list: List[Source] = None, general_genre: str = None, unformatted_location: str = None, source_list: List[Source] = None,
contact_list: List[Contact] = None, feature_song_list: List[Song] = None, contact_list: List[Contact] = None, feature_song_list: List[Song] = None,
main_album_list: List[Album] = None, label_list: List[Label] = None, **kwargs) -> None: main_album_list: List[Album] = None, label_list: List[Label] = None, **kwargs) -> None:
super().__init__(name=name, unified_name=unified_name, country=country, formed_in=formed_in, notes=notes, super().__init__(name=name, unified_name=unified_name, country=country, formed_in=formed_in, notes=notes,
lyrical_themes=lyrical_themes, general_genre=general_genre, lyrical_themes=lyrical_themes, general_genre=general_genre,
unformated_location=unformated_location, source_list=source_list, contact_list=contact_list, unformatted_location=unformatted_location, source_list=source_list, contact_list=contact_list,
feature_song_list=feature_song_list, main_album_list=main_album_list, label_list=label_list, feature_song_list=feature_song_list, main_album_list=main_album_list, label_list=label_list,
**kwargs) **kwargs)
DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("feature_song_collection", "main_album_collection") DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("main_album_collection", "feature_song_collection")
UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection",) UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection",)
def __init_collections__(self): def __init_collections__(self):
@ -504,12 +486,6 @@ class Artist(Base):
self.label_collection.extend(object_list) self.label_collection.extend(object_list)
return return
@property
def options(self) -> List[P]:
options = [self, *self.main_album_collection.shallow_list, *self.feature_album]
print(options)
return options
def update_albumsort(self): def update_albumsort(self):
""" """
This updates the albumsort attributes, of the albums in This updates the albumsort attributes, of the albums in
@ -585,19 +561,6 @@ class Artist(Base):
return metadata return metadata
"""
def __str__(self, include_notes: bool = False):
string = self.name or ""
if include_notes:
plaintext_notes = self.notes.get_plaintext()
if plaintext_notes is not None:
string += "\n" + plaintext_notes
return string
"""
def __repr__(self):
return f"Artist(\"{self.name}\")"
@property @property
def option_string(self) -> str: def option_string(self) -> str:
r = OPTION_FOREGROUND.value + self.name + BColors.ENDC.value + OPTION_BACKGROUND.value r = OPTION_FOREGROUND.value + self.name + BColors.ENDC.value + OPTION_BACKGROUND.value
@ -613,43 +576,6 @@ class Artist(Base):
return r return r
@property
def options(self) -> List[P]:
options = [self]
options.extend(self.main_album_collection)
options.extend(self.feature_song_collection)
return options
@property
def feature_album(self) -> Album:
return Album(
title="features",
album_status=AlbumStatus.UNRELEASED,
album_type=AlbumType.COMPILATION_ALBUM,
is_split=True,
albumsort=666,
dynamic=True,
song_list=self.feature_song_collection.shallow_list
)
def get_all_songs(self) -> List[Song]:
"""
returns a list of all Songs.
probably not that useful, because it is unsorted
"""
collection = self.feature_song_collection.copy()
for album in self.discography:
collection.extend(album.song_collection)
return collection
@property
def discography(self) -> List[Album]:
flat_copy_discography = self.main_album_collection.copy()
flat_copy_discography.append(self.feature_album)
return flat_copy_discography
""" """
Label Label
@ -702,7 +628,6 @@ class Label(Base):
@property @property
def indexing_values(self) -> List[Tuple[str, object]]: def indexing_values(self) -> List[Tuple[str, object]]:
return [ return [
('id', self.id),
('name', unify(self.name)), ('name', unify(self.name)),
*[('url', source.url) for source in self.source_collection] *[('url', source.url) for source in self.source_collection]
] ]

View File

@ -2,9 +2,12 @@ from __future__ import annotations
from collections import defaultdict from collections import defaultdict
from enum import Enum from enum import Enum
from typing import List, Dict, Set, Tuple, Optional, Iterable from typing import List, Dict, Set, Tuple, Optional, Iterable, Generator
from urllib.parse import urlparse from urllib.parse import urlparse, ParseResult
from dataclasses import dataclass, field
from functools import cached_property
from ..utils import generate_id
from ..utils.enums.source import SourcePages, SourceTypes from ..utils.enums.source import SourcePages, SourceTypes
from ..utils.config import youtube_settings from ..utils.config import youtube_settings
from ..utils.string_processing import hash_url from ..utils.string_processing import hash_url
@ -14,29 +17,26 @@ from .parents import OuterProxy
from .collection import Collection from .collection import Collection
class Source(OuterProxy):
@dataclass
class Source:
url: str url: str
page_enum: SourcePages page_enum: SourcePages
referer_page: SourcePages referrer_page: SourcePages
audio_url: Optional[str]
audio_url: str id: int = field(default_factory=generate_id)
additional_data: dict = field(default_factory=dict)
_default_factories = { def __post_init__(self):
"audio_url": lambda: None, self.referrer_page = self.referrer_page or self.page_enum
}
# This is automatically generated @cached_property
def __init__(self, page_enum: SourcePages, url: str, referer_page: SourcePages = None, audio_url: str = None, def parsed_url(self) -> ParseResult:
**kwargs) -> None: return urlparse(self.url)
if referer_page is None:
referer_page = page_enum
super().__init__(url=url, page_enum=page_enum, referer_page=referer_page, audio_url=audio_url, **kwargs)
@classmethod @classmethod
def match_url(cls, url: str, referer_page: SourcePages) -> Optional["Source"]: def match_url(cls, url: str, referrer_page: SourcePages) -> Optional["Source"]:
""" """
this shouldn't be used, unlesse you are not certain what the source is for this shouldn't be used, unlesse you are not certain what the source is for
the reason is that it is more inefficient the reason is that it is more inefficient
@ -45,38 +45,38 @@ class Source(OuterProxy):
url = parsed.geturl() url = parsed.geturl()
if "musify" in parsed.netloc: if "musify" in parsed.netloc:
return cls(SourcePages.MUSIFY, url, referer_page=referer_page) return cls(SourcePages.MUSIFY, url, referrer_page=referrer_page)
if parsed.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]: if parsed.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]:
return cls(SourcePages.YOUTUBE, url, referer_page=referer_page) return cls(SourcePages.YOUTUBE, url, referrer_page=referrer_page)
if url.startswith("https://www.deezer"): if url.startswith("https://www.deezer"):
return cls(SourcePages.DEEZER, url, referer_page=referer_page) return cls(SourcePages.DEEZER, url, referrer_page=referrer_page)
if url.startswith("https://open.spotify.com"): if url.startswith("https://open.spotify.com"):
return cls(SourcePages.SPOTIFY, url, referer_page=referer_page) return cls(SourcePages.SPOTIFY, url, referrer_page=referrer_page)
if "bandcamp" in url: if "bandcamp" in url:
return cls(SourcePages.BANDCAMP, url, referer_page=referer_page) return cls(SourcePages.BANDCAMP, url, referrer_page=referrer_page)
if "wikipedia" in parsed.netloc: if "wikipedia" in parsed.netloc:
return cls(SourcePages.WIKIPEDIA, url, referer_page=referer_page) return cls(SourcePages.WIKIPEDIA, url, referrer_page=referrer_page)
if url.startswith("https://www.metal-archives.com/"): if url.startswith("https://www.metal-archives.com/"):
return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url, referer_page=referer_page) return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page)
# the less important once # the less important once
if url.startswith("https://www.facebook"): if url.startswith("https://www.facebook"):
return cls(SourcePages.FACEBOOK, url, referer_page=referer_page) return cls(SourcePages.FACEBOOK, url, referrer_page=referrer_page)
if url.startswith("https://www.instagram"): if url.startswith("https://www.instagram"):
return cls(SourcePages.INSTAGRAM, url, referer_page=referer_page) return cls(SourcePages.INSTAGRAM, url, referrer_page=referrer_page)
if url.startswith("https://twitter"): if url.startswith("https://twitter"):
return cls(SourcePages.TWITTER, url, referer_page=referer_page) return cls(SourcePages.TWITTER, url, referrer_page=referrer_page)
if url.startswith("https://myspace.com"): if url.startswith("https://myspace.com"):
return cls(SourcePages.MYSPACE, url, referer_page=referer_page) return cls(SourcePages.MYSPACE, url, referrer_page=referrer_page)
def get_song_metadata(self) -> Metadata: def get_song_metadata(self) -> Metadata:
return Metadata({ return Metadata({
@ -120,16 +120,23 @@ class Source(OuterProxy):
homepage = property(fget=lambda self: SourcePages.get_homepage(self.page_enum)) homepage = property(fget=lambda self: SourcePages.get_homepage(self.page_enum))
class SourceCollection(Collection): class SourceCollection:
_page_to_source_list: Dict[SourcePages, List[Source]]
def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs): def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs):
self._page_to_source_list: Dict[SourcePages, List[Source]] = defaultdict(list) self._page_to_source_list = defaultdict(list)
super().__init__(data=data, **kwargs) def get_sources(self, *source_pages: List[Source]) -> Generator[Source]:
for page in source_pages:
yield from self._page_to_source_list[page]
def _map_element(self, __object: Source, **kwargs): def append(self, source: Source):
super()._map_element(__object, **kwargs) pass
self._page_to_source_list[__object.page_enum].append(__object) def extend(self, sources: Iterable[Source]):
for source in sources:
self.append(source)
@property @property
def source_pages(self) -> Set[SourcePages]: def source_pages(self) -> Set[SourcePages]:

View File

@ -185,7 +185,7 @@ class Bandcamp(Page):
if li is None and li['href'] is not None: if li is None and li['href'] is not None:
continue continue
source_list.append(Source.match_url(_parse_artist_url(li['href']), referer_page=self.SOURCE_TYPE)) source_list.append(Source.match_url(_parse_artist_url(li['href']), referrer_page=self.SOURCE_TYPE))
return Artist( return Artist(
name=name, name=name,

View File

@ -486,7 +486,7 @@ class EncyclopaediaMetallum(Page):
href = anchor["href"] href = anchor["href"]
if href is not None: if href is not None:
source_list.append(Source.match_url(href, referer_page=self.SOURCE_TYPE)) source_list.append(Source.match_url(href, referrer_page=self.SOURCE_TYPE))
# The following code is only legacy code, which I just kep because it doesn't harm. # The following code is only legacy code, which I just kep because it doesn't harm.
# The way ma returns sources changed. # The way ma returns sources changed.
@ -504,7 +504,7 @@ class EncyclopaediaMetallum(Page):
if url is None: if url is None:
continue continue
source_list.append(Source.match_url(url, referer_page=self.SOURCE_TYPE)) source_list.append(Source.match_url(url, referrer_page=self.SOURCE_TYPE))
return source_list return source_list

View File

@ -503,7 +503,7 @@ class Musify(Page):
source_list.append(Source( source_list.append(Source(
SourcePages.YOUTUBE, SourcePages.YOUTUBE,
iframe["src"], iframe["src"],
referer_page=self.SOURCE_TYPE referrer_page=self.SOURCE_TYPE
)) ))
return Song( return Song(
@ -812,7 +812,7 @@ class Musify(Page):
href = additional_source.get("href") href = additional_source.get("href")
if href is None: if href is None:
continue continue
new_src = Source.match_url(href, referer_page=self.SOURCE_TYPE) new_src = Source.match_url(href, referrer_page=self.SOURCE_TYPE)
if new_src is None: if new_src is None:
continue continue
source_list.append(new_src) source_list.append(new_src)

View File

@ -71,6 +71,12 @@ def object_trace(obj):
misc functions misc functions
""" """
_auto_increment = 0
def generate_id() -> int:
global _auto_increment
_auto_increment += 1
return _auto_increment
def get_current_millis() -> int: def get_current_millis() -> int:
dt = datetime.now() dt = datetime.now()
return int(dt.microsecond / 1_000) return int(dt.microsecond / 1_000)

View File

@ -6,6 +6,7 @@ from functools import lru_cache
from transliterate.exceptions import LanguageDetectionError from transliterate.exceptions import LanguageDetectionError
from transliterate import translit from transliterate import translit
from pathvalidate import sanitize_filename from pathvalidate import sanitize_filename
from urllib.parse import urlparse, ParseResult, parse_qs
COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = ( COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = (
@ -21,6 +22,7 @@ def unify(string: str) -> str:
returns a unified str, to make comparisons easy. returns a unified str, to make comparisons easy.
a unified string has the following attributes: a unified string has the following attributes:
- is lowercase - is lowercase
- is transliterated to Latin characters from e.g. Cyrillic
""" """
if string is None: if string is None:
@ -132,8 +134,27 @@ def unify_punctuation(to_unify: str) -> str:
to_unify = to_unify.replace(char, UNIFY_TO) to_unify = to_unify.replace(char, UNIFY_TO)
return to_unify return to_unify
def hash_url(url: str) -> int: def hash_url(url: Union[str, ParseResult]) -> str:
return url.strip().lower().lstrip("https://").lstrip("http://") if isinstance(url, str):
url = urlparse(url)
query = url.query
query_dict: Optional[dict] = None
try:
query_dict: dict = parse_qs(url.query, strict_parsing=True)
except ValueError:
# the query couldn't be parsed
pass
if isinstance(query_dict, dict):
# sort keys alphabetically
query = ""
for key, value in sorted(query_dict.items(), key=lambda i: i[0]):
query += f"_{key.strip()}_{''.join(i.strip() for i in value)}"
r = f"{url.netloc}_{url.path.replace('/', '_')}{query}"
r = r.lower().strip()
return r
def remove_feature_part_from_track(title: str) -> str: def remove_feature_part_from_track(title: str) -> str: