Compare commits

...

5 Commits

Author SHA1 Message Date
c6bdf724e3 draft: string processing
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-04-29 14:40:49 +02:00
aa50d2cf20 feat: renamed referrer page fixing typo 2024-04-29 13:51:43 +02:00
3eba8e90f4 feat: cleaned data objects 2024-04-29 13:49:41 +02:00
ee1aaa13b0 feat: cleaned data objects 2024-04-29 13:49:16 +02:00
1ad62df0ab feat: default implementation for options that should be sufficient 2024-04-29 13:43:34 +02:00
9 changed files with 104 additions and 131 deletions

View File

@ -16,6 +16,7 @@
},
"python.formatting.provider": "none",
"cSpell.words": [
"albumsort",
"APIC",
"Bandcamp",
"dotenv",

View File

@ -9,7 +9,7 @@ from pathlib import Path
import inspect
from .metadata import Metadata
from ..utils import get_unix_time, object_trace
from ..utils import get_unix_time, object_trace, generate_id
from ..utils.config import logging_settings, main_settings
from ..utils.shared import HIGHEST_ID
from ..utils.hacking import MetaClass
@ -29,6 +29,10 @@ class InnerData:
"""
_refers_to_instances: set = None
"""
Attribute versions keep track, of if the attribute has been changed.
"""
_attribute_versions: Dict[str, int] = None
def __init__(self, object_type, **kwargs):
self._refers_to_instances = set()
@ -84,8 +88,6 @@ class OuterProxy:
DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = tuple()
UPWARDS_COLLECTION_STRING_ATTRIBUTES = tuple()
TITEL = "id"
def __init__(self, _id: int = None, dynamic: bool = False, **kwargs):
_automatic_id: bool = False
@ -94,7 +96,7 @@ class OuterProxy:
generates a random integer id
the range is defined in the config
"""
_id = random.randint(0, HIGHEST_ID)
_id = generate_id()
_automatic_id = True
kwargs["automatic_id"] = _automatic_id
@ -235,7 +237,17 @@ class OuterProxy:
@property
def options(self) -> List[P]:
return [self]
r = []
for collection_string_attribute in self.UPWARDS_COLLECTION_STRING_ATTRIBUTES:
r.extend(self.__getattribute__(collection_string_attribute))
r.append(self)
for collection_string_attribute in self.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
r.extend(self.__getattribute__(collection_string_attribute))
return r
@property
def indexing_values(self) -> List[Tuple[str, object]]:
@ -267,6 +279,7 @@ class OuterProxy:
return r
TITEL = "id"
@property
def title_string(self) -> str:
return str(self.__getattribute__(self.TITEL))

View File

@ -117,7 +117,7 @@ class Song(Base):
Base.__init__(**locals())
UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("album_collection", "main_artist_collection", "feature_artist_collection")
UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("main_artist_collection", "feature_artist_collection", "album_collection")
TITEL = "title"
def __init_collections__(self) -> None:
@ -269,7 +269,7 @@ class Album(Base):
**kwargs)
DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("song_collection",)
UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("artist_collection", "label_collection")
UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection", "artist_collection")
def __init_collections__(self):
self.song_collection.append_object_to_attribute = {
@ -340,12 +340,6 @@ class Album(Base):
if len(self.song_collection) > 0:
r += f" with {len(self.song_collection)} songs"
return r
@property
def options(self) -> List[P]:
options = [*self.artist_collection, self, *self.song_collection]
return options
def update_tracksort(self):
"""
@ -372,18 +366,6 @@ class Album(Base):
tracksort_map[i] = existing_list.pop(0)
tracksort_map[i].tracksort = i
def compile(self, merge_into: bool = False):
"""
compiles the recursive structures,
and does depending on the object some other stuff.
no need to override if only the recursive structure should be built.
override self.build_recursive_structures() instead
"""
self.update_tracksort()
self._build_recursive_structures(build_version=random.randint(0, 99999), merge=merge_into)
@property
def copyright(self) -> str:
if self.date is None:
@ -429,7 +411,7 @@ class Artist(Base):
lyrical_themes: List[str]
general_genre: str
unformated_location: str
unformatted_location: str
source_collection: SourceCollection
contact_collection: Collection[Contact]
@ -442,7 +424,7 @@ class Artist(Base):
"name": str,
"unified_name": lambda: None,
"country": lambda: None,
"unformated_location": lambda: None,
"unformatted_location": lambda: None,
"formed_in": ID3Timestamp,
"notes": FormattedText,
@ -461,17 +443,17 @@ class Artist(Base):
# This is automatically generated
def __init__(self, name: str = "", unified_name: str = None, country: Country = None,
formed_in: ID3Timestamp = None, notes: FormattedText = None, lyrical_themes: List[str] = None,
general_genre: str = None, unformated_location: str = None, source_list: List[Source] = None,
general_genre: str = None, unformatted_location: str = None, source_list: List[Source] = None,
contact_list: List[Contact] = None, feature_song_list: List[Song] = None,
main_album_list: List[Album] = None, label_list: List[Label] = None, **kwargs) -> None:
super().__init__(name=name, unified_name=unified_name, country=country, formed_in=formed_in, notes=notes,
lyrical_themes=lyrical_themes, general_genre=general_genre,
unformated_location=unformated_location, source_list=source_list, contact_list=contact_list,
unformatted_location=unformatted_location, source_list=source_list, contact_list=contact_list,
feature_song_list=feature_song_list, main_album_list=main_album_list, label_list=label_list,
**kwargs)
DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("feature_song_collection", "main_album_collection")
DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("main_album_collection", "feature_song_collection")
UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection",)
def __init_collections__(self):
@ -504,12 +486,6 @@ class Artist(Base):
self.label_collection.extend(object_list)
return
@property
def options(self) -> List[P]:
options = [self, *self.main_album_collection.shallow_list, *self.feature_album]
print(options)
return options
def update_albumsort(self):
"""
This updates the albumsort attributes, of the albums in
@ -585,19 +561,6 @@ class Artist(Base):
return metadata
"""
def __str__(self, include_notes: bool = False):
string = self.name or ""
if include_notes:
plaintext_notes = self.notes.get_plaintext()
if plaintext_notes is not None:
string += "\n" + plaintext_notes
return string
"""
def __repr__(self):
return f"Artist(\"{self.name}\")"
@property
def option_string(self) -> str:
r = OPTION_FOREGROUND.value + self.name + BColors.ENDC.value + OPTION_BACKGROUND.value
@ -613,43 +576,6 @@ class Artist(Base):
return r
@property
def options(self) -> List[P]:
options = [self]
options.extend(self.main_album_collection)
options.extend(self.feature_song_collection)
return options
@property
def feature_album(self) -> Album:
return Album(
title="features",
album_status=AlbumStatus.UNRELEASED,
album_type=AlbumType.COMPILATION_ALBUM,
is_split=True,
albumsort=666,
dynamic=True,
song_list=self.feature_song_collection.shallow_list
)
def get_all_songs(self) -> List[Song]:
"""
returns a list of all Songs.
probably not that useful, because it is unsorted
"""
collection = self.feature_song_collection.copy()
for album in self.discography:
collection.extend(album.song_collection)
return collection
@property
def discography(self) -> List[Album]:
flat_copy_discography = self.main_album_collection.copy()
flat_copy_discography.append(self.feature_album)
return flat_copy_discography
"""
Label
@ -702,7 +628,6 @@ class Label(Base):
@property
def indexing_values(self) -> List[Tuple[str, object]]:
return [
('id', self.id),
('name', unify(self.name)),
*[('url', source.url) for source in self.source_collection]
]

View File

@ -2,9 +2,12 @@ from __future__ import annotations
from collections import defaultdict
from enum import Enum
from typing import List, Dict, Set, Tuple, Optional, Iterable
from urllib.parse import urlparse
from typing import List, Dict, Set, Tuple, Optional, Iterable, Generator
from urllib.parse import urlparse, ParseResult
from dataclasses import dataclass, field
from functools import cached_property
from ..utils import generate_id
from ..utils.enums.source import SourcePages, SourceTypes
from ..utils.config import youtube_settings
from ..utils.string_processing import hash_url
@ -14,29 +17,26 @@ from .parents import OuterProxy
from .collection import Collection
class Source(OuterProxy):
@dataclass
class Source:
url: str
page_enum: SourcePages
referer_page: SourcePages
referrer_page: SourcePages
audio_url: Optional[str]
audio_url: str
id: int = field(default_factory=generate_id)
additional_data: dict = field(default_factory=dict)
_default_factories = {
"audio_url": lambda: None,
}
# This is automatically generated
def __init__(self, page_enum: SourcePages, url: str, referer_page: SourcePages = None, audio_url: str = None,
**kwargs) -> None:
if referer_page is None:
referer_page = page_enum
super().__init__(url=url, page_enum=page_enum, referer_page=referer_page, audio_url=audio_url, **kwargs)
def __post_init__(self):
self.referrer_page = self.referrer_page or self.page_enum
@cached_property
def parsed_url(self) -> ParseResult:
return urlparse(self.url)
@classmethod
def match_url(cls, url: str, referer_page: SourcePages) -> Optional["Source"]:
def match_url(cls, url: str, referrer_page: SourcePages) -> Optional["Source"]:
"""
this shouldn't be used, unlesse you are not certain what the source is for
the reason is that it is more inefficient
@ -45,38 +45,38 @@ class Source(OuterProxy):
url = parsed.geturl()
if "musify" in parsed.netloc:
return cls(SourcePages.MUSIFY, url, referer_page=referer_page)
return cls(SourcePages.MUSIFY, url, referrer_page=referrer_page)
if parsed.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]:
return cls(SourcePages.YOUTUBE, url, referer_page=referer_page)
return cls(SourcePages.YOUTUBE, url, referrer_page=referrer_page)
if url.startswith("https://www.deezer"):
return cls(SourcePages.DEEZER, url, referer_page=referer_page)
return cls(SourcePages.DEEZER, url, referrer_page=referrer_page)
if url.startswith("https://open.spotify.com"):
return cls(SourcePages.SPOTIFY, url, referer_page=referer_page)
return cls(SourcePages.SPOTIFY, url, referrer_page=referrer_page)
if "bandcamp" in url:
return cls(SourcePages.BANDCAMP, url, referer_page=referer_page)
return cls(SourcePages.BANDCAMP, url, referrer_page=referrer_page)
if "wikipedia" in parsed.netloc:
return cls(SourcePages.WIKIPEDIA, url, referer_page=referer_page)
return cls(SourcePages.WIKIPEDIA, url, referrer_page=referrer_page)
if url.startswith("https://www.metal-archives.com/"):
return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url, referer_page=referer_page)
return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page)
# the less important once
if url.startswith("https://www.facebook"):
return cls(SourcePages.FACEBOOK, url, referer_page=referer_page)
return cls(SourcePages.FACEBOOK, url, referrer_page=referrer_page)
if url.startswith("https://www.instagram"):
return cls(SourcePages.INSTAGRAM, url, referer_page=referer_page)
return cls(SourcePages.INSTAGRAM, url, referrer_page=referrer_page)
if url.startswith("https://twitter"):
return cls(SourcePages.TWITTER, url, referer_page=referer_page)
return cls(SourcePages.TWITTER, url, referrer_page=referrer_page)
if url.startswith("https://myspace.com"):
return cls(SourcePages.MYSPACE, url, referer_page=referer_page)
return cls(SourcePages.MYSPACE, url, referrer_page=referrer_page)
def get_song_metadata(self) -> Metadata:
return Metadata({
@ -120,16 +120,23 @@ class Source(OuterProxy):
homepage = property(fget=lambda self: SourcePages.get_homepage(self.page_enum))
class SourceCollection(Collection):
class SourceCollection:
_page_to_source_list: Dict[SourcePages, List[Source]]
def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs):
self._page_to_source_list: Dict[SourcePages, List[Source]] = defaultdict(list)
self._page_to_source_list = defaultdict(list)
super().__init__(data=data, **kwargs)
def get_sources(self, *source_pages: List[Source]) -> Generator[Source]:
for page in source_pages:
yield from self._page_to_source_list[page]
def _map_element(self, __object: Source, **kwargs):
super()._map_element(__object, **kwargs)
def append(self, source: Source):
pass
self._page_to_source_list[__object.page_enum].append(__object)
def extend(self, sources: Iterable[Source]):
for source in sources:
self.append(source)
@property
def source_pages(self) -> Set[SourcePages]:

View File

@ -185,7 +185,7 @@ class Bandcamp(Page):
if li is None and li['href'] is not None:
continue
source_list.append(Source.match_url(_parse_artist_url(li['href']), referer_page=self.SOURCE_TYPE))
source_list.append(Source.match_url(_parse_artist_url(li['href']), referrer_page=self.SOURCE_TYPE))
return Artist(
name=name,

View File

@ -486,7 +486,7 @@ class EncyclopaediaMetallum(Page):
href = anchor["href"]
if href is not None:
source_list.append(Source.match_url(href, referer_page=self.SOURCE_TYPE))
source_list.append(Source.match_url(href, referrer_page=self.SOURCE_TYPE))
# The following code is only legacy code, which I just kep because it doesn't harm.
# The way ma returns sources changed.
@ -504,7 +504,7 @@ class EncyclopaediaMetallum(Page):
if url is None:
continue
source_list.append(Source.match_url(url, referer_page=self.SOURCE_TYPE))
source_list.append(Source.match_url(url, referrer_page=self.SOURCE_TYPE))
return source_list

View File

@ -503,7 +503,7 @@ class Musify(Page):
source_list.append(Source(
SourcePages.YOUTUBE,
iframe["src"],
referer_page=self.SOURCE_TYPE
referrer_page=self.SOURCE_TYPE
))
return Song(
@ -812,7 +812,7 @@ class Musify(Page):
href = additional_source.get("href")
if href is None:
continue
new_src = Source.match_url(href, referer_page=self.SOURCE_TYPE)
new_src = Source.match_url(href, referrer_page=self.SOURCE_TYPE)
if new_src is None:
continue
source_list.append(new_src)

View File

@ -71,6 +71,12 @@ def object_trace(obj):
misc functions
"""
_auto_increment = 0
def generate_id() -> int:
global _auto_increment
_auto_increment += 1
return _auto_increment
def get_current_millis() -> int:
dt = datetime.now()
return int(dt.microsecond / 1_000)

View File

@ -6,6 +6,7 @@ from functools import lru_cache
from transliterate.exceptions import LanguageDetectionError
from transliterate import translit
from pathvalidate import sanitize_filename
from urllib.parse import urlparse, ParseResult, parse_qs
COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = (
@ -21,6 +22,7 @@ def unify(string: str) -> str:
returns a unified str, to make comparisons easy.
a unified string has the following attributes:
- is lowercase
- is transliterated to Latin characters from e.g. Cyrillic
"""
if string is None:
@ -132,8 +134,27 @@ def unify_punctuation(to_unify: str) -> str:
to_unify = to_unify.replace(char, UNIFY_TO)
return to_unify
def hash_url(url: str) -> int:
return url.strip().lower().lstrip("https://").lstrip("http://")
def hash_url(url: Union[str, ParseResult]) -> str:
if isinstance(url, str):
url = urlparse(url)
query = url.query
query_dict: Optional[dict] = None
try:
query_dict: dict = parse_qs(url.query, strict_parsing=True)
except ValueError:
# the query couldn't be parsed
pass
if isinstance(query_dict, dict):
# sort keys alphabetically
query = ""
for key, value in sorted(query_dict.items(), key=lambda i: i[0]):
query += f"_{key.strip()}_{''.join(i.strip() for i in value)}"
r = f"{url.netloc}_{url.path.replace('/', '_')}{query}"
r = r.lower().strip()
return r
def remove_feature_part_from_track(title: str) -> str: