Merge pull request 'fix/bandcamp' (#12) from fix/bandcamp into experimental
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
Reviewed-on: #12
This commit is contained in:
commit
cacff47643
@ -7,7 +7,8 @@ logging.getLogger().setLevel(logging.DEBUG)
|
||||
if __name__ == "__main__":
|
||||
commands = [
|
||||
"s: #a Ghost Bath",
|
||||
"4",
|
||||
"0",
|
||||
"d: 1",
|
||||
]
|
||||
|
||||
|
||||
|
@ -2,91 +2,30 @@ import music_kraken
|
||||
from music_kraken.objects import Song, Album, Artist, Collection
|
||||
|
||||
if __name__ == "__main__":
|
||||
artist: Artist = Artist(
|
||||
name="artist",
|
||||
main_album_list=[
|
||||
Album(
|
||||
title="album",
|
||||
song_list=[
|
||||
Song(
|
||||
title="song",
|
||||
album_list=[
|
||||
Album(
|
||||
title="album",
|
||||
albumsort=123,
|
||||
main_artist=Artist(name="artist"),
|
||||
),
|
||||
],
|
||||
),
|
||||
Song(
|
||||
title="other_song",
|
||||
album_list=[
|
||||
Album(title="album", albumsort=423),
|
||||
],
|
||||
),
|
||||
]
|
||||
),
|
||||
Album(title="album", barcode="1234567890123"),
|
||||
album_1 = Album(
|
||||
title="album",
|
||||
song_list=[
|
||||
Song(title="song", main_artist_list=[Artist(name="artist")]),
|
||||
],
|
||||
artist_list=[
|
||||
Artist(name="artist 3"),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
other_artist: Artist = Artist(
|
||||
name="artist",
|
||||
main_album_list=[
|
||||
Album(
|
||||
title="album",
|
||||
song_list=[
|
||||
Song(
|
||||
title="song",
|
||||
album_list=[
|
||||
Album(
|
||||
title="album",
|
||||
albumsort=123,
|
||||
main_artist=Artist(name="other_artist"),
|
||||
),
|
||||
],
|
||||
),
|
||||
Song(
|
||||
title="other_song",
|
||||
album_list=[
|
||||
Album(title="album", albumsort=423),
|
||||
],
|
||||
),
|
||||
]
|
||||
),
|
||||
Album(title="album", barcode="1234567890123"),
|
||||
album_2 = Album(
|
||||
title="album",
|
||||
song_list=[
|
||||
Song(title="song", main_artist_list=[Artist(name="artist 2")]),
|
||||
],
|
||||
artist_list=[
|
||||
Artist(name="artist"),
|
||||
]
|
||||
)
|
||||
|
||||
artist.merge(other_artist)
|
||||
album_1.merge(album_2)
|
||||
|
||||
a = artist.main_album_collection[0]
|
||||
b = a.song_collection[0].album_collection[0]
|
||||
c = a.song_collection[1].album_collection[0]
|
||||
d = b.song_collection[0].album_collection[0]
|
||||
e = d.song_collection[0].album_collection[0]
|
||||
f = e.song_collection[0].album_collection[0]
|
||||
g = f.song_collection[0].album_collection[0]
|
||||
|
||||
print(a.id, a.title, a.barcode, a.albumsort)
|
||||
print(b.id, b.title, b.barcode, b.albumsort)
|
||||
print(c.id, c.title, c.barcode, c.albumsort)
|
||||
print(d.id, d.title, d.barcode, d.albumsort)
|
||||
print(e.id, e.title, e.barcode, e.albumsort)
|
||||
print(f.id, f.title, f.barcode, f.albumsort)
|
||||
print(g.id, g.title, g.barcode, g.albumsort)
|
||||
print()
|
||||
print(*(f"{a.title_string} ; {a.id}" for a in album_1.artist_collection.data), sep=" | ")
|
||||
|
||||
d.title = "new_title"
|
||||
|
||||
print(a.id, a.title, a.barcode, a.albumsort)
|
||||
print(b.id, b.title, b.barcode, b.albumsort)
|
||||
print(c.id, c.title, c.barcode, c.albumsort)
|
||||
print(d.id, d.title, d.barcode, d.albumsort)
|
||||
print(e.id, e.title, e.barcode, e.albumsort)
|
||||
print(f.id, f.title, f.barcode, f.albumsort)
|
||||
print(g.id, g.title, g.barcode, g.albumsort)
|
||||
print()
|
||||
|
||||
print(artist.main_album_collection._indexed_values)
|
||||
print(id(album_1.artist_collection), id(album_2.artist_collection))
|
||||
print(id(album_1.song_collection[0].main_artist_collection), id(album_2.song_collection[0].main_artist_collection))
|
@ -14,7 +14,7 @@ from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic,
|
||||
|
||||
|
||||
ALL_PAGES: Set[Type[Page]] = {
|
||||
EncyclopaediaMetallum,
|
||||
# EncyclopaediaMetallum,
|
||||
Musify,
|
||||
YoutubeMusic,
|
||||
Bandcamp
|
||||
|
@ -1,8 +1,9 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Generator, Union
|
||||
from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Generator, Union, Any
|
||||
from .parents import OuterProxy
|
||||
from ..utils import object_trace
|
||||
|
||||
T = TypeVar('T', bound=OuterProxy)
|
||||
|
||||
@ -21,186 +22,62 @@ class Collection(Generic[T]):
|
||||
self,
|
||||
data: Optional[Iterable[T]] = None,
|
||||
sync_on_append: Dict[str, Collection] = None,
|
||||
contain_given_in_attribute: Dict[str, Collection] = None,
|
||||
contain_attribute_in_given: Dict[str, Collection] = None,
|
||||
append_object_to_attribute: Dict[str, T] = None
|
||||
append_object_to_attribute: Dict[str, T] = None,
|
||||
extend_object_to_attribute: Dict[str, Collection] = None,
|
||||
) -> None:
|
||||
self._collection_for: dict = dict()
|
||||
|
||||
self._contains_ids = set()
|
||||
self._data = []
|
||||
|
||||
self.parents: List[Collection[T]] = []
|
||||
self.children: List[Collection[T]] = []
|
||||
|
||||
# List of collection attributes that should be modified on append
|
||||
# Key: collection attribute (str) of appended element
|
||||
# Value: main collection to sync to
|
||||
self.contain_given_in_attribute: Dict[str, Collection] = contain_given_in_attribute or {}
|
||||
self.append_object_to_attribute: Dict[str, T] = append_object_to_attribute or {}
|
||||
self.extend_object_to_attribute: Dict[str, Collection[T]] = extend_object_to_attribute or {}
|
||||
self.sync_on_append: Dict[str, Collection] = sync_on_append or {}
|
||||
|
||||
self._id_to_index_values: Dict[int, set] = defaultdict(set)
|
||||
self._indexed_values = defaultdict(lambda: None)
|
||||
self._indexed_to_objects = defaultdict(lambda: None)
|
||||
|
||||
# This is to cleanly unmap previously mapped items by their id
|
||||
self._indexed_from_id: Dict[int, Dict[str, Any]] = defaultdict(dict)
|
||||
# this is to keep track and look up the actual objects
|
||||
self._indexed_values: Dict[str, Dict[Any, T]] = defaultdict(dict)
|
||||
|
||||
self.extend(data)
|
||||
|
||||
def _map_element(self, __object: T, from_map: bool = False):
|
||||
self._contains_ids.add(__object.id)
|
||||
def __repr__(self) -> str:
|
||||
return f"Collection({id(self)})"
|
||||
|
||||
for name, value in (*__object.indexing_values, ('id', __object.id)):
|
||||
def _map_element(self, __object: T, from_map: bool = False):
|
||||
self._unmap_element(__object.id)
|
||||
|
||||
self._indexed_from_id[__object.id]["id"] = __object.id
|
||||
self._indexed_values["id"][__object.id] = __object
|
||||
|
||||
for name, value in __object.indexing_values:
|
||||
if value is None or value == __object._inner._default_values.get(name):
|
||||
continue
|
||||
|
||||
self._indexed_values[name] = value
|
||||
self._indexed_to_objects[value] = __object
|
||||
|
||||
self._id_to_index_values[__object.id].add((name, value))
|
||||
self._indexed_values[name][value] = __object
|
||||
self._indexed_from_id[__object.id][name] = value
|
||||
|
||||
def _unmap_element(self, __object: Union[T, int]):
|
||||
obj_id = __object.id if isinstance(__object, OuterProxy) else __object
|
||||
|
||||
if obj_id in self._contains_ids:
|
||||
self._contains_ids.remove(obj_id)
|
||||
|
||||
for name, value in self._id_to_index_values[obj_id]:
|
||||
if name in self._indexed_values:
|
||||
del self._indexed_values[name]
|
||||
if value in self._indexed_to_objects:
|
||||
del self._indexed_to_objects[value]
|
||||
|
||||
del self._id_to_index_values[obj_id]
|
||||
|
||||
def _contained_in_self(self, __object: T) -> bool:
|
||||
if __object.id in self._contains_ids:
|
||||
return True
|
||||
|
||||
for name, value in __object.indexing_values:
|
||||
if value is None:
|
||||
continue
|
||||
if value == self._indexed_values[name]:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _contained_in_sub(self, __object: T, break_at_first: bool = True) -> List[Collection]:
|
||||
"""
|
||||
Gets the collection this object is found in, if it is found in any.
|
||||
|
||||
:param __object:
|
||||
:param break_at_first:
|
||||
:return:
|
||||
"""
|
||||
results = []
|
||||
|
||||
if self._contained_in_self(__object):
|
||||
return [self]
|
||||
|
||||
for collection in self.children:
|
||||
results.extend(collection._contained_in_sub(__object, break_at_first=break_at_first))
|
||||
|
||||
if break_at_first:
|
||||
return results
|
||||
|
||||
return results
|
||||
|
||||
def _get_root_collections(self) -> List[Collection]:
|
||||
if not len(self.parents):
|
||||
return [self]
|
||||
|
||||
root_collections = []
|
||||
for upper_collection in self.parents:
|
||||
root_collections.extend(upper_collection._get_root_collections())
|
||||
return root_collections
|
||||
|
||||
@property
|
||||
def _is_root(self) -> bool:
|
||||
return len(self.parents) <= 0
|
||||
|
||||
def _get_parents_of_multiple_contained_children(self, __object: T):
|
||||
results = []
|
||||
if len(self.children) < 2 or self._contained_in_self(__object):
|
||||
return results
|
||||
|
||||
count = 0
|
||||
|
||||
for collection in self.children:
|
||||
sub_results = collection._get_parents_of_multiple_contained_children(__object)
|
||||
|
||||
if len(sub_results) > 0:
|
||||
count += 1
|
||||
results.extend(sub_results)
|
||||
|
||||
if count >= 2:
|
||||
results.append(self)
|
||||
|
||||
return results
|
||||
|
||||
def merge_into_self(self, __object: T, from_map: bool = False):
|
||||
"""
|
||||
1. find existing objects
|
||||
2. merge into existing object
|
||||
3. remap existing object
|
||||
"""
|
||||
if __object.id in self._contains_ids:
|
||||
if obj_id not in self._indexed_from_id:
|
||||
return
|
||||
|
||||
existing_object: T = None
|
||||
for name, value in self._indexed_from_id[obj_id].items():
|
||||
if value in self._indexed_values[name]:
|
||||
del self._indexed_values[name][value]
|
||||
|
||||
del self._indexed_from_id[obj_id]
|
||||
|
||||
def _find_object(self, __object: T) -> Optional[T]:
|
||||
for name, value in __object.indexing_values:
|
||||
if value is None:
|
||||
continue
|
||||
|
||||
if value == self._indexed_values[name]:
|
||||
existing_object = self._indexed_to_objects[value]
|
||||
if existing_object.id == __object.id:
|
||||
return None
|
||||
|
||||
break
|
||||
|
||||
if existing_object is None:
|
||||
return None
|
||||
|
||||
existing_object.merge(__object)
|
||||
|
||||
# just a check if it really worked
|
||||
if existing_object.id != __object.id:
|
||||
raise ValueError("This should NEVER happen. Merging doesn't work.")
|
||||
|
||||
self._map_element(existing_object, from_map=from_map)
|
||||
|
||||
def contains(self, __object: T) -> bool:
|
||||
return len(self._contained_in_sub(__object)) > 0
|
||||
|
||||
def _find_object_in_self(self, __object: T) -> Optional[T]:
|
||||
for name, value in __object.indexing_values:
|
||||
if value == self._indexed_values[name]:
|
||||
return self._indexed_to_objects[value]
|
||||
|
||||
def _find_object(self, __object: T, no_sibling: bool = False) -> Tuple[Collection[T], Optional[T]]:
|
||||
other_object = self._find_object_in_self(__object)
|
||||
if other_object is not None:
|
||||
return self, other_object
|
||||
|
||||
for c in self.children:
|
||||
o, other_object = c._find_object(__object)
|
||||
if other_object is not None:
|
||||
return o, other_object
|
||||
|
||||
if no_sibling:
|
||||
return self, None
|
||||
|
||||
"""
|
||||
# find in siblings and all children of siblings
|
||||
for parent in self.parents:
|
||||
for sibling in parent.children:
|
||||
if sibling is self:
|
||||
continue
|
||||
|
||||
o, other_object = sibling._find_object(__object, no_sibling=True)
|
||||
if other_object is not None:
|
||||
return o, other_object
|
||||
"""
|
||||
|
||||
return self, None
|
||||
if value in self._indexed_values[name]:
|
||||
return self._indexed_values[name][value]
|
||||
|
||||
def append(self, __object: Optional[T], already_is_parent: bool = False, from_map: bool = False):
|
||||
"""
|
||||
@ -217,23 +94,32 @@ class Collection(Generic[T]):
|
||||
if __object is None:
|
||||
return
|
||||
|
||||
append_to, existing_object = self._find_object(__object)
|
||||
existing_object = self._find_object(__object)
|
||||
|
||||
if existing_object is None:
|
||||
# append
|
||||
append_to._data.append(__object)
|
||||
append_to._map_element(__object)
|
||||
self._data.append(__object)
|
||||
self._map_element(__object)
|
||||
|
||||
# only modify collections if the object actually has been appended
|
||||
for collection_attribute, child_collection in self.contain_given_in_attribute.items():
|
||||
__object.__getattribute__(collection_attribute).contain_collection_inside(child_collection, __object)
|
||||
for collection_attribute, child_collection in self.extend_object_to_attribute.items():
|
||||
__object.__getattribute__(collection_attribute).extend(child_collection)
|
||||
|
||||
for attribute, new_object in self.append_object_to_attribute.items():
|
||||
__object.__getattribute__(attribute).append(new_object)
|
||||
|
||||
for attribute, collection in self.sync_on_append.items():
|
||||
collection.extend(__object.__getattribute__(attribute))
|
||||
__object.__setattr__(attribute, collection)
|
||||
|
||||
# only modify collections if the object actually has been appended
|
||||
for attribute, a in self.sync_on_append.items():
|
||||
b = __object.__getattribute__(attribute)
|
||||
object_trace(f"Syncing [{a}{id(a)}] = [{b}{id(b)}]")
|
||||
|
||||
data_to_extend = b.data
|
||||
|
||||
a._collection_for.update(b._collection_for)
|
||||
for synced_with, key in b._collection_for.items():
|
||||
synced_with.__setattr__(key, a)
|
||||
|
||||
a.extend(data_to_extend)
|
||||
|
||||
|
||||
else:
|
||||
# merge only if the two objects are not the same
|
||||
@ -245,9 +131,9 @@ class Collection(Generic[T]):
|
||||
existing_object.merge(__object)
|
||||
|
||||
if existing_object.id != old_id:
|
||||
append_to._unmap_element(old_id)
|
||||
self._unmap_element(old_id)
|
||||
|
||||
append_to._map_element(existing_object)
|
||||
self._map_element(existing_object)
|
||||
|
||||
def extend(self, __iterable: Optional[Generator[T, None, None]]):
|
||||
if __iterable is None:
|
||||
@ -256,54 +142,22 @@ class Collection(Generic[T]):
|
||||
for __object in __iterable:
|
||||
self.append(__object)
|
||||
|
||||
def contain_collection_inside(self, sub_collection: Collection, _object: T):
|
||||
"""
|
||||
This collection will ALWAYS contain everything from the passed in collection
|
||||
"""
|
||||
if self is sub_collection or sub_collection in self.children:
|
||||
return
|
||||
|
||||
_object._inner._is_collection_child[self] = sub_collection
|
||||
_object._inner._is_collection_parent[sub_collection] = self
|
||||
|
||||
self.children.append(sub_collection)
|
||||
sub_collection.parents.append(self)
|
||||
|
||||
@property
|
||||
def data(self) -> List[T]:
|
||||
return list(self.__iter__())
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self._data) + sum(len(collection) for collection in self.children)
|
||||
return len(self._data)
|
||||
|
||||
@property
|
||||
def empty(self) -> bool:
|
||||
return self.__len__() <= 0
|
||||
|
||||
def __iter__(self, finished_ids: set = None) -> Iterator[T]:
|
||||
_finished_ids = finished_ids or set()
|
||||
|
||||
for element in self._data:
|
||||
if element.id in _finished_ids:
|
||||
continue
|
||||
_finished_ids.add(element.id)
|
||||
yield element
|
||||
|
||||
for c in self.children:
|
||||
yield from c.__iter__(finished_ids=finished_ids)
|
||||
def __iter__(self) -> Iterator[T]:
|
||||
yield from self._data
|
||||
|
||||
def __merge__(self, __other: Collection, override: bool = False):
|
||||
self.extend(__other)
|
||||
|
||||
def __getitem__(self, item: int):
|
||||
if item < len(self._data):
|
||||
return self._data[item]
|
||||
|
||||
item = item - len(self._data)
|
||||
|
||||
for c in self.children:
|
||||
if item < len(c):
|
||||
return c.__getitem__(item)
|
||||
item = item - len(c._data)
|
||||
|
||||
raise IndexError
|
||||
return self._data[item]
|
||||
|
@ -1,5 +1,10 @@
|
||||
import mistune
|
||||
import html2markdown
|
||||
from markdownify import markdownify as md
|
||||
|
||||
|
||||
def plain_to_markdown(plain: str) -> str:
    """
    Converts plain text to markdown while keeping every line break.

    Markdown only renders a newline as a hard line break if the line ends
    with two or more spaces; with a single trailing space the break is a
    soft one and the lines are joined when the markdown is rendered.

    :param plain: the plain text, for example lyrics with one line per row
    :return: the text with every newline turned into a markdown hard break
    """
    # normalise windows and old mac line endings, so no stray "\r" survives
    normalised = plain.replace("\r\n", "\n").replace("\r", "\n")
    return normalised.replace("\n", "  \n")
|
||||
|
||||
|
||||
class FormattedText:
|
||||
html = ""
|
||||
@ -7,12 +12,15 @@ class FormattedText:
|
||||
def __init__(
|
||||
self,
|
||||
markdown: str = None,
|
||||
html: str = None
|
||||
html: str = None,
|
||||
plain: str = None,
|
||||
) -> None:
|
||||
if html is not None:
|
||||
self.html = html
|
||||
elif markdown is not None:
|
||||
self.html = mistune.markdown(markdown)
|
||||
elif plain is not None:
|
||||
self.html = mistune.markdown(plain_to_markdown(plain))
|
||||
|
||||
@property
|
||||
def is_empty(self) -> bool:
|
||||
@ -28,7 +36,7 @@ class FormattedText:
|
||||
|
||||
@property
|
||||
def markdown(self) -> str:
|
||||
return html2markdown.convert(self.html)
|
||||
return md(self.html).strip()
|
||||
|
||||
def __str__(self) -> str:
|
||||
return self.markdown
|
||||
|
@ -34,6 +34,6 @@ class Lyrics(OuterProxy):
|
||||
@property
|
||||
def metadata(self) -> Metadata:
|
||||
return Metadata({
|
||||
id3Mapping.UNSYNCED_LYRICS: [self.text.html]
|
||||
id3Mapping.UNSYNCED_LYRICS: [self.text.markdown]
|
||||
})
|
||||
|
||||
|
@ -32,19 +32,21 @@ class InnerData:
|
||||
|
||||
def __init__(self, object_type, **kwargs):
|
||||
self._refers_to_instances = set()
|
||||
self._fetched_from: dict = {}
|
||||
|
||||
# collection : collection that is a collection of self
|
||||
self._is_collection_child: Dict[Collection, Collection] = {}
|
||||
self._is_collection_parent: Dict[Collection, Collection] = {}
|
||||
|
||||
# initialize the default values
|
||||
self._default_values = {}
|
||||
for name, factory in object_type._default_factories.items():
|
||||
self._default_values[name] = factory()
|
||||
|
||||
for key, value in kwargs.items():
|
||||
if hasattr(value, "__is_collection__"):
|
||||
value._collection_for[self] = key
|
||||
self.__setattr__(key, value)
|
||||
|
||||
def __hash__(self):
|
||||
return self.id
|
||||
|
||||
def __merge__(self, __other: InnerData, override: bool = False):
|
||||
"""
|
||||
:param __other:
|
||||
@ -52,6 +54,8 @@ class InnerData:
|
||||
:return:
|
||||
"""
|
||||
|
||||
self._fetched_from.update(__other._fetched_from)
|
||||
|
||||
for key, value in __other.__dict__.copy().items():
|
||||
# just set the other value if self doesn't already have it
|
||||
if key not in self.__dict__ or (key in self.__dict__ and self.__dict__[key] == self._default_values.get(key)):
|
||||
@ -85,7 +89,7 @@ class OuterProxy:
|
||||
def __init__(self, _id: int = None, dynamic: bool = False, **kwargs):
|
||||
_automatic_id: bool = False
|
||||
|
||||
if _id is None and not dynamic:
|
||||
if _id is None:
|
||||
"""
|
||||
generates a random integer id
|
||||
the range is defined in the config
|
||||
@ -109,11 +113,11 @@ class OuterProxy:
|
||||
|
||||
del kwargs[name]
|
||||
|
||||
self._fetched_from: dict = {}
|
||||
self._inner: InnerData = InnerData(type(self), **kwargs)
|
||||
self._inner._refers_to_instances.add(self)
|
||||
|
||||
object_trace(f"creating {type(self).__name__} [{self.title_string}]")
|
||||
|
||||
self.__init_collections__()
|
||||
|
||||
for name, data_list in collection_data.items():
|
||||
@ -192,19 +196,7 @@ class OuterProxy:
|
||||
if len(b._inner._refers_to_instances) > len(a._inner._refers_to_instances):
|
||||
a, b = b, a
|
||||
|
||||
object_trace(f"merging {type(a).__name__} [{a.title_string} | {a.id}] with {type(b).__name__} [{b.title_string} | {b.id}] called by [{' | '.join(f'{s.function} {Path(s.filename).name}:{str(s.lineno)}' for s in inspect.stack()[1:5])}]")
|
||||
|
||||
for collection, child_collection in b._inner._is_collection_child.items():
|
||||
try:
|
||||
collection.children.remove(child_collection)
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
for collection, parent_collection in b._inner._is_collection_parent.items():
|
||||
try:
|
||||
collection.parents.remove(parent_collection)
|
||||
except ValueError:
|
||||
pass
|
||||
object_trace(f"merging {type(a).__name__} [{a.title_string} | {a.id}] with {type(b).__name__} [{b.title_string} | {b.id}]")
|
||||
|
||||
old_inner = b._inner
|
||||
|
||||
@ -220,13 +212,13 @@ class OuterProxy:
|
||||
|
||||
def mark_as_fetched(self, *url_hash_list: List[str]):
|
||||
for url_hash in url_hash_list:
|
||||
self._fetched_from[url_hash] = {
|
||||
self._inner._fetched_from[url_hash] = {
|
||||
"time": get_unix_time(),
|
||||
"url": url_hash,
|
||||
}
|
||||
|
||||
def already_fetched_from(self, url_hash: str) -> bool:
|
||||
res = self._fetched_from.get(url_hash, None)
|
||||
res = self._inner._fetched_from.get(url_hash, None)
|
||||
|
||||
if res is None:
|
||||
return False
|
||||
|
@ -86,11 +86,6 @@ class Song(Base):
|
||||
TITEL = "title"
|
||||
|
||||
def __init_collections__(self) -> None:
|
||||
"""
|
||||
self.album_collection.contain_given_in_attribute = {
|
||||
"artist_collection": self.main_artist_collection,
|
||||
}
|
||||
"""
|
||||
self.album_collection.sync_on_append = {
|
||||
"artist_collection": self.main_artist_collection,
|
||||
}
|
||||
@ -98,8 +93,7 @@ class Song(Base):
|
||||
self.album_collection.append_object_to_attribute = {
|
||||
"song_collection": self,
|
||||
}
|
||||
|
||||
self.main_artist_collection.contain_given_in_attribute = {
|
||||
self.main_artist_collection.extend_object_to_attribute = {
|
||||
"main_album_collection": self.album_collection
|
||||
}
|
||||
self.feature_artist_collection.append_object_to_attribute = {
|
||||
@ -126,7 +120,7 @@ class Song(Base):
|
||||
def indexing_values(self) -> List[Tuple[str, object]]:
|
||||
return [
|
||||
('id', self.id),
|
||||
('title', unify(self.unified_title)),
|
||||
('title', unify(self.title)),
|
||||
('isrc', self.isrc),
|
||||
*[('url', source.url) for source in self.source_collection]
|
||||
]
|
||||
@ -209,6 +203,7 @@ class Album(Base):
|
||||
notes: FormattedText
|
||||
|
||||
source_collection: SourceCollection
|
||||
|
||||
artist_collection: Collection[Artist]
|
||||
song_collection: Collection[Song]
|
||||
label_collection: Collection[Label]
|
||||
@ -258,7 +253,7 @@ class Album(Base):
|
||||
self.artist_collection.append_object_to_attribute = {
|
||||
"main_album_collection": self
|
||||
}
|
||||
self.artist_collection.contain_given_in_attribute = {
|
||||
self.artist_collection.extend_object_to_attribute = {
|
||||
"label_collection": self.label_collection
|
||||
}
|
||||
|
||||
@ -347,7 +342,6 @@ class Album(Base):
|
||||
tracksort_map[i] = existing_list.pop(0)
|
||||
tracksort_map[i].tracksort = i
|
||||
|
||||
|
||||
def compile(self, merge_into: bool = False):
|
||||
"""
|
||||
compiles the recursive structures,
|
||||
|
@ -18,10 +18,12 @@ from ..objects import (
|
||||
Contact,
|
||||
ID3Timestamp,
|
||||
Lyrics,
|
||||
FormattedText
|
||||
FormattedText,
|
||||
Artwork,
|
||||
)
|
||||
from ..connection import Connection
|
||||
from ..utils.support_classes.download_result import DownloadResult
|
||||
from ..utils.string_processing import clean_song_title
|
||||
from ..utils.config import main_settings, logging_settings
|
||||
from ..utils.shared import DEBUG
|
||||
|
||||
@ -114,7 +116,7 @@ class Bandcamp(Page):
|
||||
|
||||
if object_type is BandcampTypes.SONG:
|
||||
return Song(
|
||||
title=name.strip(),
|
||||
title=clean_song_title(name, artist_name=data["band_name"]),
|
||||
source_list=source_list,
|
||||
main_artist_list=[
|
||||
Artist(
|
||||
@ -252,11 +254,18 @@ class Bandcamp(Page):
|
||||
artist.source_collection.append(source)
|
||||
return artist
|
||||
|
||||
def _parse_track_element(self, track: dict) -> Optional[Song]:
|
||||
def _parse_track_element(self, track: dict, artwork: Artwork) -> Optional[Song]:
|
||||
lyrics_list: List[Lyrics] = []
|
||||
|
||||
_lyrics: Optional[str] = track.get("item", {}).get("recordingOf", {}).get("lyrics", {}).get("text")
|
||||
if _lyrics is not None:
|
||||
lyrics_list.append(Lyrics(text=FormattedText(plain=_lyrics)))
|
||||
|
||||
return Song(
|
||||
title=track["item"]["name"].strip(),
|
||||
title=clean_song_title(track["item"]["name"]),
|
||||
source_list=[Source(self.SOURCE_TYPE, track["item"]["mainEntityOfPage"])],
|
||||
tracksort=int(track["position"])
|
||||
tracksort=int(track["position"]),
|
||||
artwork=artwork,
|
||||
)
|
||||
|
||||
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
|
||||
@ -289,12 +298,32 @@ class Bandcamp(Page):
|
||||
)]
|
||||
)
|
||||
|
||||
artwork: Artwork = Artwork()
|
||||
|
||||
def _get_artwork_url(_data: dict) -> Optional[str]:
|
||||
if "image" in _data:
|
||||
return _data["image"]
|
||||
for _property in _data.get("additionalProperty", []):
|
||||
if _property.get("name") == "art_id":
|
||||
return f"https://f4.bcbits.com/img/a{_property.get('value')}_2.jpg"
|
||||
|
||||
_artwork_url = _get_artwork_url(data)
|
||||
if _artwork_url is not None:
|
||||
artwork.append(url=_artwork_url, width=350, height=350)
|
||||
else:
|
||||
for album_release in data.get("albumRelease", []):
|
||||
_artwork_url = _get_artwork_url(album_release)
|
||||
if _artwork_url is not None:
|
||||
artwork.append(url=_artwork_url, width=350, height=350)
|
||||
break
|
||||
|
||||
|
||||
for i, track_json in enumerate(data.get("track", {}).get("itemListElement", [])):
|
||||
if DEBUG:
|
||||
dump_to_file(f"album_track_{i}.json", json.dumps(track_json), is_json=True, exit_after_dump=False)
|
||||
|
||||
try:
|
||||
album.song_collection.append(self._parse_track_element(track_json))
|
||||
album.song_collection.append(self._parse_track_element(track_json, artwork=artwork))
|
||||
except KeyError:
|
||||
continue
|
||||
|
||||
@ -304,7 +333,6 @@ class Bandcamp(Page):
|
||||
def _fetch_lyrics(self, soup: BeautifulSoup) -> List[Lyrics]:
|
||||
track_lyrics = soup.find("div", {"class": "lyricsText"})
|
||||
if track_lyrics:
|
||||
self.LOGGER.debug(" Lyrics retrieved..")
|
||||
return [Lyrics(text=FormattedText(html=track_lyrics.prettify()))]
|
||||
|
||||
return []
|
||||
@ -323,10 +351,9 @@ class Bandcamp(Page):
|
||||
if len(other_data_list) > 0:
|
||||
other_data = json.loads(other_data_list[0]["data-tralbum"])
|
||||
|
||||
if DEBUG:
|
||||
dump_to_file("bandcamp_song_data.json", data_container.text, is_json=True, exit_after_dump=False)
|
||||
dump_to_file("bandcamp_song_data_other.json", json.dumps(other_data), is_json=True, exit_after_dump=False)
|
||||
dump_to_file("bandcamp_song_page.html", r.text, exit_after_dump=False)
|
||||
dump_to_file("bandcamp_song_data.json", data_container.text, is_json=True, exit_after_dump=False)
|
||||
dump_to_file("bandcamp_song_data_other.json", json.dumps(other_data), is_json=True, exit_after_dump=False)
|
||||
dump_to_file("bandcamp_song_page.html", r.text, exit_after_dump=False)
|
||||
|
||||
data = json.loads(data_container.text)
|
||||
album_data = data["inAlbum"]
|
||||
@ -337,8 +364,8 @@ class Bandcamp(Page):
|
||||
mp3_url = value
|
||||
|
||||
song = Song(
|
||||
title=data["name"].strip(),
|
||||
source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), audio_url=mp3_url)],
|
||||
title=clean_song_title(data["name"], artist_name=artist_data["name"]),
|
||||
source_list=[source, Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), audio_url=mp3_url)],
|
||||
album_list=[Album(
|
||||
title=album_data["name"].strip(),
|
||||
date=ID3Timestamp.strptime(data["datePublished"], "%d %b %Y %H:%M:%S %Z"),
|
||||
@ -351,8 +378,6 @@ class Bandcamp(Page):
|
||||
lyrics_list=self._fetch_lyrics(soup=soup)
|
||||
)
|
||||
|
||||
song.source_collection.append(source)
|
||||
|
||||
return song
|
||||
|
||||
def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult:
|
||||
|
@ -2,8 +2,9 @@ from datetime import datetime
|
||||
from pathlib import Path
|
||||
import json
|
||||
import logging
|
||||
import inspect
|
||||
|
||||
from .shared import DEBUG, DEBUG_LOGGING, DEBUG_DUMP, DEBUG_TRACE, DEBUG_OBJECT_TRACE
|
||||
from .shared import DEBUG, DEBUG_LOGGING, DEBUG_DUMP, DEBUG_TRACE, DEBUG_OBJECT_TRACE, DEBUG_OBJECT_TRACE_CALLSTACK
|
||||
from .config import config, read_config, write_config
|
||||
from .enums.colors import BColors
|
||||
from .path_manager import LOCATIONS
|
||||
@ -56,7 +57,8 @@ def object_trace(obj):
|
||||
if not DEBUG_OBJECT_TRACE:
|
||||
return
|
||||
|
||||
output("object: " + str(obj), BColors.GREY)
|
||||
appendix = f" called by [{' | '.join(f'{s.function} {Path(s.filename).name}:{str(s.lineno)}' for s in inspect.stack()[1:5])}]" if DEBUG_OBJECT_TRACE_CALLSTACK else ""
|
||||
output("object: " + str(obj) + appendix, BColors.GREY)
|
||||
|
||||
|
||||
"""
|
||||
|
@ -13,12 +13,13 @@ if not load_dotenv(Path(__file__).parent.parent.parent / ".env"):
|
||||
__stage__ = os.getenv("STAGE", "prod")
|
||||
|
||||
DEBUG = (__stage__ == "dev") and True
|
||||
DEBUG_LOGGING = DEBUG and True
|
||||
DEBUG_LOGGING = DEBUG and False
|
||||
DEBUG_TRACE = DEBUG and True
|
||||
DEBUG_OBJECT_TRACE = DEBUG and False
|
||||
DEBUG_OBJECT_TRACE_CALLSTACK = DEBUG_OBJECT_TRACE and False
|
||||
DEBUG_YOUTUBE_INITIALIZING = DEBUG and False
|
||||
DEBUG_PAGES = DEBUG and False
|
||||
DEBUG_DUMP = DEBUG and True
|
||||
DEBUG_DUMP = DEBUG and False
|
||||
|
||||
if DEBUG:
|
||||
print("DEBUG ACTIVE")
|
||||
|
@ -1,6 +1,7 @@
|
||||
from typing import Tuple, Union
|
||||
from typing import Tuple, Union, Optional
|
||||
from pathlib import Path
|
||||
import string
|
||||
from functools import lru_cache
|
||||
|
||||
from transliterate.exceptions import LanguageDetectionError
|
||||
from transliterate import translit
|
||||
@ -10,8 +11,11 @@ from pathvalidate import sanitize_filename
|
||||
COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = (
|
||||
"(official video)",
|
||||
)
|
||||
OPEN_BRACKETS = "(["
|
||||
CLOSE_BRACKETS = ")]"
|
||||
DISALLOWED_SUBSTRING_IN_BRACKETS = ("official", "video", "audio", "lyrics", "prod", "remix", "ft", "feat", "ft.", "feat.")
|
||||
|
||||
|
||||
@lru_cache
|
||||
def unify(string: str) -> str:
|
||||
"""
|
||||
returns a unified str, to make comparisons easy.
|
||||
@ -52,7 +56,8 @@ def fit_to_file_system(string: Union[str, Path]) -> Union[str, Path]:
|
||||
return fit_string(string)
|
||||
|
||||
|
||||
def clean_song_title(raw_song_title: str, artist_name: str) -> str:
|
||||
@lru_cache(maxsize=128)
|
||||
def clean_song_title(raw_song_title: str, artist_name: Optional[str] = None) -> str:
|
||||
"""
|
||||
This function cleans common naming "conventions" for non clean song titles, like the title of youtube videos
|
||||
|
||||
@ -64,19 +69,45 @@ def clean_song_title(raw_song_title: str, artist_name: str) -> str:
|
||||
- `song (prod. some producer)`
|
||||
"""
|
||||
raw_song_title = raw_song_title.strip()
|
||||
artist_name = artist_name.strip()
|
||||
|
||||
# Clean official Video appendix
|
||||
for dirty_appendix in COMMON_TITLE_APPENDIX_LIST:
|
||||
if raw_song_title.lower().endswith(dirty_appendix):
|
||||
raw_song_title = raw_song_title[:-len(dirty_appendix)].strip()
|
||||
|
||||
# Remove artist from the start of the title
|
||||
if raw_song_title.lower().startswith(artist_name.lower()):
|
||||
raw_song_title = raw_song_title[len(artist_name):].strip()
|
||||
# remove brackets and their content if they contain disallowed substrings
|
||||
for open_bracket, close_bracket in zip(OPEN_BRACKETS, CLOSE_BRACKETS):
|
||||
if open_bracket not in raw_song_title or close_bracket not in raw_song_title:
|
||||
continue
|
||||
|
||||
start = 0
|
||||
|
||||
if raw_song_title.startswith("-"):
|
||||
raw_song_title = raw_song_title[1:].strip()
|
||||
while True:
|
||||
try:
|
||||
open_bracket_index = raw_song_title.index(open_bracket, start)
|
||||
except ValueError:
|
||||
break
|
||||
try:
|
||||
close_bracket_index = raw_song_title.index(close_bracket, open_bracket_index + 1)
|
||||
except ValueError:
|
||||
break
|
||||
|
||||
substring = raw_song_title[open_bracket_index + 1:close_bracket_index]
|
||||
if any(disallowed_substring in substring for disallowed_substring in DISALLOWED_SUBSTRING_IN_BRACKETS):
|
||||
raw_song_title = raw_song_title[:open_bracket_index] + raw_song_title[close_bracket_index + 1:]
|
||||
else:
|
||||
start = close_bracket_index + 1
|
||||
|
||||
# everything that requires the artist name
|
||||
if artist_name is not None:
|
||||
artist_name = artist_name.strip()
|
||||
|
||||
# Remove artist from the start of the title
|
||||
if raw_song_title.lower().startswith(artist_name.lower()):
|
||||
raw_song_title = raw_song_title[len(artist_name):].strip()
|
||||
|
||||
if raw_song_title.startswith("-"):
|
||||
raw_song_title = raw_song_title[1:].strip()
|
||||
|
||||
return raw_song_title.strip()
|
||||
|
||||
|
@ -56,6 +56,7 @@ dependencies = [
|
||||
|
||||
"rich~=13.7.1",
|
||||
"mistune~=3.0.2",
|
||||
"markdownify~=0.12.1",
|
||||
"html2markdown~=0.1.7",
|
||||
"jellyfish~=0.9.0",
|
||||
"transliterate~=1.10.2",
|
||||
|
@ -70,7 +70,49 @@ class TestCollection(unittest.TestCase):
|
||||
self.assertTrue(a.name == b.name == c.name == d.name == "artist")
|
||||
self.assertTrue(a.country == b.country == c.country == d.country)
|
||||
|
||||
"""
|
||||
def test_artist_artist_relation(self):
|
||||
artist = Artist(
|
||||
name="artist",
|
||||
main_album_list=[
|
||||
Album(
|
||||
title="album",
|
||||
song_list=[
|
||||
Song(title="song"),
|
||||
],
|
||||
artist_list=[
|
||||
Artist(name="artist"),
|
||||
]
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
self.assertTrue(artist.id == artist.main_album_collection[0].song_collection[0].main_artist_collection[0].id)
|
||||
|
||||
def test_artist_collection_sync(self):
|
||||
album_1 = Album(
|
||||
title="album",
|
||||
song_list=[
|
||||
Song(title="song", main_artist_list=[Artist(name="artist")]),
|
||||
],
|
||||
artist_list=[
|
||||
Artist(name="artist"),
|
||||
]
|
||||
)
|
||||
|
||||
album_2 = Album(
|
||||
title="album",
|
||||
song_list=[
|
||||
Song(title="song", main_artist_list=[Artist(name="artist")]),
|
||||
],
|
||||
artist_list=[
|
||||
Artist(name="artist"),
|
||||
]
|
||||
)
|
||||
|
||||
album_1.merge(album_2)
|
||||
|
||||
self.assertTrue(id(album_1.artist_collection) == id(album_1.artist_collection) == id(album_1.song_collection[0].main_artist_collection) == id(album_1.song_collection[0].main_artist_collection))
|
||||
|
||||
def test_song_artist_relations(self):
|
||||
a = self.complicated_object()
|
||||
b = a.main_album_collection[0].song_collection[0].main_artist_collection[0]
|
||||
@ -80,7 +122,6 @@ class TestCollection(unittest.TestCase):
|
||||
self.assertTrue(a.id == b.id == c.id == d.id)
|
||||
self.assertTrue(a.name == b.name == c.name == d.name == "artist")
|
||||
self.assertTrue(a.country == b.country == c.country == d.country)
|
||||
"""
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
Loading…
Reference in New Issue
Block a user