Merge pull request 'fix/bandcamp' (#12) from fix/bandcamp into experimental

Reviewed-on: #12
feat: dynamic objects now also have ids
2024-04-23 10:04:51 +00:00 · 2024-04-23 11:52:08 +02:00 · 2024-04-23 11:44:39 +02:00 · 2024-04-23 11:39:25 +02:00 · 2024-04-23 11:37:49 +02:00 · 2024-04-23 09:19:06 +02:00
14 changed files with 238 additions and 349 deletions
--- a/development/actual_donwload.py
+++ b/development/actual_donwload.py
@@ -7,7 +7,8 @@ logging.getLogger().setLevel(logging.DEBUG)
 if __name__ == "__main__":
    commands = [
        "s: #a Ghost Bath",
-        "4",
+        "0",
+        "d: 1",
    ]

    
--- a/development/objects_collection.py
+++ b/development/objects_collection.py
@@ -2,91 +2,30 @@ import music_kraken
 from music_kraken.objects import Song, Album, Artist, Collection

 if __name__ == "__main__":
-    artist: Artist = Artist(
-        name="artist",
-        main_album_list=[
-            Album(
-                title="album",
-                song_list=[
-                    Song(
-                        title="song",
-                        album_list=[
-                            Album(
-                                title="album", 
-                                albumsort=123,
-                                main_artist=Artist(name="artist"),
-                            ),
-                        ],
-                    ),
-                    Song(
-                        title="other_song",
-                        album_list=[
-                            Album(title="album", albumsort=423),
-                        ],
-                    ),
-                ]
-            ),
-            Album(title="album", barcode="1234567890123"),
+    album_1 = Album(
+        title="album",
+        song_list=[
+            Song(title="song", main_artist_list=[Artist(name="artist")]),
+        ],
+        artist_list=[
+            Artist(name="artist 3"),
        ]
    )

-
-    other_artist: Artist = Artist(
-        name="artist",
-        main_album_list=[
-            Album(
-                title="album",
-                song_list=[
-                    Song(
-                        title="song",
-                        album_list=[
-                            Album(
-                                title="album", 
-                                albumsort=123,
-                                main_artist=Artist(name="other_artist"),
-                            ),
-                        ],
-                    ),
-                    Song(
-                        title="other_song",
-                        album_list=[
-                            Album(title="album", albumsort=423),
-                        ],
-                    ),
-                ]
-            ),
-            Album(title="album", barcode="1234567890123"),
+    album_2 = Album(
+        title="album",
+        song_list=[
+            Song(title="song", main_artist_list=[Artist(name="artist 2")]),
+        ],
+        artist_list=[
+            Artist(name="artist"),
        ]
    )

-    artist.merge(other_artist)
+    album_1.merge(album_2)

-    a = artist.main_album_collection[0]
-    b = a.song_collection[0].album_collection[0]
-    c = a.song_collection[1].album_collection[0]
-    d = b.song_collection[0].album_collection[0]
-    e = d.song_collection[0].album_collection[0]
-    f = e.song_collection[0].album_collection[0]
-    g = f.song_collection[0].album_collection[0]
-
-    print(a.id, a.title, a.barcode, a.albumsort)
-    print(b.id, b.title, b.barcode, b.albumsort)
-    print(c.id, c.title, c.barcode, c.albumsort)
-    print(d.id, d.title, d.barcode, d.albumsort)
-    print(e.id, e.title, e.barcode, e.albumsort)
-    print(f.id, f.title, f.barcode, f.albumsort)
-    print(g.id, g.title, g.barcode, g.albumsort)
    print()
+    print(*(f"{a.title_string} ; {a.id}" for a in album_1.artist_collection.data), sep=" | ")

-    d.title = "new_title"
-
-    print(a.id, a.title, a.barcode, a.albumsort)
-    print(b.id, b.title, b.barcode, b.albumsort)
-    print(c.id, c.title, c.barcode, c.albumsort)
-    print(d.id, d.title, d.barcode, d.albumsort)
-    print(e.id, e.title, e.barcode, e.albumsort)
-    print(f.id, f.title, f.barcode, f.albumsort)
-    print(g.id, g.title, g.barcode, g.albumsort)
-    print()
-
-    print(artist.main_album_collection._indexed_values)
+    print(id(album_1.artist_collection), id(album_2.artist_collection))
+    print(id(album_1.song_collection[0].main_artist_collection), id(album_2.song_collection[0].main_artist_collection))
--- a/music_kraken/download/page_attributes.py
+++ b/music_kraken/download/page_attributes.py
@@ -14,7 +14,7 @@ from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic,


 ALL_PAGES: Set[Type[Page]] = {
-    EncyclopaediaMetallum,
+    # EncyclopaediaMetallum,
    Musify,
    YoutubeMusic,
    Bandcamp
--- a/music_kraken/objects/collection.py
+++ b/music_kraken/objects/collection.py
@@ -1,8 +1,9 @@
 from __future__ import annotations

 from collections import defaultdict
-from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Generator, Union
+from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Generator, Union, Any
 from .parents import OuterProxy
+from ..utils import object_trace

 T = TypeVar('T', bound=OuterProxy)

@@ -21,186 +22,62 @@ class Collection(Generic[T]):
            self,
            data: Optional[Iterable[T]] = None,
            sync_on_append: Dict[str, Collection] = None,
-            contain_given_in_attribute: Dict[str, Collection] = None,
-            contain_attribute_in_given: Dict[str, Collection] = None,
-            append_object_to_attribute: Dict[str, T] = None
+            append_object_to_attribute: Dict[str, T] = None,
+            extend_object_to_attribute: Dict[str, Collection] = None,
    ) -> None:
+        self._collection_for: dict = dict()
+
        self._contains_ids = set()
        self._data = []

-        self.parents: List[Collection[T]] = []
-        self.children: List[Collection[T]] = []
-
        # List of collection attributes that should be modified on append
        # Key: collection attribute (str) of appended element
        # Value: main collection to sync to
-        self.contain_given_in_attribute: Dict[str, Collection] = contain_given_in_attribute or {}
        self.append_object_to_attribute: Dict[str, T] = append_object_to_attribute or {}
+        self.extend_object_to_attribute: Dict[str, Collection[T]] = extend_object_to_attribute or {}
        self.sync_on_append: Dict[str, Collection] = sync_on_append or {}

        self._id_to_index_values: Dict[int, set] = defaultdict(set)
-        self._indexed_values = defaultdict(lambda: None)
-        self._indexed_to_objects = defaultdict(lambda: None)
+        
+        # This is to cleanly unmap previously mapped items by their id
+        self._indexed_from_id: Dict[int, Dict[str, Any]] = defaultdict(dict)
+        # this is to keep track and look up the actual objects
+        self._indexed_values: Dict[str, Dict[Any, T]] = defaultdict(dict)

        self.extend(data)

-    def _map_element(self, __object: T, from_map: bool = False):
-        self._contains_ids.add(__object.id)
+    def __repr__(self) -> str:
+        return f"Collection({id(self)})"

-        for name, value in (*__object.indexing_values, ('id', __object.id)):
+    def _map_element(self, __object: T, from_map: bool = False):
+        self._unmap_element(__object.id)
+
+        self._indexed_from_id[__object.id]["id"] = __object.id
+        self._indexed_values["id"][__object.id] = __object
+
+        for name, value in __object.indexing_values:
            if value is None or value == __object._inner._default_values.get(name):
                continue

-            self._indexed_values[name] = value
-            self._indexed_to_objects[value] = __object
-
-            self._id_to_index_values[__object.id].add((name, value))
+            self._indexed_values[name][value] = __object
+            self._indexed_from_id[__object.id][name] = value

    def _unmap_element(self, __object: Union[T, int]):
        obj_id = __object.id if isinstance(__object, OuterProxy) else __object

-        if obj_id in self._contains_ids:
-            self._contains_ids.remove(obj_id)
-
-        for name, value in self._id_to_index_values[obj_id]:
-            if name in self._indexed_values:
-                del self._indexed_values[name]
-            if value in self._indexed_to_objects:
-                del self._indexed_to_objects[value]
-
-        del self._id_to_index_values[obj_id]
-
-    def _contained_in_self(self, __object: T) -> bool:
-        if __object.id in self._contains_ids:
-            return True
-
-        for name, value in __object.indexing_values:
-            if value is None:
-                continue
-            if value == self._indexed_values[name]:
-                return True
-        return False
-
-    def _contained_in_sub(self, __object: T, break_at_first: bool = True) -> List[Collection]:
-        """
-        Gets the collection this object is found in, if it is found in any.
-
-        :param __object:
-        :param break_at_first:
-        :return:
-        """
-        results = []
-
-        if self._contained_in_self(__object):
-            return [self]
-
-        for collection in self.children:
-            results.extend(collection._contained_in_sub(__object, break_at_first=break_at_first))
-
-            if break_at_first:
-                return results
-
-        return results
-
-    def _get_root_collections(self) -> List[Collection]:
-        if not len(self.parents):
-            return [self]
-
-        root_collections = []
-        for upper_collection in self.parents:
-            root_collections.extend(upper_collection._get_root_collections())
-        return root_collections
-
-    @property
-    def _is_root(self) -> bool:
-        return len(self.parents) <= 0
-
-    def _get_parents_of_multiple_contained_children(self, __object: T):
-        results = []
-        if len(self.children) < 2 or self._contained_in_self(__object):
-            return results
-
-        count = 0
-
-        for collection in self.children:
-            sub_results = collection._get_parents_of_multiple_contained_children(__object)
-
-            if len(sub_results) > 0:
-                count += 1
-                results.extend(sub_results)
-
-        if count >= 2:
-            results.append(self)
-
-        return results
-
-    def merge_into_self(self, __object: T, from_map: bool = False):
-        """
-        1. find existing objects
-        2. merge into existing object
-        3. remap existing object
-        """
-        if __object.id in self._contains_ids:
+        if obj_id not in self._indexed_from_id:
            return

-        existing_object: T = None
+        for name, value in self._indexed_from_id[obj_id].items():
+            if value in self._indexed_values[name]:
+                del self._indexed_values[name][value]

+        del self._indexed_from_id[obj_id]
+
+    def _find_object(self, __object: T) -> Optional[T]:
        for name, value in __object.indexing_values:
-            if value is None:
-                continue
-
-            if value == self._indexed_values[name]:
-                existing_object = self._indexed_to_objects[value]
-                if existing_object.id == __object.id:
-                    return None
-
-                break
-
-        if existing_object is None:
-            return None
-
-        existing_object.merge(__object)
-
-        # just a check if it really worked
-        if existing_object.id != __object.id:
-            raise ValueError("This should NEVER happen. Merging doesn't work.")
-
-        self._map_element(existing_object, from_map=from_map)
-
-    def contains(self, __object: T) -> bool:
-        return len(self._contained_in_sub(__object)) > 0
-
-    def _find_object_in_self(self, __object: T) -> Optional[T]:
-        for name, value in __object.indexing_values:
-            if value == self._indexed_values[name]:
-                return self._indexed_to_objects[value]
-
-    def _find_object(self, __object: T, no_sibling: bool = False) -> Tuple[Collection[T], Optional[T]]:
-        other_object = self._find_object_in_self(__object)
-        if other_object is not None:
-            return self, other_object
-
-        for c in self.children:
-            o, other_object = c._find_object(__object)
-            if other_object is not None:
-                return o, other_object
-
-        if no_sibling:
-            return self, None
-
-        """
-        # find in siblings and all children of siblings
-        for parent in self.parents:
-            for sibling in parent.children:
-                if sibling is self:
-                    continue
-
-                o, other_object = sibling._find_object(__object, no_sibling=True)
-                if other_object is not None:
-                    return o, other_object
-        """
-
-        return self, None
+            if value in self._indexed_values[name]:
+                return self._indexed_values[name][value]

    def append(self, __object: Optional[T], already_is_parent: bool = False, from_map: bool = False):
        """
@@ -217,23 +94,32 @@ class Collection(Generic[T]):
        if __object is None:
            return

-        append_to, existing_object = self._find_object(__object)
+        existing_object = self._find_object(__object)

        if existing_object is None:
            # append
-            append_to._data.append(__object)
-            append_to._map_element(__object)
+            self._data.append(__object)
+            self._map_element(__object)

-            # only modify collections if the object actually has been appended
-            for collection_attribute, child_collection in self.contain_given_in_attribute.items():
-                __object.__getattribute__(collection_attribute).contain_collection_inside(child_collection, __object)
+            for collection_attribute, child_collection in self.extend_object_to_attribute.items():
+                __object.__getattribute__(collection_attribute).extend(child_collection)

            for attribute, new_object in self.append_object_to_attribute.items():
                __object.__getattribute__(attribute).append(new_object)
-            
-            for attribute, collection in self.sync_on_append.items():
-                collection.extend(__object.__getattribute__(attribute))
-                __object.__setattr__(attribute, collection)
+
+            # only modify collections if the object actually has been appended
+            for attribute, a in self.sync_on_append.items():
+                b = __object.__getattribute__(attribute)
+                object_trace(f"Syncing [{a}{id(a)}] = [{b}{id(b)}]")
+
+                data_to_extend = b.data
+
+                a._collection_for.update(b._collection_for)
+                for synced_with, key in b._collection_for.items():
+                    synced_with.__setattr__(key, a)
+
+                a.extend(data_to_extend)
+

        else:
            # merge only if the two objects are not the same
@@ -245,9 +131,9 @@ class Collection(Generic[T]):
            existing_object.merge(__object)

            if existing_object.id != old_id:
-                append_to._unmap_element(old_id)
+                self._unmap_element(old_id)

-            append_to._map_element(existing_object)
+            self._map_element(existing_object)            

    def extend(self, __iterable: Optional[Generator[T, None, None]]):
        if __iterable is None:
@@ -256,54 +142,22 @@ class Collection(Generic[T]):
        for __object in __iterable:
            self.append(__object)

-    def contain_collection_inside(self, sub_collection: Collection, _object: T):
-        """
-        This collection will ALWAYS contain everything from the passed in collection
-        """
-        if self is sub_collection or sub_collection in self.children:
-            return
-
-        _object._inner._is_collection_child[self] = sub_collection
-        _object._inner._is_collection_parent[sub_collection] = self
-
-        self.children.append(sub_collection)
-        sub_collection.parents.append(self)
-
    @property
    def data(self) -> List[T]:
        return list(self.__iter__())

    def __len__(self) -> int:
-        return len(self._data) + sum(len(collection) for collection in self.children)
+        return len(self._data)

    @property
    def empty(self) -> bool:
        return self.__len__() <= 0

-    def __iter__(self, finished_ids: set = None) -> Iterator[T]:
-        _finished_ids = finished_ids or set()
-
-        for element in self._data:
-            if element.id in _finished_ids:
-                continue
-            _finished_ids.add(element.id)
-            yield element
-
-        for c in self.children:
-            yield from c.__iter__(finished_ids=finished_ids)
+    def __iter__(self) -> Iterator[T]:
+        yield from self._data

    def __merge__(self, __other: Collection, override: bool = False):
        self.extend(__other)

    def __getitem__(self, item: int):
-        if item < len(self._data):
-            return self._data[item]
-
-        item = item - len(self._data)
-
-        for c in self.children:
-            if item < len(c):
-                return c.__getitem__(item)
-            item = item - len(c._data)
-
-        raise IndexError
+        return self._data[item]
--- a/music_kraken/objects/formatted_text.py
+++ b/music_kraken/objects/formatted_text.py
@@ -1,5 +1,10 @@
 import mistune
-import html2markdown
+from markdownify import markdownify as md
+
+
+def plain_to_markdown(plain: str) -> str:
+    return plain.replace("\n", "  \n")
+

 class FormattedText:    
    html = ""
@@ -7,12 +12,15 @@ class FormattedText:
    def __init__(
            self,
            markdown: str = None,
-            html: str = None
+            html: str = None,
+            plain: str = None,
    ) -> None:
        if html is not None:
            self.html = html
        elif markdown is not None:
            self.html = mistune.markdown(markdown)
+        elif plain is not None:
+            self.html = mistune.markdown(plain_to_markdown(plain))

    @property
    def is_empty(self) -> bool:
@@ -28,7 +36,7 @@ class FormattedText:

    @property
    def markdown(self) -> str:
-        return html2markdown.convert(self.html)
+        return md(self.html).strip()

    def __str__(self) -> str:
        return self.markdown
--- a/music_kraken/objects/lyrics.py
+++ b/music_kraken/objects/lyrics.py
@@ -34,6 +34,6 @@ class Lyrics(OuterProxy):
    @property
    def metadata(self) -> Metadata:
        return Metadata({
-            id3Mapping.UNSYNCED_LYRICS: [self.text.html]
+            id3Mapping.UNSYNCED_LYRICS: [self.text.markdown]
        })

--- a/music_kraken/objects/parents.py
+++ b/music_kraken/objects/parents.py
@@ -32,19 +32,21 @@ class InnerData:

    def __init__(self, object_type, **kwargs):
        self._refers_to_instances = set()
+        self._fetched_from: dict = {}

-        # collection : collection that is a collection of self
-        self._is_collection_child: Dict[Collection, Collection] = {}
-        self._is_collection_parent: Dict[Collection, Collection] = {}
-        
        # initialize the default values
        self._default_values = {}
        for name, factory in object_type._default_factories.items():
            self._default_values[name] = factory()

        for key, value in kwargs.items():
+            if hasattr(value, "__is_collection__"):
+                value._collection_for[self] = key
            self.__setattr__(key, value)

+    def __hash__(self):
+        return self.id
+
    def __merge__(self, __other: InnerData, override: bool = False):
        """
        :param __other:
@@ -52,6 +54,8 @@ class InnerData:
        :return:
        """

+        self._fetched_from.update(__other._fetched_from)
+
        for key, value in __other.__dict__.copy().items():
            # just set the other value if self doesn't already have it
            if key not in self.__dict__ or (key in self.__dict__ and self.__dict__[key] == self._default_values.get(key)):
@@ -85,7 +89,7 @@ class OuterProxy:
    def __init__(self, _id: int = None, dynamic: bool = False, **kwargs):
        _automatic_id: bool = False

-        if _id is None and not dynamic:
+        if _id is None:
            """
            generates a random integer id
            the range is defined in the config
@@ -109,11 +113,11 @@ class OuterProxy:

                del kwargs[name]

-        self._fetched_from: dict = {}
        self._inner: InnerData = InnerData(type(self), **kwargs)
        self._inner._refers_to_instances.add(self)

        object_trace(f"creating {type(self).__name__} [{self.title_string}]")
+
        self.__init_collections__()

        for name, data_list in collection_data.items():
@@ -192,19 +196,7 @@ class OuterProxy:
        if len(b._inner._refers_to_instances) > len(a._inner._refers_to_instances):
            a, b = b, a

-        object_trace(f"merging {type(a).__name__} [{a.title_string} | {a.id}] with {type(b).__name__} [{b.title_string} | {b.id}] called by [{' | '.join(f'{s.function} {Path(s.filename).name}:{str(s.lineno)}' for s in inspect.stack()[1:5])}]")
-        
-        for collection, child_collection in b._inner._is_collection_child.items():
-            try:
-                collection.children.remove(child_collection)
-            except ValueError:
-                pass
-            
-        for collection, parent_collection in b._inner._is_collection_parent.items():
-            try:
-                collection.parents.remove(parent_collection)
-            except ValueError:
-                pass
+        object_trace(f"merging {type(a).__name__} [{a.title_string} | {a.id}] with {type(b).__name__} [{b.title_string} | {b.id}]")

        old_inner = b._inner

@@ -220,13 +212,13 @@ class OuterProxy:

    def mark_as_fetched(self, *url_hash_list: List[str]):
        for url_hash in url_hash_list:
-            self._fetched_from[url_hash] = {
+            self._inner._fetched_from[url_hash] = {
                "time": get_unix_time(),
                "url": url_hash,
            }

    def already_fetched_from(self, url_hash: str) -> bool:
-        res = self._fetched_from.get(url_hash, None)
+        res = self._inner._fetched_from.get(url_hash, None)

        if res is None:
            return False
--- a/music_kraken/objects/song.py
+++ b/music_kraken/objects/song.py
@@ -86,11 +86,6 @@ class Song(Base):
    TITEL = "title"

    def __init_collections__(self) -> None:
-        """
-        self.album_collection.contain_given_in_attribute = {
-            "artist_collection": self.main_artist_collection,
-        }
-        """
        self.album_collection.sync_on_append = {
            "artist_collection": self.main_artist_collection,
        }
@@ -98,8 +93,7 @@ class Song(Base):
        self.album_collection.append_object_to_attribute = {
            "song_collection": self,
        }
-
-        self.main_artist_collection.contain_given_in_attribute = {
+        self.main_artist_collection.extend_object_to_attribute = {
            "main_album_collection": self.album_collection
        }
        self.feature_artist_collection.append_object_to_attribute = {
@@ -126,7 +120,7 @@ class Song(Base):
    def indexing_values(self) -> List[Tuple[str, object]]:
        return [
            ('id', self.id),
-            ('title', unify(self.unified_title)),
+            ('title', unify(self.title)),
            ('isrc', self.isrc),
            *[('url', source.url) for source in self.source_collection]
        ]
@@ -209,6 +203,7 @@ class Album(Base):
    notes: FormattedText

    source_collection: SourceCollection
+
    artist_collection: Collection[Artist]
    song_collection: Collection[Song]
    label_collection: Collection[Label]
@@ -258,7 +253,7 @@ class Album(Base):
        self.artist_collection.append_object_to_attribute = {
            "main_album_collection": self
        }
-        self.artist_collection.contain_given_in_attribute = {
+        self.artist_collection.extend_object_to_attribute = {
            "label_collection": self.label_collection
        }

@@ -347,7 +342,6 @@ class Album(Base):
                tracksort_map[i] = existing_list.pop(0)
                tracksort_map[i].tracksort = i

-
    def compile(self, merge_into: bool = False):
        """
        compiles the recursive structures,
--- a/music_kraken/pages/bandcamp.py
+++ b/music_kraken/pages/bandcamp.py
@@ -18,10 +18,12 @@ from ..objects import (
    Contact,
    ID3Timestamp,
    Lyrics,
-    FormattedText
+    FormattedText,
+    Artwork,
 )
 from ..connection import Connection
 from ..utils.support_classes.download_result import DownloadResult
+from ..utils.string_processing import clean_song_title
 from ..utils.config import main_settings, logging_settings
 from ..utils.shared import DEBUG

@@ -114,7 +116,7 @@ class Bandcamp(Page):

        if object_type is BandcampTypes.SONG:
            return Song(
-                title=name.strip(),
+                title=clean_song_title(name, artist_name=data["band_name"]),
                source_list=source_list,
                main_artist_list=[
                    Artist(
@@ -252,11 +254,18 @@ class Bandcamp(Page):
        artist.source_collection.append(source)
        return artist

-    def _parse_track_element(self, track: dict) -> Optional[Song]:
+    def _parse_track_element(self, track: dict, artwork: Artwork) -> Optional[Song]:
+        lyrics_list: List[Lyrics] = []
+
+        _lyrics: Optional[str] = track.get("item", {}).get("recordingOf", {}).get("lyrics", {}).get("text")
+        if _lyrics is not None:
+            lyrics_list.append(Lyrics(text=FormattedText(plain=_lyrics)))
+
        return Song(
-            title=track["item"]["name"].strip(),
+            title=clean_song_title(track["item"]["name"]),
            source_list=[Source(self.SOURCE_TYPE, track["item"]["mainEntityOfPage"])],
-            tracksort=int(track["position"])
+            tracksort=int(track["position"]),
+            artwork=artwork,
        )

    def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
@@ -289,12 +298,32 @@ class Bandcamp(Page):
            )]
        )

+        artwork: Artwork = Artwork()
+
+        def _get_artwork_url(_data: dict) -> Optional[str]:
+            if "image" in _data:
+                return _data["image"]
+            for _property in _data.get("additionalProperty", []):
+                if _property.get("name") == "art_id":
+                    return f"https://f4.bcbits.com/img/a{_property.get('value')}_2.jpg"
+
+        _artwork_url = _get_artwork_url(data)
+        if _artwork_url is not None:
+            artwork.append(url=_artwork_url, width=350, height=350)
+        else:
+            for album_release in data.get("albumRelease", []):
+                _artwork_url = _get_artwork_url(album_release)
+                if _artwork_url is not None:
+                    artwork.append(url=_artwork_url, width=350, height=350)
+                    break
+
+
        for i, track_json in enumerate(data.get("track", {}).get("itemListElement", [])):
            if DEBUG:
                dump_to_file(f"album_track_{i}.json", json.dumps(track_json), is_json=True, exit_after_dump=False)

            try:
-                album.song_collection.append(self._parse_track_element(track_json))
+                album.song_collection.append(self._parse_track_element(track_json, artwork=artwork))
            except KeyError:
                continue

@@ -304,7 +333,6 @@ class Bandcamp(Page):
    def _fetch_lyrics(self, soup: BeautifulSoup) -> List[Lyrics]:
        track_lyrics = soup.find("div", {"class": "lyricsText"})
        if track_lyrics:
-            self.LOGGER.debug(" Lyrics retrieved..")
            return [Lyrics(text=FormattedText(html=track_lyrics.prettify()))]

        return []
@@ -323,10 +351,9 @@ class Bandcamp(Page):
        if len(other_data_list) > 0:
            other_data = json.loads(other_data_list[0]["data-tralbum"])

-        if DEBUG:
-            dump_to_file("bandcamp_song_data.json", data_container.text, is_json=True, exit_after_dump=False)
-            dump_to_file("bandcamp_song_data_other.json", json.dumps(other_data), is_json=True, exit_after_dump=False)
-            dump_to_file("bandcamp_song_page.html", r.text, exit_after_dump=False)
+        dump_to_file("bandcamp_song_data.json", data_container.text, is_json=True, exit_after_dump=False)
+        dump_to_file("bandcamp_song_data_other.json", json.dumps(other_data), is_json=True, exit_after_dump=False)
+        dump_to_file("bandcamp_song_page.html", r.text, exit_after_dump=False)

        data = json.loads(data_container.text)
        album_data = data["inAlbum"]
@@ -337,8 +364,8 @@ class Bandcamp(Page):
            mp3_url = value

        song = Song(
-            title=data["name"].strip(),
-            source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), audio_url=mp3_url)],
+            title=clean_song_title(data["name"], artist_name=artist_data["name"]),
+            source_list=[source, Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), audio_url=mp3_url)],
            album_list=[Album(
                title=album_data["name"].strip(),
                date=ID3Timestamp.strptime(data["datePublished"], "%d %b %Y %H:%M:%S %Z"),
@@ -351,8 +378,6 @@ class Bandcamp(Page):
            lyrics_list=self._fetch_lyrics(soup=soup)
        )

-        song.source_collection.append(source)
-
        return song

    def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult:
--- a/music_kraken/utils/init.py
+++ b/music_kraken/utils/init.py
@@ -2,8 +2,9 @@ from datetime import datetime
 from pathlib import Path
 import json
 import logging
+import inspect

-from .shared import DEBUG, DEBUG_LOGGING, DEBUG_DUMP, DEBUG_TRACE, DEBUG_OBJECT_TRACE
+from .shared import DEBUG, DEBUG_LOGGING, DEBUG_DUMP, DEBUG_TRACE, DEBUG_OBJECT_TRACE, DEBUG_OBJECT_TRACE_CALLSTACK
 from .config import config, read_config, write_config
 from .enums.colors import BColors
 from .path_manager import LOCATIONS
@@ -56,7 +57,8 @@ def object_trace(obj):
    if not DEBUG_OBJECT_TRACE:
        return

-    output("object: " + str(obj), BColors.GREY)
+    appendix =  f" called by [{' | '.join(f'{s.function} {Path(s.filename).name}:{str(s.lineno)}' for s in inspect.stack()[1:5])}]" if DEBUG_OBJECT_TRACE_CALLSTACK else ""
+    output("object: " + str(obj) + appendix, BColors.GREY)


 """
--- a/music_kraken/utils/shared.py
+++ b/music_kraken/utils/shared.py
@@ -13,12 +13,13 @@ if not load_dotenv(Path(__file__).parent.parent.parent / ".env"):
 __stage__ = os.getenv("STAGE", "prod")

 DEBUG = (__stage__ == "dev") and True
-DEBUG_LOGGING = DEBUG and True
+DEBUG_LOGGING = DEBUG and False
 DEBUG_TRACE = DEBUG and True
 DEBUG_OBJECT_TRACE = DEBUG and False
+DEBUG_OBJECT_TRACE_CALLSTACK = DEBUG_OBJECT_TRACE and False
 DEBUG_YOUTUBE_INITIALIZING = DEBUG and False
 DEBUG_PAGES = DEBUG and False
-DEBUG_DUMP = DEBUG and True
+DEBUG_DUMP = DEBUG and False

 if DEBUG:
    print("DEBUG ACTIVE")
--- a/music_kraken/utils/string_processing.py
+++ b/music_kraken/utils/string_processing.py
@@ -1,6 +1,7 @@
-from typing import Tuple, Union
+from typing import Tuple, Union, Optional
 from pathlib import Path
 import string
+from functools import lru_cache

 from transliterate.exceptions import LanguageDetectionError
 from transliterate import translit
@@ -10,8 +11,11 @@ from pathvalidate import sanitize_filename
 COMMON_TITLE_APPENDIX_LIST: Tuple[str, ...] = (
    "(official video)",
 )
+OPEN_BRACKETS = "(["
+CLOSE_BRACKETS = ")]"
+DISALLOWED_SUBSTRING_IN_BRACKETS = ("official", "video", "audio", "lyrics", "prod", "remix", "ft", "feat", "ft.", "feat.")

-
+@lru_cache
 def unify(string: str) -> str:
    """
    returns a unified str, to make comparisons easy.
@@ -52,7 +56,8 @@ def fit_to_file_system(string: Union[str, Path]) -> Union[str, Path]:
        return fit_string(string)


-def clean_song_title(raw_song_title: str, artist_name: str) -> str:
+@lru_cache(maxsize=128)
+def clean_song_title(raw_song_title: str, artist_name: Optional[str] = None) -> str:
    """
    This function cleans common naming "conventions" for non clean song titles, like the title of youtube videos
    
@@ -64,19 +69,45 @@ def clean_song_title(raw_song_title: str, artist_name: str) -> str:
    - `song (prod. some producer)`
    """
    raw_song_title = raw_song_title.strip()
-    artist_name = artist_name.strip()

    # Clean official Video appendix
    for dirty_appendix in COMMON_TITLE_APPENDIX_LIST:
        if raw_song_title.lower().endswith(dirty_appendix):
            raw_song_title = raw_song_title[:-len(dirty_appendix)].strip()

-    # Remove artist from the start of the title
-    if raw_song_title.lower().startswith(artist_name.lower()):
-        raw_song_title = raw_song_title[len(artist_name):].strip()
+    # remove brackets and their content if they contain disallowed substrings
+    for open_bracket, close_bracket in zip(OPEN_BRACKETS, CLOSE_BRACKETS):
+        if open_bracket not in raw_song_title or close_bracket not in raw_song_title:
+            continue
+        
+        start = 0

-        if raw_song_title.startswith("-"):
-            raw_song_title = raw_song_title[1:].strip()
+        while True:
+            try:
+                open_bracket_index = raw_song_title.index(open_bracket, start)
+            except ValueError:
+                break
+            try:
+                close_bracket_index = raw_song_title.index(close_bracket, open_bracket_index + 1)
+            except ValueError:
+                break
+
+            substring = raw_song_title[open_bracket_index + 1:close_bracket_index]
+            if any(disallowed_substring in substring for disallowed_substring in DISALLOWED_SUBSTRING_IN_BRACKETS):
+                raw_song_title = raw_song_title[:open_bracket_index] + raw_song_title[close_bracket_index + 1:]
+            else:
+                start = close_bracket_index + 1
+
+    # everything that requires the artist name
+    if artist_name is not None:
+        artist_name = artist_name.strip()
+
+        # Remove artist from the start of the title
+        if raw_song_title.lower().startswith(artist_name.lower()):
+            raw_song_title = raw_song_title[len(artist_name):].strip()
+
+            if raw_song_title.startswith("-"):
+                raw_song_title = raw_song_title[1:].strip()

    return raw_song_title.strip()

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,6 +56,7 @@ dependencies = [

    "rich~=13.7.1",
    "mistune~=3.0.2",
+    "markdownify~=0.12.1",
    "html2markdown~=0.1.7",
    "jellyfish~=0.9.0",
    "transliterate~=1.10.2",
--- a/tests/test_collection.py
+++ b/tests/test_collection.py
@@ -70,7 +70,49 @@ class TestCollection(unittest.TestCase):
        self.assertTrue(a.name == b.name == c.name == d.name == "artist")
        self.assertTrue(a.country == b.country == c.country == d.country)

-    """
+    def test_artist_artist_relation(self):
+        artist = Artist(
+            name="artist",
+            main_album_list=[
+                Album(
+                    title="album",
+                    song_list=[
+                        Song(title="song"),
+                    ],
+                    artist_list=[
+                        Artist(name="artist"),
+                    ]
+                )
+            ]
+        )
+
+        self.assertTrue(artist.id == artist.main_album_collection[0].song_collection[0].main_artist_collection[0].id)
+
+    def test_artist_collection_sync(self):
+        album_1 = Album(
+            title="album",
+            song_list=[
+                Song(title="song", main_artist_list=[Artist(name="artist")]),
+            ],
+            artist_list=[
+                Artist(name="artist"),
+            ]
+        )
+
+        album_2 = Album(
+            title="album",
+            song_list=[
+                Song(title="song", main_artist_list=[Artist(name="artist")]),
+            ],
+            artist_list=[
+                Artist(name="artist"),
+            ]
+        )
+
+        album_1.merge(album_2)
+
+        self.assertTrue(id(album_1.artist_collection) == id(album_1.artist_collection) == id(album_1.song_collection[0].main_artist_collection) == id(album_1.song_collection[0].main_artist_collection))
+
    def test_song_artist_relations(self):
        a = self.complicated_object()
        b = a.main_album_collection[0].song_collection[0].main_artist_collection[0]
@@ -80,7 +122,6 @@ class TestCollection(unittest.TestCase):
        self.assertTrue(a.id == b.id == c.id == d.id)
        self.assertTrue(a.name == b.name == c.name == d.name == "artist")
        self.assertTrue(a.country == b.country == c.country == d.country)
-    """

 if __name__ == "__main__":
    unittest.main()
Author	SHA1	Message	Date
Hazel	cacff47643	Merge pull request 'fix/bandcamp' (#12 ) from fix/bandcamp into experimental All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details Reviewed-on: #12	2024-04-23 10:04:51 +00:00
Lars Noack	0179246ec0	feat: dynamic objects now also have ids All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details ci/woodpecker/pr/woodpecker Pipeline was successful Details ci/woodpecker/pull_request_closed/woodpecker Pipeline was successful Details	2024-04-23 11:52:08 +02:00
Lars Noack	3d432cd0d7	fix: test All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2024-04-23 11:44:39 +02:00
Lars Noack	0080a48e70	feat: removed legacy code	2024-04-23 11:39:25 +02:00
Lars Noack	ea5adfbe8a	feat: limited complexity of collection by removing child collections	2024-04-23 11:37:49 +02:00
Lars Noack	fa723d7747	feat: removed redundant collection functions All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2024-04-23 09:19:06 +02:00
Lars Noack	312e57d82e	feat: progress All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2024-04-19 17:48:42 +02:00
Lars Noack	a998e52cd9	fix: syncing All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2024-04-19 17:45:49 +02:00
Lars Noack	b4c73d56a7	feat: improved tracing All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2024-04-19 14:43:09 +02:00
Lars Noack	1735ff4e1d	feat: removed redundant commands from song	2024-04-19 14:16:31 +02:00
Lars Noack	be09562632	feat: stripped whitespaces from lyrics All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2024-04-19 14:05:05 +02:00
Lars Noack	29770825a4	fix: unified wrong attribute in song, causing many duplicates All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2024-04-19 13:54:08 +02:00
Lars Noack	81708ba100	feat: switched to a more readable markdown converter All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2024-04-19 13:51:08 +02:00
Lars Noack	301ff82bcf	feat: implemented the merging from where it has been fetched from All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2024-04-19 13:37:12 +02:00
Lars Noack	06ffae06a6	fix: lyrics should be embedded in the markdown format rather than html	2024-04-19 12:57:34 +02:00
Lars Noack	919a99885c	feat: disabled debugging pages All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2024-04-19 12:47:36 +02:00
Lars Noack	e20b14a9df	feat: added fetching artworks to bandcamp All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2024-04-19 12:37:14 +02:00
Lars Noack	b933c6ac14	feat: improved the lyrics support for bandcamp All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2024-04-19 12:17:08 +02:00
Lars Noack	3c5bbc19af	feat: some slight performance improvements All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2024-04-19 12:04:13 +02:00
Lars Noack	06acf22abb	feat: improved the cleaning song title function to remove redundant brackets All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2024-04-19 12:02:54 +02:00
Lars Noack	1e62d371cd	feat: cleaned bandcamp songs All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2024-04-19 11:46:56 +02:00
Lars Noack	24a90f1cdf	feat: artist name in clean song title is optional	2024-04-19 11:43:21 +02:00
Lars Noack	d9c711a2f8	feat: added lru cache to unify function to speed up indexing	2024-04-19 11:40:00 +02:00