class ObjectCache:
    """
    Cache of database objects, keyed by their id.

    The cache guarantees that there is at most one cached object per id, so
    every consumer that looks an id up receives the very same instance.

    Only weak references are stored: the cache never keeps an object alive
    on its own.  As soon as the last strong reference outside the cache is
    dropped, the entry disappears from the cache.

    :attr object_to_id: Weak-value mapping from an id to the cached object.
        (The name is kept for backward compatibility, the direction of the
        mapping is id -> object.)
    :attr weakref_map: Maps the weak reference of every cached object to
        the id it has been cached under.

    :method exists: Check if an object with the same id is already cached.
    :method append: Add an object to the cache if its id is not cached yet.
    :method extend: Add several objects to the cache (``extent`` is the
        legacy spelling and does the same).
    :method remove: Remove an object from the cache by its id.
    :method get: Retrieve an object from the cache by its id.
    """

    object_to_id: "weakref.WeakValueDictionary"
    weakref_map: Dict[weakref.ref, str]

    def __init__(self) -> None:
        # A WeakValueDictionary holds its values weakly.  A plain dict would
        # hold a strong reference to every cached object, so nothing could
        # ever be garbage collected and `on_death` would never fire.
        self.object_to_id = weakref.WeakValueDictionary()
        self.weakref_map = {}

    def exists(self, database_object: "DatabaseObject") -> bool:
        """
        Check if an object with the same id already exists in the cache.

        Objects whose ``dynamic`` attribute is truthy are always reported as
        existing, which keeps them out of the cache (see ``append``).

        :param database_object: The object to check for.
        :return: True if the object counts as existing, False otherwise.
        """
        if database_object.dynamic:
            return True
        return database_object.id in self.object_to_id

    def on_death(self, weakref_: weakref.ref) -> None:
        """
        Callback that is triggered when a cached object gets garbage collected.

        It drops the bookkeeping entry of the dead object.  Missing entries
        are ignored, because ``remove`` may already have cleaned them up.

        :param weakref_: The weak reference of the object that died.
        """
        data_id = self.weakref_map.pop(weakref_, None)
        if data_id is None:
            return

        # Only drop the id if it does not point to a living object; the
        # weak-value mapping usually discards dead entries on its own.
        if self.object_to_id.get(data_id) is None:
            self.object_to_id.pop(data_id, None)

    def get_weakref(self, database_object: "DatabaseObject") -> weakref.ref:
        """
        Create a weak reference to the object that calls ``on_death`` once
        the object has been garbage collected.

        :param database_object: The object to reference weakly.
        :return: The weak reference.
        """
        return weakref.ref(database_object, self.on_death)

    def append(self, database_object: "DatabaseObject") -> bool:
        """
        Add an object to the cache, unless it counts as existing.

        :param database_object: The object to add to the cache.
        :return: True if the object already existed (nothing was added),
            False if it has been added.
        """
        if self.exists(database_object):
            return True

        self.weakref_map[self.get_weakref(database_object)] = database_object.id
        self.object_to_id[database_object.id] = database_object

        return False

    def extend(self, database_object_list: List["DatabaseObject"]):
        """
        Analogous to ``list.extend``: append every object of the given list.

        :param database_object_list: The objects to add to the cache.
        """
        for database_object in database_object_list:
            self.append(database_object)

    def extent(self, database_object_list: List["DatabaseObject"]):
        """
        Legacy (misspelled) name of ``extend``, kept so existing callers
        continue to work.

        :param database_object_list: The objects to add to the cache.
        """
        self.extend(database_object_list)

    def remove(self, _id: str):
        """
        Remove an object from the cache.  Unknown ids are ignored.

        :param _id: The id of the object to remove from the cache.
        """
        data = self.object_to_id.pop(_id, None)
        # Compare against None explicitly: a cached object may be falsy.
        if data is not None:
            # Weak references to the same living object compare (and hash)
            # like the object itself, regardless of the callback, so this
            # finds the reference that has been stored by `append`.
            self.weakref_map.pop(weakref.ref(data), None)

    def __getitem__(self, item) -> Optional["DatabaseObject"]:
        """
        Look an object up by its id.

        :param item: The id of the object.
        :return: The cached object, or None if the id is not cached.
        """
        return self.object_to_id.get(item)

    def get(self, _id: str) -> Optional["DatabaseObject"]:
        """
        Look an object up by its id.

        :param _id: The id of the object.
        :return: The cached object, or None if the id is not cached.
        """
        return self[_id]
class PageCache(Collection):
    """
    Collection that is used as a cache by the pages and can be emptied again.
    """

    def clear(self):
        """
        Empty the cache by initializing the collection anew, keeping the
        element type it currently has.
        """
        current_element_type = self.element_type
        self.__init__(element_type=current_element_type)
:return detailed_music_object: IT MODIFIES THE INPUT OBJ """ + + cls.ARTIST_CACHE.clear() + cls.ALBUM_CACHE.clear() + cls.SONG_CACHE.clear() if type(music_object) == Song: song = cls.fetch_song_details(music_object, flat=flat) diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index bdf6f4c..7604dfb 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -151,11 +151,11 @@ class Musify(Page): artist_thumbnail = image_soup.get("src") - return Artist( + return cls.ARTIST_CACHE.append(Artist( _id=_id, name=name, source_list=source_list - ) + )) @classmethod def parse_album_contact(cls, contact: BeautifulSoup) -> Album: @@ -257,13 +257,13 @@ class Musify(Page): else: LOGGER.warning("got an unequal ammount than 3 small elements") - return Album( + return cls.ALBUM_CACHE.append(Album( _id=_id, title=title, source_list=source_list, date=ID3Timestamp(year=year), artist_list=artist_list - ) + )) @classmethod def parse_contact_container(cls, contact_container_soup: BeautifulSoup) -> List[Union[Artist, Album]]: @@ -535,14 +535,14 @@ class Musify(Page): else: LOGGER.debug("there is not even 1 footer in the album card") - return Album( + return cls.ALBUM_CACHE.append(Album( _id=_id, title=name, source_list=source_list, date=timestamp, album_type=album_type, album_status=album_status - ) + )) @classmethod def get_discography(cls, url: MusifyUrl, artist_name: str = None, flat=False) -> List[Album]: @@ -700,13 +700,13 @@ class Musify(Page): if note_soup is not None: notes.html = note_soup.decode_contents() - return Artist( + return cls.ARTIST_CACHE.append(Artist( _id=url.musify_id, name=name, country=country, source_list=source_list, notes=notes - ) + )) @classmethod def fetch_artist_from_source(cls, source: Source, flat: bool = False) -> Artist: @@ -842,7 +842,7 @@ class Musify(Page): _artist_name = meta_artist_name_text if _artist_name is not None or _artist_src is not None: - 
artist_list.append(Artist(name=_artist_name, source_list=_artist_src)) + artist_list.append(cls.ARTIST_CACHE.append(Artist(name=_artist_name, source_list=_artist_src))) return Song( title=song_name,