ahhh fuck me :(

Hellow2 2023-03-21 12:46:32 +01:00
parent 0f47cdadb8
commit c648a330e0
5 changed files with 142 additions and 14 deletions

View File

@@ -5,7 +5,8 @@ from . import (
     parents,
     formatted_text,
     album,
-    option
+    option,
+    collection
 )
 
 MusicObject = parents.DatabaseObject
@@ -31,3 +32,4 @@ Album = song.Album
 FormattedText = formatted_text.FormattedText
 Options = option.Options
+Collection = collection.Collection

View File

@@ -0,0 +1,110 @@
from collections import defaultdict
from typing import Dict, List, Optional
import weakref

from .parents import DatabaseObject

"""
This is a cache for the objects that get pulled out of the database.
It is necessary to avoid duplicate objects with the same id.
Using a cache that maps the objects to their id has multiple benefits:
- if you modify the object at any point, all objects with the same id get modified *(copy by reference)*
- less RAM usage
- to decrease RAM usage further, only weak references are stored, not strong ones, so the gc still works
"""


class ObjectCache:
    """
    ObjectCache is a cache for the objects retrieved from a database.
    It maps each object to its id and uses weak references to manage its memory usage.
    Using a cache for these objects provides several benefits:

    - Modifying an object updates all objects with the same id (due to copy by reference)
    - Reduced memory usage

    :attr object_to_id: Dictionary that maps DatabaseObjects to their id.
    :attr weakref_map: Dictionary that uses weak references to DatabaseObjects as keys and their id as values.

    :method exists: Check if a DatabaseObject already exists in the cache.
    :method append: Add a DatabaseObject to the cache if it does not already exist.
    :method extent: Add a list of DatabaseObjects to the cache.
    :method remove: Remove a DatabaseObject from the cache by its id.
    :method get: Retrieve a DatabaseObject from the cache by its id.
    """
    object_to_id: Dict[str, DatabaseObject]
    weakref_map: Dict[weakref.ref, str]

    def __init__(self) -> None:
        self.object_to_id = dict()
        self.weakref_map = defaultdict()

    def exists(self, database_object: DatabaseObject) -> bool:
        """
        Check if a DatabaseObject with the same id already exists in the cache.

        :param database_object: The DatabaseObject to check for.
        :return: True if the DatabaseObject exists, False otherwise.
        """
        if database_object.dynamic:
            return True

        return database_object.id in self.object_to_id

    def on_death(self, weakref_: weakref.ref) -> None:
        """
        Callback function that gets triggered when the reference count of a DatabaseObject drops to 0.
        This function removes the DatabaseObject from the cache.

        :param weakref_: The weak reference of the DatabaseObject that has been garbage collected.
        """
        data_id = self.weakref_map.pop(weakref_)
        self.object_to_id.pop(data_id)

    def get_weakref(self, database_object: DatabaseObject) -> weakref.ref:
        return weakref.ref(database_object, self.on_death)

    def append(self, database_object: DatabaseObject) -> bool:
        """
        Add a DatabaseObject to the cache.

        :param database_object: The DatabaseObject to add to the cache.
        :return: True if the DatabaseObject already exists in the cache, False otherwise.
        """
        if self.exists(database_object):
            return True

        self.weakref_map[weakref.ref(database_object, self.on_death)] = database_object.id
        self.object_to_id[database_object.id] = database_object

        return False

    def extent(self, database_object_list: List[DatabaseObject]):
        """
        Analogous to list.extend: appends every DatabaseObject in the list to the cache.
        """
        for database_object in database_object_list:
            self.append(database_object)

    def remove(self, _id: str):
        """
        Remove a DatabaseObject from the cache.

        :param _id: The id of the DatabaseObject to remove from the cache.
        """
        data = self.object_to_id.get(_id)
        if data:
            self.weakref_map.pop(weakref.ref(data))
            self.object_to_id.pop(_id)

    def __getitem__(self, item) -> Optional[DatabaseObject]:
        """
        Return the cached DatabaseObject.

        :param item: the id of the music object
        :return: the DatabaseObject with that id, or None if it is not cached
        """
        return self.object_to_id.get(item)

    def get(self, _id: str) -> Optional[DatabaseObject]:
        return self.__getitem__(_id)
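A minimal usage sketch of the cache added above. FakeDatabaseObject is a hypothetical stand-in exposing only the two attributes ObjectCache actually reads (id and dynamic); the real class is imported from .parents. The sketch shows the deduplication behaviour: appending a second object with an already-cached id is a no-op, and lookups return the instance that was cached first.

class FakeDatabaseObject:
    """Hypothetical stand-in for DatabaseObject, just for this sketch."""

    def __init__(self, _id: str):
        self.id = _id          # used as the cache key
        self.dynamic = False   # dynamic objects are reported as "already cached"


cache = ObjectCache()

first = FakeDatabaseObject("song-1")
print(cache.append(first))                         # False: newly cached
print(cache.append(FakeDatabaseObject("song-1")))  # True: id already present, nothing is re-added
print(cache.get("song-1") is first)                # True: the originally cached instance is reused

cache.remove("song-1")
print(cache.get("song-1"))                         # None: the entry was dropped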

View File

@@ -50,7 +50,7 @@ class Collection:
         self._used_ids.add(element.id)
 
-    def append(self, element: DatabaseObject, merge_on_conflict: bool = True):
+    def append(self, element: DatabaseObject, merge_on_conflict: bool = True) -> DatabaseObject:
         """
         :param element:
         :param merge_on_conflict:
@@ -63,17 +63,20 @@
         for name, value in element.indexing_values:
             if value in self._attribute_to_object_map[name]:
+                existing_object = self._attribute_to_object_map[name][value]
+
                 if merge_on_conflict:
                     # if the object does already exist
                     # thus merging and don't add it afterwards
-                    existing_object = self._attribute_to_object_map[name][value]
                     existing_object.merge(element)
                     # in case any relevant data has been added (e.g. it remaps the old object)
                     self.map_element(existing_object)
-                return
+
+                return existing_object
 
         self._data.append(element)
         self.map_element(element)
+        return element
 
     def extend(self, element_list: Iterable[DatabaseObject], merge_on_conflict: bool = True):
         for element in element_list:
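A rough sketch of what the new return value buys callers. The constructor arguments below and the assumption that two Artist objects sharing an _id produce a conflicting indexing value are not shown in this diff and are assumed for illustration; append either merges into and returns the already-known object, or returns the element it just added.

collection = Collection(element_type=Artist)

original = Artist(_id="a1", name="Some Band")
kept = collection.append(original)                              # no conflict: the element itself comes back
merged = collection.append(Artist(_id="a1", name="Some Band"))  # conflict: merged into the existing object

print(kept is original)    # True
print(merged is original)  # True (assuming the shared _id is an indexing value)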

View File

@@ -15,15 +15,24 @@ from ..objects import (
     Target,
     MusicObject,
     Options,
-    SourcePages
+    SourcePages,
+    Collection
 )
 
+class PageCache(Collection):
+    def clear(self):
+        self.__init__(element_type=self.element_type)
+
 class Page:
     """
     This is an abstract class, laying out the
     functionality for every other class fetching something
     """
+    SONG_CACHE = PageCache(element_type=Song)
+    ALBUM_CACHE = PageCache(element_type=Album)
+    ARTIST_CACHE = PageCache(element_type=Artist)
+
     API_SESSION: requests.Session = requests.Session()
     API_SESSION.proxies = shared.proxies
@@ -151,6 +160,10 @@ class Page:
         tracklist of every album of the artist.
         :return detailed_music_object: IT MODIFIES THE INPUT OBJ
         """
+        cls.ARTIST_CACHE.clear()
+        cls.ALBUM_CACHE.clear()
+        cls.SONG_CACHE.clear()
+
         if type(music_object) == Song:
             song = cls.fetch_song_details(music_object, flat=flat)
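A sketch of the intended cache lifecycle in the Page subclasses (the Artist constructor arguments and the id-based conflict detection are assumptions, not shown in this diff): parsers wrap every freshly built object in the matching *_CACHE.append(...) so that repeated parses of the same entity collapse into one object, and fetch_details starts every run from empty caches.

first_pass = Page.ARTIST_CACHE.append(Artist(_id="a1", name="Some Band"))
second_pass = Page.ARTIST_CACHE.append(Artist(_id="a1", name="Some Band"))
print(second_pass is first_pass)   # True: both parses resolve to the same cached Artist

Page.ARTIST_CACHE.clear()          # done by fetch_details before a new fetch
third_pass = Page.ARTIST_CACHE.append(Artist(_id="a1", name="Some Band"))
print(third_pass is first_pass)    # False: the previous run's objects do not leak into this one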

View File

@@ -151,11 +151,11 @@ class Musify(Page):
             artist_thumbnail = image_soup.get("src")
 
-        return Artist(
+        return cls.ARTIST_CACHE.append(Artist(
             _id=_id,
             name=name,
             source_list=source_list
-        )
+        ))
 
     @classmethod
     def parse_album_contact(cls, contact: BeautifulSoup) -> Album:
@@ -257,13 +257,13 @@ class Musify(Page):
         else:
             LOGGER.warning("got an unequal ammount than 3 small elements")
 
-        return Album(
+        return cls.ALBUM_CACHE.append(Album(
             _id=_id,
             title=title,
             source_list=source_list,
             date=ID3Timestamp(year=year),
             artist_list=artist_list
-        )
+        ))
 
     @classmethod
     def parse_contact_container(cls, contact_container_soup: BeautifulSoup) -> List[Union[Artist, Album]]:
@@ -535,14 +535,14 @@ class Musify(Page):
         else:
             LOGGER.debug("there is not even 1 footer in the album card")
 
-        return Album(
+        return cls.ALBUM_CACHE.append(Album(
             _id=_id,
             title=name,
             source_list=source_list,
             date=timestamp,
             album_type=album_type,
             album_status=album_status
-        )
+        ))
 
     @classmethod
     def get_discography(cls, url: MusifyUrl, artist_name: str = None, flat=False) -> List[Album]:
@@ -700,13 +700,13 @@ class Musify(Page):
         if note_soup is not None:
             notes.html = note_soup.decode_contents()
 
-        return Artist(
+        return cls.ARTIST_CACHE.append(Artist(
             _id=url.musify_id,
             name=name,
             country=country,
             source_list=source_list,
             notes=notes
-        )
+        ))
 
     @classmethod
     def fetch_artist_from_source(cls, source: Source, flat: bool = False) -> Artist:
@@ -842,7 +842,7 @@ class Musify(Page):
                 _artist_name = meta_artist_name_text
 
         if _artist_name is not None or _artist_src is not None:
-            artist_list.append(Artist(name=_artist_name, source_list=_artist_src))
+            artist_list.append(cls.ARTIST_CACHE.append(Artist(name=_artist_name, source_list=_artist_src)))
 
         return Song(
             title=song_name,