ahhh fuck me :(

This commit is contained in:
Hellow2 2023-03-21 12:46:32 +01:00
parent 0f47cdadb8
commit c648a330e0
5 changed files with 142 additions and 14 deletions

View File

@ -5,7 +5,8 @@ from . import (
parents,
formatted_text,
album,
option
option,
collection
)
MusicObject = parents.DatabaseObject
@ -31,3 +32,4 @@ Album = song.Album
FormattedText = formatted_text.FormattedText
Options = option.Options
Collection = collection.Collection

View File

@ -0,0 +1,110 @@
from collections import defaultdict
from typing import Dict, List, Optional
import weakref
from .parents import DatabaseObject
"""
This is a cache for the objects that get pulled out of the database.
This is necessary to avoid having duplicate objects with the same id.
Using a cache that maps the objects to their id has multiple benefits:
- if you modify the object at any point, all objects with the same id get modified *(copy by reference)*
- less ram usage
- to further decrease ram usage I only store weak refs and not a strong reference, for the gc to still work
"""
class ObjectCache:
    """
    ObjectCache is a cache for the objects retrieved from a database, keyed by their id.

    Using a cache for these objects provides several benefits:
        - Every user of an id holds the same instance, so modifying the object
          is visible everywhere that id is used
        - Reduced memory usage

    The cache holds only weak references to the cached objects. It therefore never
    keeps an object alive on its own: once the last strong reference outside the
    cache is dropped, the entry disappears. Callers must keep their own reference
    to every object they still need.

    Cached objects have to expose ``id`` and ``dynamic`` attributes, and have to be
    hashable and weak-referenceable.

    :attr object_to_id: Maps an id to the cached DatabaseObject (despite the name).
        The values are held weakly.
    :attr weakref_map: Maps the weak reference of each cached DatabaseObject to the
        id it was stored under.

    :method exists: Check if a DatabaseObject already exists in the cache.
    :method append: Add a DatabaseObject to the cache if it does not already exist.
    :method extent: Add a list of DatabaseObjects to the cache (``extend`` is an alias).
    :method remove: Remove a DatabaseObject from the cache by its id.
    :method get: Retrieve a DatabaseObject from the cache by its id.
    """

    object_to_id: "weakref.WeakValueDictionary[str, DatabaseObject]"
    weakref_map: Dict[weakref.ref, str]

    def __init__(self) -> None:
        # Weak values: a plain dict here would hold strong references, so no
        # cached object could ever be garbage collected.
        self.object_to_id = weakref.WeakValueDictionary()
        self.weakref_map = dict()

    def exists(self, database_object: DatabaseObject) -> bool:
        """
        Check if a DatabaseObject with the same id already exists in the cache.

        Objects whose ``dynamic`` attribute is truthy are always reported as
        existing, which makes ``append`` skip them.

        :param database_object: The DatabaseObject to check for.
        :return: True if the DatabaseObject exists (or is dynamic), False otherwise.
        """
        if database_object.dynamic:
            return True

        return database_object.id in self.object_to_id

    def on_death(self, weakref_: weakref.ref) -> None:
        """
        Callback that is triggered when a tracked DatabaseObject is about to be finalized.
        It removes the bookkeeping entries of that object from the cache.

        References the cache does not track (for example ones handed out by
        ``get_weakref`` and never stored) are ignored.

        :param weakref_: The weak reference of the DatabaseObject that has been garbage collected.
        """
        try:
            data_id = self.weakref_map.pop(weakref_)
        except (KeyError, TypeError):
            # KeyError: the reference is not tracked (e.g. already removed).
            # TypeError: a dead reference that was never hashed cannot be hashed anymore.
            return

        self.object_to_id.pop(data_id, None)

    def get_weakref(self, database_object: DatabaseObject) -> weakref.ref:
        """
        Create a weak reference to the given object which calls ``on_death`` once the object dies.

        :param database_object: The DatabaseObject to reference.
        :return: A new weak reference with ``on_death`` registered as its callback.
        """
        return weakref.ref(database_object, self.on_death)

    def append(self, database_object: DatabaseObject) -> bool:
        """
        Add a DatabaseObject to the cache.

        :param database_object: The DatabaseObject to add to the cache.
        :return: True if the DatabaseObject already exists in the cache, False otherwise.
        """
        if self.exists(database_object):
            return True

        object_id = database_object.id
        self.weakref_map[self.get_weakref(database_object)] = object_id
        self.object_to_id[object_id] = database_object

        return False

    def extent(self, database_object_list: List[DatabaseObject]):
        """
        Analogous to the ``extend`` method of list: appends every object of the given list.

        :param database_object_list: The DatabaseObjects to add to the cache.
        """
        for database_object in database_object_list:
            self.append(database_object)

    # Correctly spelled alias; ``extent`` is kept so existing callers keep working.
    extend = extent

    def remove(self, _id: str):
        """
        Remove a DatabaseObject from the cache. Unknown ids are ignored.

        :param _id: The id of the DatabaseObject to remove from the cache.
        """
        data = self.object_to_id.pop(_id, None)
        if data is None:
            return

        # While the referent is alive, weak references to it compare (and hash)
        # like the referent itself, regardless of their callback. A fresh
        # reference therefore finds the key that ``append`` stored.
        self.weakref_map.pop(weakref.ref(data), None)

    def __getitem__(self, item) -> Optional[DatabaseObject]:
        """
        Return the cached object stored under the given id.

        Unlike a dict, this does not raise for unknown ids.

        :param item: the id of the music object
        :return: The cached DatabaseObject, or None if the id is not cached.
        """
        return self.object_to_id.get(item)

    def get(self, _id: str) -> Optional[DatabaseObject]:
        """
        Retrieve a DatabaseObject from the cache by its id.

        :param _id: The id of the DatabaseObject to look up.
        :return: The cached DatabaseObject, or None if the id is not cached.
        """
        return self.__getitem__(_id)

View File

@ -50,7 +50,7 @@ class Collection:
self._used_ids.add(element.id)
def append(self, element: DatabaseObject, merge_on_conflict: bool = True):
def append(self, element: DatabaseObject, merge_on_conflict: bool = True) -> DatabaseObject:
"""
:param element:
:param merge_on_conflict:
@ -63,17 +63,20 @@ class Collection:
for name, value in element.indexing_values:
if value in self._attribute_to_object_map[name]:
existing_object = self._attribute_to_object_map[name][value]
if merge_on_conflict:
# if the object does already exist
# thus merging and don't add it afterwards
existing_object = self._attribute_to_object_map[name][value]
existing_object.merge(element)
# in case any relevant data has been added (e.g. it remaps the old object)
self.map_element(existing_object)
return
return existing_object
self._data.append(element)
self.map_element(element)
return element
def extend(self, element_list: Iterable[DatabaseObject], merge_on_conflict: bool = True):
for element in element_list:

View File

@ -15,15 +15,24 @@ from ..objects import (
Target,
MusicObject,
Options,
SourcePages
SourcePages,
Collection
)
class PageCache(Collection):
    """
    Collection subclass that can be emptied in place via ``clear``.
    """

    def clear(self):
        """
        Reset this cache by re-running the constructor with the current element type.

        NOTE(review): presumably Collection.__init__ rebuilds every internal
        container, which is what drops the cached elements - confirm against Collection.
        """
        # element_type is read from the instance before __init__ runs again,
        # so the cache keeps the type it was created with.
        self.__init__(element_type=self.element_type)
class Page:
"""
This is an abstract class, laying out the
functionality for every other class fetching something
"""
SONG_CACHE = PageCache(element_type=Song)
ALBUM_CACHE = PageCache(element_type=Album)
ARTIST_CACHE = PageCache(element_type=Artist)
API_SESSION: requests.Session = requests.Session()
API_SESSION.proxies = shared.proxies
@ -151,6 +160,10 @@ class Page:
tracklist of every album of the artist.
:return detailed_music_object: IT MODIFIES THE INPUT OBJ
"""
cls.ARTIST_CACHE.clear()
cls.ALBUM_CACHE.clear()
cls.SONG_CACHE.clear()
if type(music_object) == Song:
song = cls.fetch_song_details(music_object, flat=flat)

View File

@ -151,11 +151,11 @@ class Musify(Page):
artist_thumbnail = image_soup.get("src")
return Artist(
return cls.ARTIST_CACHE.append(Artist(
_id=_id,
name=name,
source_list=source_list
)
))
@classmethod
def parse_album_contact(cls, contact: BeautifulSoup) -> Album:
@ -257,13 +257,13 @@ class Musify(Page):
else:
LOGGER.warning("got an unequal ammount than 3 small elements")
return Album(
return cls.ALBUM_CACHE.append(Album(
_id=_id,
title=title,
source_list=source_list,
date=ID3Timestamp(year=year),
artist_list=artist_list
)
))
@classmethod
def parse_contact_container(cls, contact_container_soup: BeautifulSoup) -> List[Union[Artist, Album]]:
@ -535,14 +535,14 @@ class Musify(Page):
else:
LOGGER.debug("there is not even 1 footer in the album card")
return Album(
return cls.ALBUM_CACHE.append(Album(
_id=_id,
title=name,
source_list=source_list,
date=timestamp,
album_type=album_type,
album_status=album_status
)
))
@classmethod
def get_discography(cls, url: MusifyUrl, artist_name: str = None, flat=False) -> List[Album]:
@ -700,13 +700,13 @@ class Musify(Page):
if note_soup is not None:
notes.html = note_soup.decode_contents()
return Artist(
return cls.ARTIST_CACHE.append(Artist(
_id=url.musify_id,
name=name,
country=country,
source_list=source_list,
notes=notes
)
))
@classmethod
def fetch_artist_from_source(cls, source: Source, flat: bool = False) -> Artist:
@ -842,7 +842,7 @@ class Musify(Page):
_artist_name = meta_artist_name_text
if _artist_name is not None or _artist_src is not None:
artist_list.append(Artist(name=_artist_name, source_list=_artist_src))
artist_list.append(cls.ARTIST_CACHE.append(Artist(name=_artist_name, source_list=_artist_src)))
return Song(
title=song_name,