2023-10-24 12:53:29 +00:00
from __future__ import annotations
2023-12-19 21:11:46 +00:00
2023-03-28 08:45:41 +00:00
import random
2024-01-15 11:48:36 +00:00
from collections import defaultdict
2023-12-20 10:02:38 +00:00
from functools import lru_cache
2024-01-15 09:50:24 +00:00
from typing import Optional , Dict , Tuple , List , Type , Generic , Any , TypeVar , Set
2022-12-01 12:15:30 +00:00
2024-04-18 12:37:20 +00:00
from pathlib import Path
import inspect
2023-03-10 08:09:35 +00:00
from . metadata import Metadata
2024-04-16 11:23:20 +00:00
from . . utils import get_unix_time , object_trace
2024-04-10 09:20:49 +00:00
from . . utils . config import logging_settings , main_settings
2023-09-11 19:34:45 +00:00
from . . utils . shared import HIGHEST_ID
2024-04-09 12:18:34 +00:00
from . . utils . hacking import MetaClass
2023-09-10 14:27:09 +00:00
# Logger for this module, looked up in the project's logging settings.
LOGGER = logging_settings["object_logger"]

# Type variable bound to OuterProxy, used in signatures that are generic over proxy subclasses.
P = TypeVar("P", bound="OuterProxy")
2023-09-14 21:35:37 +00:00
2023-12-19 12:58:39 +00:00
class InnerData:
    """
    This is the core class, which is used for every Data class.
    The attributes are set, and can be merged.

    The concept is, that the outer class proxies this class.
    If the data in the wrapper class has to be merged, then this class is just replaced and garbage collected.
    """

    # Class-level placeholder only; every instance gets its own set in __init__.
    _refers_to_instances: set = None

    def __init__(self, object_type, **kwargs):
        """
        :param object_type: class whose ``_default_factories`` mapping (attribute name -> zero-argument callable)
            is used to compute the default value of every attribute
        :param kwargs: attribute names and values, which are set on this instance
        """
        # the outer objects that currently use this instance as their data
        self._refers_to_instances = set()
        self._fetched_from: dict = {}

        # collection : collection that is a collection of self
        self._is_collection_child: Dict[Collection, Collection] = {}
        self._is_collection_parent: Dict[Collection, Collection] = {}

        # initialize the default values
        # they are kept, so __merge__ can tell if an attribute was never really set
        self._default_values = {
            name: factory()
            for name, factory in object_type._default_factories.items()
        }

        for key, value in kwargs.items():
            self.__setattr__(key, value)

    def __merge__(self, __other: InnerData, override: bool = False):
        """
        Merges the data attributes of __other into self.

        For every public attribute of __other:
        1. if self doesn't have it, or only holds the default value, the value of __other is taken
        2. else if the existing value implements __merge__, the merge is delegated to it
        3. else the existing value is only replaced if override is set

        :param __other: the instance, whose data is merged into self
        :param override: if set, conflicting plain values are taken from __other
        :return:
        """
        self._fetched_from.update(__other._fetched_from)

        for key, value in __other.__dict__.copy().items():
            # The private attributes are bookkeeping of the instance itself
            # (referring proxies, fetch history, collection links, defaults).
            # They must not be overwritten by the ones of __other,
            # which used to happen for override=True.
            if key.startswith("_"):
                continue

            # just set the other value if self doesn't already have it
            if key not in self.__dict__ or self.__dict__[key] == self._default_values.get(key):
                self.__setattr__(key, value)
                continue

            # if the object of value implemented __merge__, it merges
            existing = self.__getattribute__(key)
            if hasattr(type(existing), "__merge__"):
                existing.__merge__(value, override)
                continue

            # override the existing value if requested
            if override:
                self.__setattr__(key, value)
2023-12-20 08:55:09 +00:00
class OuterProxy:
    """
    Wraps the inner data, and provides apis, to naturally access those values.

    Public attribute access is forwarded to the InnerData instance stored in ``_inner``.
    Several proxies can share one InnerData after they were merged.
    """

    # attribute name -> zero-argument callable, that creates the default value
    _default_factories: dict = {}
    # names that are always looked up on the proxy itself, never on _inner
    _outer_attribute: Set[str] = {"options", "metadata", "indexing_values", "option_string"}

    DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = tuple()
    UPWARDS_COLLECTION_STRING_ATTRIBUTES = tuple()

    # name of the attribute, that title_string renders
    TITEL = "id"

    def __init__(self, _id: int = None, dynamic: bool = False, **kwargs):
        """
        :param _id: the id of the object; if it is None and the object isn't dynamic, a random one is generated
        :param dynamic: stored as the attribute ``dynamic``; dynamic objects don't get an automatic id
        :param kwargs: the attribute values; a list passed as ``<name>_list``
            is used to extend the collection ``<name>_collection``
        """
        _automatic_id: bool = False

        if _id is None and not dynamic:
            # generates a random integer id between 0 and HIGHEST_ID
            _id = random.randint(0, HIGHEST_ID)
            _automatic_id = True

        kwargs["automatic_id"] = _automatic_id
        kwargs["id"] = _id
        kwargs["dynamic"] = dynamic

        # every attribute that is missing or None gets its default value
        for name, factory in type(self)._default_factories.items():
            if kwargs.get(name, None) is None:
                kwargs[name] = factory()

        # lists are not stored directly, they are appended to the matching collection later
        collection_data: Dict[str, list] = {}
        for name, value in kwargs.copy().items():
            if isinstance(value, list) and name.endswith("_list"):
                # only swap the suffix, "_list" may also occur earlier in the name
                collection_name = name[:-len("_list")] + "_collection"
                collection_data[collection_name] = value
                del kwargs[name]

        self._inner: InnerData = InnerData(type(self), **kwargs)
        self._inner._refers_to_instances.add(self)

        object_trace(f"creating {type(self).__name__} [{self.title_string}]")

        self.__init_collections__()

        for name, data_list in collection_data.items():
            collection = self._inner.__getattribute__(name)
            collection.extend(data_list)

            self._inner.__setattr__(name, collection)

    def __init_collections__(self):
        """
        Hook that is called by __init__ after _inner was created,
        and before the ``*_list`` arguments are appended to their collections.
        Does nothing by default.
        """
        pass

    def __getattribute__(self, __name: str) -> Any:
        """
        Returns the attribute of _inner if the attribute exists,
        else it returns the attribute of self.

        That the _inner gets checked first is essential for the type hints.

        Names starting with an underscore, names in _outer_attribute
        and all uppercase names are always taken from self.

        :param __name:
        :return:
        """
        if __name.startswith("_") or __name in self._outer_attribute or __name.isupper():
            return object.__getattribute__(self, __name)

        _inner: InnerData = super().__getattribute__("_inner")
        try:
            return _inner.__getattribute__(__name)
        except AttributeError:
            return super().__getattribute__(__name)

    def __setattr__(self, __name, __value):
        """
        Sets public attributes on _inner, as soon as _inner exists.
        Names starting with an underscore are always set on self.
        """
        if not __name.startswith("_") and hasattr(self, "_inner"):
            _inner: InnerData = super().__getattribute__("_inner")
            return _inner.__setattr__(__name, __value)

        return super().__setattr__(__name, __value)

    def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]):
        """
        Hook that receives all objects of one type from add_list_of_other_objects.
        Does nothing by default.

        :param object_type: the exact type of every object in object_list
        :param object_list:
        """
        pass

    def add_list_of_other_objects(self, object_list: List[OuterProxy]):
        """
        Groups the objects by their exact type, and passes every group to _add_other_db_objects.

        :param object_list:
        """
        d: Dict[Type[OuterProxy], List[OuterProxy]] = defaultdict(list)

        for db_object in object_list:
            d[type(db_object)].append(db_object)

        for key, value in d.items():
            self._add_other_db_objects(key, value)

    def __hash__(self):
        # the hash is bound to the identity of the proxy, not to its data
        return id(self)

    def __eq__(self, other: Any):
        """
        Two proxies are equal, if their hashes are equal.
        Comparing with anything that is not a proxy is left to python (NotImplemented),
        so unhashable objects like lists don't raise a TypeError.
        """
        if not isinstance(other, OuterProxy):
            return NotImplemented

        return self.__hash__() == other.__hash__()

    def merge(self, __other: Optional[OuterProxy], override: bool = False):
        """
        1. merges the data of __other in self
        2. replaces the data of __other with the data of self

        The two objects can switch roles internally: the inner data that more proxies refer to is kept.
        Afterwards every proxy that referred to either inner data shares the kept one.

        :param __other: the object to merge with; None is ignored
        :param override: passed on to InnerData.__merge__
        :return:
        """
        if __other is None:
            return

        a = self
        b = __other

        # NOTE(review): objects reporting the same id are treated as already merged.
        # This also skips two objects whose id is None - confirm that this is intended.
        if a.id == b.id:
            return

        # switch instances if more efficient
        if len(b._inner._refers_to_instances) > len(a._inner._refers_to_instances):
            a, b = b, a

        object_trace(f"merging {type(a).__name__} [{a.title_string} | {a.id}] with {type(b).__name__} [{b.title_string} | {b.id}] called by [{' | '.join(f'{s.function} {Path(s.filename).name}:{str(s.lineno)}' for s in inspect.stack()[1:5])}]")

        # detach the collection links, that were registered on the inner data which gets replaced
        for collection, child_collection in b._inner._is_collection_child.items():
            try:
                collection.children.remove(child_collection)
            except ValueError:
                # the link is already gone
                pass

        for collection, parent_collection in b._inner._is_collection_parent.items():
            try:
                collection.parents.remove(parent_collection)
            except ValueError:
                # the link is already gone
                pass

        # b._inner is reassigned in the loop below, so the old inner data has to be remembered
        old_inner = b._inner

        for instance in old_inner._refers_to_instances.copy():
            instance._inner = a._inner
            a._inner._refers_to_instances.add(instance)

        a._inner.__merge__(old_inner, override=override)

    def __merge__(self, __other: Optional[OuterProxy], override: bool = False):
        """
        Same as merge; exists so that a proxy stored as an attribute is merged by InnerData.__merge__.
        """
        self.merge(__other, override)

    def mark_as_fetched(self, *url_hash_list: str):
        """
        Stores the current unix time for every given url hash.

        :param url_hash_list: the url hashes, this object was fetched from
        """
        for url_hash in url_hash_list:
            self._inner._fetched_from[url_hash] = {
                "time": get_unix_time(),
                "url": url_hash,
            }

    def already_fetched_from(self, url_hash: str) -> bool:
        """
        :param url_hash:
        :return: True if url_hash was marked as fetched, and less than
            main_settings["refresh_after"] has passed since then
        """
        res = self._inner._fetched_from.get(url_hash, None)

        if res is None:
            return False

        return get_unix_time() - res["time"] < main_settings["refresh_after"]

    @property
    def metadata(self) -> Metadata:
        """
        This is an interface.
        :return: an empty Metadata object by default
        """
        return Metadata()

    @property
    def options(self) -> List[P]:
        """
        :return: a list that only contains self by default
        """
        return [self]

    @property
    def indexing_values(self) -> List[Tuple[str, object]]:
        """
        This is an interface.
        It is supposed to return a map of the name and values for all important attributes.
        This helps in comparing classes for equal data (e.g. being the same song but different attributes)

        TODO
        Rewrite this approach into an approach, that is centered around statistics, and not binaries.
        Instead of: one of this matches, it is the same
        This: If enough attributes are similar enough, they are the same

        Returns:
            List[Tuple[str, object]]: the first element in the tuple is the name of the attribute, the second the value.
        """
        return []

    @property
    def all_collections(self):
        """
        :return: the values of all attributes named in _default_factories, that have ``__is_collection__``

        This is computed on every access. A cache keyed on self would keep the proxy alive,
        and return the collections of an outdated _inner after a merge.
        """
        r = []

        for key in self._default_factories:
            val = self._inner.__getattribute__(key)
            if hasattr(val, "__is_collection__"):
                r.append(val)

        return r

    def __repr__(self):
        return f"{type(self).__name__}({', '.join(key + ': ' + str(val) for key, val in self.indexing_values)})"

    @property
    def title_string(self) -> str:
        """
        :return: the attribute named by TITEL as string
        """
        return str(self.__getattribute__(self.TITEL))