2023-10-24 12:53:29 +00:00
|
|
|
from __future__ import annotations
|
2023-12-19 21:11:46 +00:00
|
|
|
|
2023-03-28 08:45:41 +00:00
|
|
|
import random
|
2024-01-15 11:48:36 +00:00
|
|
|
from collections import defaultdict
|
2023-12-20 10:02:38 +00:00
|
|
|
from functools import lru_cache
|
2024-01-15 09:50:24 +00:00
|
|
|
from typing import Optional, Dict, Tuple, List, Type, Generic, Any, TypeVar, Set
|
2022-12-01 12:15:30 +00:00
|
|
|
|
2024-04-18 12:37:20 +00:00
|
|
|
from pathlib import Path
|
|
|
|
import inspect
|
|
|
|
|
2024-05-15 11:16:11 +00:00
|
|
|
from .source import SourceCollection
|
2023-03-10 08:09:35 +00:00
|
|
|
from .metadata import Metadata
|
2024-04-29 11:43:34 +00:00
|
|
|
from ..utils import get_unix_time, object_trace, generate_id
|
2024-04-10 09:20:49 +00:00
|
|
|
from ..utils.config import logging_settings, main_settings
|
2024-05-03 12:52:12 +00:00
|
|
|
from ..utils.shared import HIGHEST_ID, DEBUG_PRINT_ID
|
2024-04-09 12:18:34 +00:00
|
|
|
from ..utils.hacking import MetaClass
|
2023-09-10 14:27:09 +00:00
|
|
|
|
|
|
|
# Module-level logger, taken from the project's logging settings.
LOGGER = logging_settings["object_logger"]

# Type variable bound to OuterProxy, used for APIs that return "self-like" objects.
P = TypeVar("P", bound="OuterProxy")
|
2023-09-14 21:35:37 +00:00
|
|
|
|
|
|
|
|
2023-12-19 12:58:39 +00:00
|
|
|
class InnerData:
    """
    This is the core class, which is used for every Data class.

    The attributes are set, and can be merged.

    The concept is, that the outer class proxies this class.
    If the data in the wrapper class has to be merged, then this class is just
    replaced and garbage collected.
    """

    # OuterProxy instances whose ._inner points at this object.
    # The class-level None is only a placeholder; __init__ assigns a fresh set.
    _refers_to_instances: set = None
    # Collections this data object is currently contained in (same placeholder pattern).
    _is_in_collection: set = None

    # Becomes True as soon as any attribute is set to a non-default value;
    # see __setattr__ for the bookkeeping.
    _has_data: bool = False

    # Attribute versions keep track of whether an attribute has been changed.

    def __init__(self, object_type, **kwargs):
        """
        :param object_type: the OuterProxy subclass this data belongs to; its
            ``_default_factories`` mapping defines the default value per attribute.
        :param kwargs: initial attribute values; values exposing
            ``__is_collection__`` register this object in their ``_collection_for`` map.
        """
        self._refers_to_instances = set()
        self._is_in_collection = set()

        # url-hash -> {"time": ..., "url": ...} of already completed fetches
        self._fetched_from: dict = {}

        # initialize the default values
        self._default_values = {}
        for name, factory in object_type._default_factories.items():
            self._default_values[name] = factory()

        for key, value in kwargs.items():
            if hasattr(value, "__is_collection__"):
                value._collection_for[self] = key

            self.__setattr__(key, value)
            # NOTE(review): the original loop body ended with a no-op
            # `if self._has_data: continue`; removed, because `continue` as the
            # last statement of a loop body has no effect.

    def __setattr__(self, key: str, value):
        # Once real data exists — or before _default_values is initialized
        # during __init__ — attributes are set without bookkeeping.
        if self._has_data or not hasattr(self, "_default_values"):
            return super().__setattr__(key, value)

        # Flag the object as carrying data as soon as a value differs from its
        # default (or has no default at all). Written through super() to avoid
        # recursing into this very method.
        super().__setattr__("_has_data", not (key in self._default_values and self._default_values[key] == value))
        return super().__setattr__(key, value)

    def __hash__(self):
        # Assumes `id` was passed in kwargs; OuterProxy.__init__ always supplies it.
        return self.id

    def __merge__(self, __other: InnerData, **kwargs):
        """
        Merge the data of __other into self, preferring self's existing
        non-default values.

        :param __other:
        :return:
        """
        self._fetched_from.update(__other._fetched_from)
        self._is_in_collection.update(__other._is_in_collection)

        for key, value in __other.__dict__.copy().items():
            # private bookkeeping attributes were handled above; don't copy them
            if key.startswith("_"):
                continue

            # collections merge element-wise when both sides have one
            if hasattr(value, "__is_collection__") and key in self.__dict__:
                self.__getattribute__(key).__merge__(value, **kwargs)
                continue

            # just set the other value if self doesn't already have it
            if key not in self.__dict__ or (key in self.__dict__ and self.__dict__[key] == self._default_values.get(key)):
                self.__setattr__(key, value)
                continue

            # if the object of value implemented __merge__, it merges
            existing = self.__getattribute__(key)
            if hasattr(existing, "__merge__"):
                existing.__merge__(value, **kwargs)
|
|
|
2023-12-20 08:55:09 +00:00
|
|
|
class OuterProxy:
    """
    Wraps the inner data, and provides apis, to naturally access those values.
    """

    # forwarded to _inner via __getattribute__ at runtime
    source_collection: SourceCollection

    # per-attribute default factories; subclasses extend this mapping
    _default_factories: dict = {"source_collection": SourceCollection}
    # names that are always resolved on the proxy itself, never forwarded to _inner
    _outer_attribute: Set[str] = {"options", "metadata", "indexing_values", "option_string"}

    # names of collection attributes pointing toward children / parents;
    # subclasses override these tuples
    DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = tuple()
    UPWARDS_COLLECTION_STRING_ATTRIBUTES = tuple()

    def __init__(self, _id: int = None, dynamic: bool = False, **kwargs):
        """
        :param _id: explicit id; if None a random one is generated (see below).
        :param dynamic: stored into the inner data as the "dynamic" attribute.
        :param kwargs: initial attribute values; ``*_list`` lists are converted
            into the matching ``*_collection`` after the collections exist.
        """
        _automatic_id: bool = False

        if _id is None:
            """
            generates a random integer id
            the range is defined in the config
            """
            _id = generate_id()
            _automatic_id = True

        kwargs["automatic_id"] = _automatic_id
        kwargs["id"] = _id
        kwargs["dynamic"] = dynamic

        # fill in defaults for any factory-backed attribute not provided
        for name, factory in type(self)._default_factories.items():
            if kwargs.get(name, None) is None:
                kwargs[name] = factory()

        # pull "<x>_list" kwargs out; they are appended to "<x>_collection"
        # only after __init_collections__ has created the collections
        collection_data: Dict[str, list] = {}
        for name, value in kwargs.copy().items():
            if isinstance(value, list) and name.endswith("_list"):
                collection_name = name.replace("_list", "_collection")
                collection_data[collection_name] = value

                del kwargs[name]

        self._inner: InnerData = InnerData(type(self), **kwargs)
        self._inner._refers_to_instances.add(self)

        object_trace(f"creating {type(self).__name__} [{self.option_string}]")

        self.__init_collections__()

        for name, data_list in collection_data.items():
            collection = self._inner.__getattribute__(name)
            collection.extend(data_list)

            self._inner.__setattr__(name, collection)

    def __init_collections__(self):
        # hook for subclasses to create their collection attributes
        pass

    def __getattribute__(self, __name: str) -> Any:
        """
        Returns the attribute of _inner if the attribute exists,
        else it returns the attribute of self.

        That the _inner gets checked first is essential for the type hints.
        :param __name:
        :return:
        """
        # dunder/private names, declared outer attributes, and ALL-CAPS
        # constants are resolved on the proxy itself
        if __name.startswith("_") or __name in self._outer_attribute or __name.isupper():
            return object.__getattribute__(self, __name)

        _inner: InnerData = super().__getattribute__("_inner")
        try:
            return _inner.__getattribute__(__name)
        except AttributeError:
            return super().__getattribute__(__name)

    def __setattr__(self, __name, __value):
        # public attributes go to the inner data (once it exists);
        # private ones stay on the proxy
        if not __name.startswith("_") and hasattr(self, "_inner"):
            _inner: InnerData = super().__getattribute__("_inner")
            return _inner.__setattr__(__name, __value)

        return super().__setattr__(__name, __value)

    def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]):
        # hook for subclasses: attach a homogeneous batch of related objects
        pass

    def add_list_of_other_objects(self, object_list: List[OuterProxy]):
        # group by concrete type, then delegate each group to the subclass hook
        d: Dict[Type[OuterProxy], List[OuterProxy]] = defaultdict(list)

        for db_object in object_list:
            d[type(db_object)].append(db_object)

        for key, value in d.items():
            self._add_other_db_objects(key, value)

    def __hash__(self):
        # identity hash: proxies are distinct objects even when sharing _inner
        return id(self)

    def __eq__(self, other: Any):
        return self.__hash__() == other.__hash__()

    def merge(self, __other: Optional[OuterProxy], **kwargs):
        """
        1. merges the data of __other in self
        2. replaces the data of __other with the data of self

        :param __other:
        :return:
        """
        if __other is None:
            return

        # remember self's id; it is restored after the merge below
        a_id = self.id

        a = self
        b = __other

        if a.id == b.id:
            return

        # switch instances if more efficient
        # (fewer proxies have to be re-pointed when a has more referrers)
        if len(b._inner._refers_to_instances) > len(a._inner._refers_to_instances):
            a, b = b, a

        object_trace(f"merging {a.option_string} | {b.option_string}")

        old_inner = b._inner

        # re-point every proxy of b to a's inner data
        for instance in b._inner._refers_to_instances.copy():
            instance._inner = a._inner
            a._inner._refers_to_instances.add(instance)

        a._inner.__merge__(old_inner, **kwargs)
        del old_inner

        self.id = a_id

    def __merge__(self, __other: Optional[OuterProxy], **kwargs):
        self.merge(__other, **kwargs)

    def mark_as_fetched(self, *url_hash_list: str):
        # record fetch time per url hash; consulted by already_fetched_from
        for url_hash in url_hash_list:
            self._inner._fetched_from[url_hash] = {
                "time": get_unix_time(),
                "url": url_hash,
            }

    def already_fetched_from(self, url_hash: str) -> bool:
        # True only while the recorded fetch is younger than the configured
        # "refresh_after" window
        res = self._inner._fetched_from.get(url_hash, None)

        if res is None:
            return False

        return get_unix_time() - res["time"] < main_settings["refresh_after"]

    @property
    def metadata(self) -> Metadata:
        """
        This is an interface.
        :return:
        """
        return Metadata()

    @property
    def options(self) -> List[P]:
        # parents first, then self, then children
        r = []

        for collection_string_attribute in self.UPWARDS_COLLECTION_STRING_ATTRIBUTES:
            r.extend(self.__getattribute__(collection_string_attribute))

        r.append(self)

        for collection_string_attribute in self.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
            r.extend(self.__getattribute__(collection_string_attribute))

        return r

    @property
    def option_string(self) -> str:
        return self.title_string

    # attribute names whose values feed indexing_values in subclasses
    INDEX_DEPENDS_ON: List[str] = []

    @property
    def indexing_values(self) -> List[Tuple[str, object]]:
        """
        This is an interface.
        It is supposed to return a map of the name and values for all important attributes.
        This helps in comparing classes for equal data (e.g. being the same song but different attributes)

        TODO
        Rewrite this approach into a approach, that is centered around statistics, and not binaries.
        Instead of: one of this matches, it is the same
        This: If enough attributes are similar enough, they are the same

        Returns:
            List[Tuple[str, object]]: the first element in the tuple is the name of the attribute, the second the value.
        """
        return []

    @property
    # NOTE(review): lru_cache on a method keys the cache on `self`, keeping every
    # instance alive for the cache's lifetime; and since merge() swaps _inner,
    # the cached list may go stale afterwards — confirm this is intended.
    @lru_cache()
    def all_collections(self):
        # every factory-backed attribute of _inner that is a collection
        r = []

        for key in self._default_factories:
            val = self._inner.__getattribute__(key)
            if hasattr(val, "__is_collection__"):
                r.append(val)

        return r

    @property
    # NOTE(review): `Collection` is not among the visible imports; harmless at
    # runtime because annotations are lazy (from __future__ import annotations),
    # but verify the name resolves for type checkers.
    def root_collections(self) -> List[Collection]:
        # objects with no parents are their own root
        if len(self.UPWARDS_COLLECTION_STRING_ATTRIBUTES) == 0:
            return [self]

        r = []
        for collection_string_attribute in self.UPWARDS_COLLECTION_STRING_ATTRIBUTES:
            r.extend(self.__getattribute__(collection_string_attribute))

        return r

    def _compile(self, **kwargs):
        # hook for subclasses: per-object compile step
        pass

    def compile(self, from_root=False, **kwargs):
        # compile from the root
        if not from_root:
            for c in self.root_collections:
                c.compile(from_root=True, **kwargs)
            return

        self._compile(**kwargs)

        # then recurse downwards through the child collections
        for c_attribute in self.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
            for c in self.__getattribute__(c_attribute):
                c.compile(from_root=True, **kwargs)

    # name of the attribute used for the title strings; subclasses override
    TITEL = "id"

    @property
    def title_string(self) -> str:
        # appends the id when debug printing of ids is enabled
        return str(self.__getattribute__(self.TITEL)) + (f" {self.id}" if DEBUG_PRINT_ID else "")

    @property
    def title_value(self) -> str:
        return str(self.__getattribute__(self.TITEL))

    def __repr__(self):
        return f"{type(self).__name__}({self.title_string})"

    def get_child_collections(self):
        # yields the collection objects named by DOWNWARDS_COLLECTION_STRING_ATTRIBUTES
        for collection_string_attribute in self.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
            yield self.__getattribute__(collection_string_attribute)

    def get_parent_collections(self):
        # yields the collection objects named by UPWARDS_COLLECTION_STRING_ATTRIBUTES
        for collection_string_attribute in self.UPWARDS_COLLECTION_STRING_ATTRIBUTES:
            yield self.__getattribute__(collection_string_attribute)