Compare commits

...

3 Commits

6 changed files with 68 additions and 79 deletions

View File

@ -46,7 +46,7 @@ init_logging()
from . import cli
if DEBUG:
sys.setrecursionlimit(500)
sys.setrecursionlimit(300)
if main_settings['modify_gc']:

View File

@ -1,7 +1,7 @@
from __future__ import annotations
from collections import defaultdict
from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple
from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Generator
from .parents import OuterProxy
T = TypeVar('T', bound=OuterProxy)
@ -35,39 +35,24 @@ class Collection(Generic[T]):
# Key: collection attribute (str) of appended element
# Value: main collection to sync to
self.contain_given_in_attribute: Dict[str, Collection] = contain_given_in_attribute or {}
self.contain_attribute_in_given: Dict[str, Collection] = contain_attribute_in_given or {}
self.append_object_to_attribute: Dict[str, T] = append_object_to_attribute or {}
self.contain_self_on_append: List[str] = []
self._indexed_values = defaultdict(set)
self._indexed_to_objects = defaultdict(list)
self.extend(data)
# Registers __object in this collection's lookup structures: its id is added to
# _contains_ids and its indexing values are recorded in _indexed_values /
# _indexed_to_objects. Returns early when the id is already known.
# NOTE(review): indentation is missing in this listing, so nesting must be
# inferred; confirm against the real file before relying on these notes.
def _map_element(self, __object: T, from_map: bool = False):
if __object.id in self._contains_ids:
return
# remember that the object's inner data is mapped in this collection
__object._inner._mapped_in_collection.add(self)
self._contains_ids.add(__object.id)
for name, value in __object.indexing_values:
# NOTE(review): the next two `if` lines look like the before/after form of
# one condition from a commit comparison - confirm which one is live.
if value is None:
if value is None or value == __object._inner._default_values.get(name):
continue
self._indexed_values[name].add(value)
self._indexed_to_objects[value].append(__object)
# NOTE(review): everything from here on is guarded by `from_map`; presumably this
# is the propagation to related collections that the append path also performs -
# verify whether this section still exists in the current revision.
if not from_map:
for attribute, new_object in self.contain_given_in_attribute.items():
__object.__getattribute__(attribute).contain_collection_inside(new_object)
for attribute, new_object in self.contain_attribute_in_given.items():
new_object.contain_collection_inside(__object.__getattribute__(attribute))
for attribute, new_object in self.append_object_to_attribute.items():
__object.__getattribute__(attribute).append(new_object)
def _unmap_element(self, __object: T):
if __object.id in self._contains_ids:
self._contains_ids.remove(__object.id)
@ -186,15 +171,8 @@ class Collection(Generic[T]):
def contains(self, __object: T) -> bool:
    """
    Tell whether the given object is found by ``_contained_in_sub``.

    :param __object: the element to look for.
    :return: ``True`` when ``self._contained_in_sub(__object)`` yields at least
        one entry, ``False`` when it is empty.
    """
    return len(self._contained_in_sub(__object)) > 0
def _append(self, __object: T, from_map: bool = False):
print(self, __object)
self._map_element(__object, from_map=from_map)
self._data.append(__object)
def _find_object_in_self(self, __object: T) -> Optional[T]:
for name, value in __object.indexing_values:
if value is None or value == __object._default_factories.get(name, lambda: None)():
continue
if value in self._indexed_values[name]:
return self._indexed_to_objects[value][0]
@ -211,6 +189,7 @@ class Collection(Generic[T]):
if no_sibling:
return self, None
"""
# find in siblings and all children of siblings
for parent in self.parents:
for sibling in parent.children:
@ -220,6 +199,7 @@ class Collection(Generic[T]):
o, other_object = sibling._find_object(__object, no_sibling=True)
if other_object is not None:
return o, other_object
"""
return self, None
@ -235,7 +215,7 @@ class Collection(Generic[T]):
:return:
"""
if __object is None or __object.id in self._contains_ids:
if __object is None:
return
append_to, existing_object = self._find_object(__object)
@ -243,54 +223,47 @@ class Collection(Generic[T]):
if existing_object is None:
# append
append_to._data.append(__object)
append_to._map_element(__object, from_map=from_map)
append_to._map_element(__object)
# only modify collections if the object actually has been appended
for collection_attribute, child_collection in self.contain_given_in_attribute.items():
__object.__getattribute__(collection_attribute).contain_collection_inside(child_collection, __object)
for attribute, new_object in self.append_object_to_attribute.items():
__object.__getattribute__(attribute).append(new_object)
else:
# merge
# merge only if the two objects are not the same
if existing_object.id == __object.id:
return
append_to._unmap_element(existing_object)
existing_object.merge(__object)
append_to._map_element(existing_object, from_map=from_map)
append_to._map_element(existing_object)
# Appends every element of the given iterable through self.append; a None
# argument is accepted and ignored.
# NOTE(review): two `def extend` headers and two `self.append(...)` calls appear
# back to back. They look like the before/after lines of a commit comparison
# (with and without the `from_map` flag) - confirm which signature is current
# before calling this with `from_map`.
def extend(self, __iterable: Optional[Iterable[T]], from_map: bool = False):
def extend(self, __iterable: Optional[Generator[T, None, None]]):
if __iterable is None:
return
for __object in __iterable:
self.append(__object, from_map=from_map)
self.append(__object)
def sync_with_other_collection(self, equal_collection: Collection):
    """
    If two collections always need to have the same values, this can be used.

    Internally:
    1. import the data from other to self
        - _data
        - contained_collections
    2. replace all refs from the other object, with refs from this object

    :param equal_collection: the collection whose direct elements and child
        collections are taken over; passing ``self`` is a no-op.
    """
    if equal_collection is self:
        return

    # don't add the elements from the subelements from the other collection.
    # this will be done in the next step.
    self.extend(equal_collection._data)

    # add all submodules
    # NOTE(review): this uses the one-argument form of contain_collection_inside;
    # confirm it still matches that method's current signature.
    for equal_sub_collection in equal_collection.children:
        self.contain_collection_inside(equal_sub_collection)
# Links sub_collection as a child of this collection (and this collection as its
# parent), unless it is this collection itself or already one of its children.
# NOTE(review): two `def` headers appear back to back; they look like the
# before/after signatures from a commit comparison. The body reads `_object`,
# which only the second header provides - confirm the current signature.
def contain_collection_inside(self, sub_collection: Collection):
def contain_collection_inside(self, sub_collection: Collection, _object: T):
"""
This collection will ALWAYS contain everything from the passed in collection
"""
if self is sub_collection or sub_collection in self.children:
return
# record the parent/child link on the object's inner data, keyed by collection
_object._inner._is_collection_child[self] = sub_collection
_object._inner._is_collection_parent[sub_collection] = self
self.children.append(sub_collection)
sub_collection.parents.append(self)
# Read-only view: a new list built from iterating this collection.
# NOTE(review): two return statements follow each other; if both belong to the
# body only the first is reachable. They build the same list and look like the
# before/after form of one line from a commit comparison - confirm which is kept.
@property
def data(self) -> List[T]:
return list(i for i in self.__iter__())
return list(self.__iter__())
def __len__(self) -> int:
    """
    Number of elements reachable from this collection.

    :return: the count of directly stored elements (``_data``) plus the length
        of every collection in ``children``. Elements present in more than one
        of those are counted once per occurrence.
    """
    return len(self._data) + sum(len(collection) for collection in self.children)
@ -312,7 +285,7 @@ class Collection(Generic[T]):
yield from c.__iter__(finished_ids=finished_ids)
# Merge hook for collections: pulls the other collection's elements into this
# one via extend. `override` is accepted but not used in the lines shown here.
# NOTE(review): the two extend calls look like the before/after form of one line
# from a commit comparison (direct `_data` with from_map=True versus the full
# iterator) - confirm which one is current; running both would extend twice.
def __merge__(self, __other: Collection, override: bool = False):
self.extend(__other._data, from_map=True)
self.extend(__other.__iter__())
def __getitem__(self, item: int):
if item < len(self._data):

View File

@ -29,12 +29,16 @@ class InnerData:
_refers_to_instances: set = None
def __init__(self, object_type, **kwargs):
self._refers_to_instances =set()
self._refers_to_instances = set()
# collection : collection that is a collection of self
self._is_collection_child: Dict[Collection, Collection] = {}
self._is_collection_parent: Dict[Collection, Collection] = {}
# initialize the default values
self.__default_values = {}
self._default_values = {}
for name, factory in object_type._default_factories.items():
self.__default_values[name] = factory()
self._default_values[name] = factory()
for key, value in kwargs.items():
self.__setattr__(key, value)
@ -48,7 +52,7 @@ class InnerData:
for key, value in __other.__dict__.copy().items():
# just set the other value if self doesn't already have it
if key not in self.__dict__ or (key in self.__dict__ and self.__dict__[key] == self.__default_values.get(key)):
if key not in self.__dict__ or (key in self.__dict__ and self.__dict__[key] == self._default_values.get(key)):
self.__setattr__(key, value)
continue
@ -183,7 +187,7 @@ class OuterProxy:
if __other is None:
return
object_trace(f"merging {type(self).__name__} [{self.title_string}] with {type(__other).__name__} [{__other.title_string}]")
object_trace(f"merging {type(self).__name__} [{self.title_string} | {self.id}] with {type(__other).__name__} [{__other.title_string} | {__other.id}]")
a = self
b = __other
@ -196,11 +200,20 @@ class OuterProxy:
a, b = b, a
a._inner.__merge__(b._inner, override=override)
for collection, child_collection in b._inner._is_collection_child.items():
collection.children.remove(child_collection)
for collection, parent_collection in b._inner._is_collection_parent.items():
collection.parents.remove(parent_collection)
a._inner._refers_to_instances.update(b._inner._refers_to_instances)
for instance in b._inner._refers_to_instances:
instance._inner = a._inner
def __merge__(self, __other: Optional[OuterProxy], override: bool = False):
    """
    Merge hook that simply forwards to :meth:`merge`.

    :param __other: the object to merge into this one; passed through unchanged
        (``None`` included).
    :param override: forwarded positionally to ``merge``.
    """
    self.merge(__other, override)
def mark_as_fetched(self, *url_hash_list: List[str]):
for url_hash in url_hash_list:
self._fetched_from[url_hash] = {

View File

@ -49,6 +49,7 @@ class Song(Base):
source_collection: SourceCollection
target_collection: Collection[Target]
lyrics_collection: Collection[Lyrics]
main_artist_collection: Collection[Artist]
feature_artist_collection: Collection[Artist]
album_collection: Collection[Album]
@ -241,13 +242,17 @@ class Album(Base):
UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("artist_collection", "label_collection")
def __init_collections__(self):
    """
    Configure the sync rules of this album's song and artist collections.

    Only the rule dictionaries are assigned here; nothing is appended or linked
    by this method itself. Keys are attribute names looked up on elements of the
    respective collection, values are the collection/object to connect them to
    (presumably applied by the collection when an element is appended - verify
    against Collection).
    """
    # the main artists of each song are tied to the album's artist collection
    self.song_collection.contain_attribute_in_given = {
        "main_artist_collection": self.artist_collection
    }
    # each song gets this album in its album_collection
    self.song_collection.append_object_to_attribute = {
        "album_collection": self
    }

    # each artist gets this album in its main_album_collection
    self.artist_collection.append_object_to_attribute = {
        "main_album_collection": self
    }
    # the artists' label collections are tied to the album's label collection
    self.artist_collection.contain_given_in_attribute = {
        "label_collection": self.label_collection
    }
def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]):
if object_type is Song:
self.song_collection.extend(object_list)
@ -642,6 +647,15 @@ class Label(Base):
contact_list=contact_list, album_list=album_list, current_artist_list=current_artist_list,
**kwargs)
def __init_collections__(self):
    """
    Configure the sync rules of this label's album and artist collections.

    Both collections are told to connect their elements' ``label_collection``
    attribute with this label. Only the rule dictionaries are assigned here
    (presumably applied by the collection when an element is appended - verify
    against Collection).
    """
    self.album_collection.append_object_to_attribute = {
        "label_collection": self
    }

    self.current_artist_collection.append_object_to_attribute = {
        "label_collection": self
    }
@property
def indexing_values(self) -> List[Tuple[str, object]]:
return [

View File

@ -1,7 +1,7 @@
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from typing import List, Optional, Type, Union
from typing import List, Optional, Type, Union, Generator
from urllib.parse import urlparse
import pycountry
@ -1056,7 +1056,7 @@ class Musify(Page):
date=date
)
def _get_discography(self, url: MusifyUrl, artist_name: str = None, stop_at_level: int = 1) -> List[Album]:
def _get_discography(self, url: MusifyUrl, artist_name: str = None, stop_at_level: int = 1) -> Generator[Album, None, None]:
"""
POST https://musify.club/artist/filteralbums
ArtistID: 280348
@ -1077,18 +1077,8 @@ class Musify(Page):
return []
soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")
discography: List[Album] = []
for card_soup in soup.find_all("div", {"class": "card"}):
new_album: Album = self._parse_album_card(card_soup, artist_name)
album_source: Source
if stop_at_level > 1:
for album_source in new_album.source_collection.get_sources_from_page(self.SOURCE_TYPE):
new_album.merge(self.fetch_album(album_source, stop_at_level=stop_at_level-1))
discography.append(new_album)
return discography
yield self._parse_album_card(card_soup, artist_name)
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
"""
@ -1110,8 +1100,7 @@ class Musify(Page):
artist = self._get_artist_attributes(url)
discography: List[Album] = self._get_discography(url, artist.name)
artist.main_album_collection.extend(discography)
artist.main_album_collection.extend(self._get_discography(url, artist.name))
return artist

View File

@ -15,7 +15,7 @@ __stage__ = os.getenv("STAGE", "prod")
# Debug switches. DEBUG is only truthy when __stage__ equals "dev"; every other
# flag is combined with DEBUG via `and`, so all of them are falsy outside of dev.
# The trailing `and True` / `and False` is the per-feature toggle.
DEBUG = (__stage__ == "dev") and True
DEBUG_LOGGING = DEBUG and True
DEBUG_TRACE = DEBUG and True
# NOTE(review): DEBUG_OBJECT_TRACE is assigned twice in a row; as written the
# second assignment wins. The pair looks like the before/after lines of a commit
# comparison - confirm which value is intended.
DEBUG_OBJECT_TRACE = DEBUG and False
DEBUG_OBJECT_TRACE = DEBUG and True
DEBUG_YOUTUBE_INITIALIZING = DEBUG and False
DEBUG_PAGES = DEBUG and False
DEBUG_DUMP = DEBUG and True