Compare commits

..

3 Commits

6 changed files with 68 additions and 79 deletions

View File

@ -46,7 +46,7 @@ init_logging()
from . import cli from . import cli
if DEBUG: if DEBUG:
sys.setrecursionlimit(500) sys.setrecursionlimit(300)
if main_settings['modify_gc']: if main_settings['modify_gc']:

View File

@ -1,7 +1,7 @@
from __future__ import annotations from __future__ import annotations
from collections import defaultdict from collections import defaultdict
from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Generator
from .parents import OuterProxy from .parents import OuterProxy
T = TypeVar('T', bound=OuterProxy) T = TypeVar('T', bound=OuterProxy)
@ -35,39 +35,24 @@ class Collection(Generic[T]):
# Key: collection attribute (str) of appended element # Key: collection attribute (str) of appended element
# Value: main collection to sync to # Value: main collection to sync to
self.contain_given_in_attribute: Dict[str, Collection] = contain_given_in_attribute or {} self.contain_given_in_attribute: Dict[str, Collection] = contain_given_in_attribute or {}
self.contain_attribute_in_given: Dict[str, Collection] = contain_attribute_in_given or {}
self.append_object_to_attribute: Dict[str, T] = append_object_to_attribute or {} self.append_object_to_attribute: Dict[str, T] = append_object_to_attribute or {}
self.contain_self_on_append: List[str] = []
self._indexed_values = defaultdict(set) self._indexed_values = defaultdict(set)
self._indexed_to_objects = defaultdict(list) self._indexed_to_objects = defaultdict(list)
self.extend(data) self.extend(data)
def _map_element(self, __object: T, from_map: bool = False): def _map_element(self, __object: T, from_map: bool = False):
if __object.id in self._contains_ids: __object._inner._mapped_in_collection.add(self)
return
self._contains_ids.add(__object.id) self._contains_ids.add(__object.id)
for name, value in __object.indexing_values: for name, value in __object.indexing_values:
if value is None: if value is None or value == __object._inner._default_values.get(name):
continue continue
self._indexed_values[name].add(value) self._indexed_values[name].add(value)
self._indexed_to_objects[value].append(__object) self._indexed_to_objects[value].append(__object)
if not from_map:
for attribute, new_object in self.contain_given_in_attribute.items():
__object.__getattribute__(attribute).contain_collection_inside(new_object)
for attribute, new_object in self.contain_attribute_in_given.items():
new_object.contain_collection_inside(__object.__getattribute__(attribute))
for attribute, new_object in self.append_object_to_attribute.items():
__object.__getattribute__(attribute).append(new_object)
def _unmap_element(self, __object: T): def _unmap_element(self, __object: T):
if __object.id in self._contains_ids: if __object.id in self._contains_ids:
self._contains_ids.remove(__object.id) self._contains_ids.remove(__object.id)
@ -186,15 +171,8 @@ class Collection(Generic[T]):
def contains(self, __object: T) -> bool: def contains(self, __object: T) -> bool:
return len(self._contained_in_sub(__object)) > 0 return len(self._contained_in_sub(__object)) > 0
def _append(self, __object: T, from_map: bool = False):
print(self, __object)
self._map_element(__object, from_map=from_map)
self._data.append(__object)
def _find_object_in_self(self, __object: T) -> Optional[T]: def _find_object_in_self(self, __object: T) -> Optional[T]:
for name, value in __object.indexing_values: for name, value in __object.indexing_values:
if value is None or value == __object._default_factories.get(name, lambda: None)():
continue
if value in self._indexed_values[name]: if value in self._indexed_values[name]:
return self._indexed_to_objects[value][0] return self._indexed_to_objects[value][0]
@ -211,6 +189,7 @@ class Collection(Generic[T]):
if no_sibling: if no_sibling:
return self, None return self, None
"""
# find in siblings and all children of siblings # find in siblings and all children of siblings
for parent in self.parents: for parent in self.parents:
for sibling in parent.children: for sibling in parent.children:
@ -220,6 +199,7 @@ class Collection(Generic[T]):
o, other_object = sibling._find_object(__object, no_sibling=True) o, other_object = sibling._find_object(__object, no_sibling=True)
if other_object is not None: if other_object is not None:
return o, other_object return o, other_object
"""
return self, None return self, None
@ -235,7 +215,7 @@ class Collection(Generic[T]):
:return: :return:
""" """
if __object is None or __object.id in self._contains_ids: if __object is None:
return return
append_to, existing_object = self._find_object(__object) append_to, existing_object = self._find_object(__object)
@ -243,54 +223,47 @@ class Collection(Generic[T]):
if existing_object is None: if existing_object is None:
# append # append
append_to._data.append(__object) append_to._data.append(__object)
append_to._map_element(__object, from_map=from_map) append_to._map_element(__object)
# only modify collections if the object actually has been appended
for collection_attribute, child_collection in self.contain_given_in_attribute.items():
__object.__getattribute__(collection_attribute).contain_collection_inside(child_collection, __object)
for attribute, new_object in self.append_object_to_attribute.items():
__object.__getattribute__(attribute).append(new_object)
else: else:
# merge # merge only if the two objects are not the same
if existing_object.id == __object.id:
return
append_to._unmap_element(existing_object) append_to._unmap_element(existing_object)
existing_object.merge(__object) existing_object.merge(__object)
append_to._map_element(existing_object, from_map=from_map) append_to._map_element(existing_object)
def extend(self, __iterable: Optional[Iterable[T]], from_map: bool = False): def extend(self, __iterable: Optional[Generator[T, None, None]]):
if __iterable is None: if __iterable is None:
return return
for __object in __iterable: for __object in __iterable:
self.append(__object, from_map=from_map) self.append(__object)
def sync_with_other_collection(self, equal_collection: Collection): def contain_collection_inside(self, sub_collection: Collection, _object: T):
"""
If two collections always need to have the same values, this can be used.
Internally:
1. import the data from other to self
- _data
- contained_collections
2. replace all refs from the other object, with refs from this object
"""
if equal_collection is self:
return
# don't add the elements from the subelements from the other collection.
# this will be done in the next step.
self.extend(equal_collection._data)
# add all submodules
for equal_sub_collection in equal_collection.children:
self.contain_collection_inside(equal_sub_collection)
def contain_collection_inside(self, sub_collection: Collection):
""" """
This collection will ALWAYS contain everything from the passed in collection This collection will ALWAYS contain everything from the passed in collection
""" """
if self is sub_collection or sub_collection in self.children: if self is sub_collection or sub_collection in self.children:
return return
_object._inner._is_collection_child[self] = sub_collection
_object._inner._is_collection_parent[sub_collection] = self
self.children.append(sub_collection) self.children.append(sub_collection)
sub_collection.parents.append(self) sub_collection.parents.append(self)
@property @property
def data(self) -> List[T]: def data(self) -> List[T]:
return list(i for i in self.__iter__()) return list(self.__iter__())
def __len__(self) -> int: def __len__(self) -> int:
return len(self._data) + sum(len(collection) for collection in self.children) return len(self._data) + sum(len(collection) for collection in self.children)
@ -312,7 +285,7 @@ class Collection(Generic[T]):
yield from c.__iter__(finished_ids=finished_ids) yield from c.__iter__(finished_ids=finished_ids)
def __merge__(self, __other: Collection, override: bool = False): def __merge__(self, __other: Collection, override: bool = False):
self.extend(__other._data, from_map=True) self.extend(__other.__iter__())
def __getitem__(self, item: int): def __getitem__(self, item: int):
if item < len(self._data): if item < len(self._data):

View File

@ -31,10 +31,14 @@ class InnerData:
def __init__(self, object_type, **kwargs): def __init__(self, object_type, **kwargs):
self._refers_to_instances = set() self._refers_to_instances = set()
# collection : collection that is a collection of self
self._is_collection_child: Dict[Collection, Collection] = {}
self._is_collection_parent: Dict[Collection, Collection] = {}
# initialize the default values # initialize the default values
self.__default_values = {} self._default_values = {}
for name, factory in object_type._default_factories.items(): for name, factory in object_type._default_factories.items():
self.__default_values[name] = factory() self._default_values[name] = factory()
for key, value in kwargs.items(): for key, value in kwargs.items():
self.__setattr__(key, value) self.__setattr__(key, value)
@ -48,7 +52,7 @@ class InnerData:
for key, value in __other.__dict__.copy().items(): for key, value in __other.__dict__.copy().items():
# just set the other value if self doesn't already have it # just set the other value if self doesn't already have it
if key not in self.__dict__ or (key in self.__dict__ and self.__dict__[key] == self.__default_values.get(key)): if key not in self.__dict__ or (key in self.__dict__ and self.__dict__[key] == self._default_values.get(key)):
self.__setattr__(key, value) self.__setattr__(key, value)
continue continue
@ -183,7 +187,7 @@ class OuterProxy:
if __other is None: if __other is None:
return return
object_trace(f"merging {type(self).__name__} [{self.title_string}] with {type(__other).__name__} [{__other.title_string}]") object_trace(f"merging {type(self).__name__} [{self.title_string} | {self.id}] with {type(__other).__name__} [{__other.title_string} | {__other.id}]")
a = self a = self
b = __other b = __other
@ -196,11 +200,20 @@ class OuterProxy:
a, b = b, a a, b = b, a
a._inner.__merge__(b._inner, override=override) a._inner.__merge__(b._inner, override=override)
for collection, child_collection in b._inner._is_collection_child.items():
collection.children.remove(child_collection)
for collection, parent_collection in b._inner._is_collection_parent.items():
collection.parents.remove(parent_collection)
a._inner._refers_to_instances.update(b._inner._refers_to_instances) a._inner._refers_to_instances.update(b._inner._refers_to_instances)
for instance in b._inner._refers_to_instances: for instance in b._inner._refers_to_instances:
instance._inner = a._inner instance._inner = a._inner
def __merge__(self, __other: Optional[OuterProxy], override: bool = False):
self.merge(__other, override)
def mark_as_fetched(self, *url_hash_list: List[str]): def mark_as_fetched(self, *url_hash_list: List[str]):
for url_hash in url_hash_list: for url_hash in url_hash_list:
self._fetched_from[url_hash] = { self._fetched_from[url_hash] = {

View File

@ -49,6 +49,7 @@ class Song(Base):
source_collection: SourceCollection source_collection: SourceCollection
target_collection: Collection[Target] target_collection: Collection[Target]
lyrics_collection: Collection[Lyrics] lyrics_collection: Collection[Lyrics]
main_artist_collection: Collection[Artist] main_artist_collection: Collection[Artist]
feature_artist_collection: Collection[Artist] feature_artist_collection: Collection[Artist]
album_collection: Collection[Album] album_collection: Collection[Album]
@ -241,13 +242,17 @@ class Album(Base):
UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("artist_collection", "label_collection") UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("artist_collection", "label_collection")
def __init_collections__(self): def __init_collections__(self):
self.song_collection.contain_attribute_in_given = {
"main_artist_collection": self.artist_collection
}
self.song_collection.append_object_to_attribute = { self.song_collection.append_object_to_attribute = {
"album_collection": self "album_collection": self
} }
self.artist_collection.append_object_to_attribute = {
"main_album_collection": self
}
self.artist_collection.contain_given_in_attribute = {
"label_collection": self.label_collection
}
def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]): def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]):
if object_type is Song: if object_type is Song:
self.song_collection.extend(object_list) self.song_collection.extend(object_list)
@ -642,6 +647,15 @@ class Label(Base):
contact_list=contact_list, album_list=album_list, current_artist_list=current_artist_list, contact_list=contact_list, album_list=album_list, current_artist_list=current_artist_list,
**kwargs) **kwargs)
def __init_collections__(self):
self.album_collection.append_object_to_attribute = {
"label_collection": self
}
self.current_artist_collection.append_object_to_attribute = {
"label_collection": self
}
@property @property
def indexing_values(self) -> List[Tuple[str, object]]: def indexing_values(self) -> List[Tuple[str, object]]:
return [ return [

View File

@ -1,7 +1,7 @@
from collections import defaultdict from collections import defaultdict
from dataclasses import dataclass from dataclasses import dataclass
from enum import Enum from enum import Enum
from typing import List, Optional, Type, Union from typing import List, Optional, Type, Union, Generator
from urllib.parse import urlparse from urllib.parse import urlparse
import pycountry import pycountry
@ -1056,7 +1056,7 @@ class Musify(Page):
date=date date=date
) )
def _get_discography(self, url: MusifyUrl, artist_name: str = None, stop_at_level: int = 1) -> List[Album]: def _get_discography(self, url: MusifyUrl, artist_name: str = None, stop_at_level: int = 1) -> Generator[Album, None, None]:
""" """
POST https://musify.club/artist/filteralbums POST https://musify.club/artist/filteralbums
ArtistID: 280348 ArtistID: 280348
@ -1077,18 +1077,8 @@ class Musify(Page):
return [] return []
soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser") soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")
discography: List[Album] = []
for card_soup in soup.find_all("div", {"class": "card"}): for card_soup in soup.find_all("div", {"class": "card"}):
new_album: Album = self._parse_album_card(card_soup, artist_name) yield self._parse_album_card(card_soup, artist_name)
album_source: Source
if stop_at_level > 1:
for album_source in new_album.source_collection.get_sources_from_page(self.SOURCE_TYPE):
new_album.merge(self.fetch_album(album_source, stop_at_level=stop_at_level-1))
discography.append(new_album)
return discography
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
""" """
@ -1110,8 +1100,7 @@ class Musify(Page):
artist = self._get_artist_attributes(url) artist = self._get_artist_attributes(url)
discography: List[Album] = self._get_discography(url, artist.name) artist.main_album_collection.extend(self._get_discography(url, artist.name))
artist.main_album_collection.extend(discography)
return artist return artist

View File

@ -15,7 +15,7 @@ __stage__ = os.getenv("STAGE", "prod")
DEBUG = (__stage__ == "dev") and True DEBUG = (__stage__ == "dev") and True
DEBUG_LOGGING = DEBUG and True DEBUG_LOGGING = DEBUG and True
DEBUG_TRACE = DEBUG and True DEBUG_TRACE = DEBUG and True
DEBUG_OBJECT_TRACE = DEBUG and False DEBUG_OBJECT_TRACE = DEBUG and True
DEBUG_YOUTUBE_INITIALIZING = DEBUG and False DEBUG_YOUTUBE_INITIALIZING = DEBUG and False
DEBUG_PAGES = DEBUG and False DEBUG_PAGES = DEBUG and False
DEBUG_DUMP = DEBUG and True DEBUG_DUMP = DEBUG and True