Compare commits
No commits in common. "b15d0839ef77349511146becb8bbe8417a6fe89e" and "0b216b7d40476f6566c7bd7c7cc7ccb91e238f5a" have entirely different histories.
b15d0839ef
...
0b216b7d40
@ -46,7 +46,7 @@ init_logging()
|
||||
from . import cli
|
||||
|
||||
if DEBUG:
|
||||
sys.setrecursionlimit(300)
|
||||
sys.setrecursionlimit(500)
|
||||
|
||||
|
||||
if main_settings['modify_gc']:
|
||||
|
@ -1,7 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Generator
|
||||
from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple
|
||||
from .parents import OuterProxy
|
||||
|
||||
T = TypeVar('T', bound=OuterProxy)
|
||||
@ -35,24 +35,39 @@ class Collection(Generic[T]):
|
||||
# Key: collection attribute (str) of appended element
|
||||
# Value: main collection to sync to
|
||||
self.contain_given_in_attribute: Dict[str, Collection] = contain_given_in_attribute or {}
|
||||
self.contain_attribute_in_given: Dict[str, Collection] = contain_attribute_in_given or {}
|
||||
self.append_object_to_attribute: Dict[str, T] = append_object_to_attribute or {}
|
||||
|
||||
self.contain_self_on_append: List[str] = []
|
||||
|
||||
self._indexed_values = defaultdict(set)
|
||||
self._indexed_to_objects = defaultdict(list)
|
||||
|
||||
self.extend(data)
|
||||
|
||||
def _map_element(self, __object: T, from_map: bool = False):
|
||||
__object._inner._mapped_in_collection.add(self)
|
||||
if __object.id in self._contains_ids:
|
||||
return
|
||||
|
||||
self._contains_ids.add(__object.id)
|
||||
|
||||
for name, value in __object.indexing_values:
|
||||
if value is None or value == __object._inner._default_values.get(name):
|
||||
if value is None:
|
||||
continue
|
||||
|
||||
self._indexed_values[name].add(value)
|
||||
self._indexed_to_objects[value].append(__object)
|
||||
|
||||
if not from_map:
|
||||
for attribute, new_object in self.contain_given_in_attribute.items():
|
||||
__object.__getattribute__(attribute).contain_collection_inside(new_object)
|
||||
|
||||
for attribute, new_object in self.contain_attribute_in_given.items():
|
||||
new_object.contain_collection_inside(__object.__getattribute__(attribute))
|
||||
|
||||
for attribute, new_object in self.append_object_to_attribute.items():
|
||||
__object.__getattribute__(attribute).append(new_object)
|
||||
|
||||
def _unmap_element(self, __object: T):
|
||||
if __object.id in self._contains_ids:
|
||||
self._contains_ids.remove(__object.id)
|
||||
@ -171,8 +186,15 @@ class Collection(Generic[T]):
|
||||
def contains(self, __object: T) -> bool:
|
||||
return len(self._contained_in_sub(__object)) > 0
|
||||
|
||||
def _append(self, __object: T, from_map: bool = False):
|
||||
print(self, __object)
|
||||
self._map_element(__object, from_map=from_map)
|
||||
self._data.append(__object)
|
||||
|
||||
def _find_object_in_self(self, __object: T) -> Optional[T]:
|
||||
for name, value in __object.indexing_values:
|
||||
if value is None or value == __object._default_factories.get(name, lambda: None)():
|
||||
continue
|
||||
if value in self._indexed_values[name]:
|
||||
return self._indexed_to_objects[value][0]
|
||||
|
||||
@ -189,7 +211,6 @@ class Collection(Generic[T]):
|
||||
if no_sibling:
|
||||
return self, None
|
||||
|
||||
"""
|
||||
# find in siblings and all children of siblings
|
||||
for parent in self.parents:
|
||||
for sibling in parent.children:
|
||||
@ -199,7 +220,6 @@ class Collection(Generic[T]):
|
||||
o, other_object = sibling._find_object(__object, no_sibling=True)
|
||||
if other_object is not None:
|
||||
return o, other_object
|
||||
"""
|
||||
|
||||
return self, None
|
||||
|
||||
@ -215,7 +235,7 @@ class Collection(Generic[T]):
|
||||
:return:
|
||||
"""
|
||||
|
||||
if __object is None:
|
||||
if __object is None or __object.id in self._contains_ids:
|
||||
return
|
||||
|
||||
append_to, existing_object = self._find_object(__object)
|
||||
@ -223,47 +243,54 @@ class Collection(Generic[T]):
|
||||
if existing_object is None:
|
||||
# append
|
||||
append_to._data.append(__object)
|
||||
append_to._map_element(__object)
|
||||
|
||||
# only modify collections if the object actually has been appended
|
||||
for collection_attribute, child_collection in self.contain_given_in_attribute.items():
|
||||
__object.__getattribute__(collection_attribute).contain_collection_inside(child_collection, __object)
|
||||
|
||||
for attribute, new_object in self.append_object_to_attribute.items():
|
||||
__object.__getattribute__(attribute).append(new_object)
|
||||
append_to._map_element(__object, from_map=from_map)
|
||||
else:
|
||||
# merge only if the two objects are not the same
|
||||
if existing_object.id == __object.id:
|
||||
return
|
||||
|
||||
# merge
|
||||
append_to._unmap_element(existing_object)
|
||||
existing_object.merge(__object)
|
||||
append_to._map_element(existing_object)
|
||||
append_to._map_element(existing_object, from_map=from_map)
|
||||
|
||||
|
||||
def extend(self, __iterable: Optional[Generator[T, None, None]]):
|
||||
def extend(self, __iterable: Optional[Iterable[T]], from_map: bool = False):
|
||||
if __iterable is None:
|
||||
return
|
||||
|
||||
for __object in __iterable:
|
||||
self.append(__object)
|
||||
self.append(__object, from_map=from_map)
|
||||
|
||||
def contain_collection_inside(self, sub_collection: Collection, _object: T):
|
||||
def sync_with_other_collection(self, equal_collection: Collection):
|
||||
"""
|
||||
If two collections always need to have the same values, this can be used.
|
||||
|
||||
Internally:
|
||||
1. import the data from other to self
|
||||
- _data
|
||||
- contained_collections
|
||||
2. replace all refs from the other object, with refs from this object
|
||||
"""
|
||||
if equal_collection is self:
|
||||
return
|
||||
|
||||
# don't add the elements from the subelements from the other collection.
|
||||
# this will be done in the next step.
|
||||
self.extend(equal_collection._data)
|
||||
# add all submodules
|
||||
for equal_sub_collection in equal_collection.children:
|
||||
self.contain_collection_inside(equal_sub_collection)
|
||||
|
||||
def contain_collection_inside(self, sub_collection: Collection):
|
||||
"""
|
||||
This collection will ALWAYS contain everything from the passed in collection
|
||||
"""
|
||||
if self is sub_collection or sub_collection in self.children:
|
||||
return
|
||||
|
||||
_object._inner._is_collection_child[self] = sub_collection
|
||||
_object._inner._is_collection_parent[sub_collection] = self
|
||||
|
||||
self.children.append(sub_collection)
|
||||
sub_collection.parents.append(self)
|
||||
|
||||
@property
|
||||
def data(self) -> List[T]:
|
||||
return list(self.__iter__())
|
||||
return list(i for i in self.__iter__())
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self._data) + sum(len(collection) for collection in self.children)
|
||||
@ -285,7 +312,7 @@ class Collection(Generic[T]):
|
||||
yield from c.__iter__(finished_ids=finished_ids)
|
||||
|
||||
def __merge__(self, __other: Collection, override: bool = False):
|
||||
self.extend(__other.__iter__())
|
||||
self.extend(__other._data, from_map=True)
|
||||
|
||||
def __getitem__(self, item: int):
|
||||
if item < len(self._data):
|
||||
|
@ -31,14 +31,10 @@ class InnerData:
|
||||
def __init__(self, object_type, **kwargs):
|
||||
self._refers_to_instances =set()
|
||||
|
||||
# collection : collection that is a collection of self
|
||||
self._is_collection_child: Dict[Collection, Collection] = {}
|
||||
self._is_collection_parent: Dict[Collection, Collection] = {}
|
||||
|
||||
# initialize the default values
|
||||
self._default_values = {}
|
||||
self.__default_values = {}
|
||||
for name, factory in object_type._default_factories.items():
|
||||
self._default_values[name] = factory()
|
||||
self.__default_values[name] = factory()
|
||||
|
||||
for key, value in kwargs.items():
|
||||
self.__setattr__(key, value)
|
||||
@ -52,7 +48,7 @@ class InnerData:
|
||||
|
||||
for key, value in __other.__dict__.copy().items():
|
||||
# just set the other value if self doesn't already have it
|
||||
if key not in self.__dict__ or (key in self.__dict__ and self.__dict__[key] == self._default_values.get(key)):
|
||||
if key not in self.__dict__ or (key in self.__dict__ and self.__dict__[key] == self.__default_values.get(key)):
|
||||
self.__setattr__(key, value)
|
||||
continue
|
||||
|
||||
@ -187,7 +183,7 @@ class OuterProxy:
|
||||
if __other is None:
|
||||
return
|
||||
|
||||
object_trace(f"merging {type(self).__name__} [{self.title_string} | {self.id}] with {type(__other).__name__} [{__other.title_string} | {__other.id}]")
|
||||
object_trace(f"merging {type(self).__name__} [{self.title_string}] with {type(__other).__name__} [{__other.title_string}]")
|
||||
|
||||
a = self
|
||||
b = __other
|
||||
@ -200,20 +196,11 @@ class OuterProxy:
|
||||
a, b = b, a
|
||||
|
||||
a._inner.__merge__(b._inner, override=override)
|
||||
for collection, child_collection in b._inner._is_collection_child.items():
|
||||
collection.children.remove(child_collection)
|
||||
|
||||
for collection, parent_collection in b._inner._is_collection_parent.items():
|
||||
collection.parents.remove(parent_collection)
|
||||
|
||||
a._inner._refers_to_instances.update(b._inner._refers_to_instances)
|
||||
|
||||
for instance in b._inner._refers_to_instances:
|
||||
instance._inner = a._inner
|
||||
|
||||
def __merge__(self, __other: Optional[OuterProxy], override: bool = False):
|
||||
self.merge(__other, override)
|
||||
|
||||
def mark_as_fetched(self, *url_hash_list: List[str]):
|
||||
for url_hash in url_hash_list:
|
||||
self._fetched_from[url_hash] = {
|
||||
|
@ -49,7 +49,6 @@ class Song(Base):
|
||||
source_collection: SourceCollection
|
||||
target_collection: Collection[Target]
|
||||
lyrics_collection: Collection[Lyrics]
|
||||
|
||||
main_artist_collection: Collection[Artist]
|
||||
feature_artist_collection: Collection[Artist]
|
||||
album_collection: Collection[Album]
|
||||
@ -242,17 +241,13 @@ class Album(Base):
|
||||
UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("artist_collection", "label_collection")
|
||||
|
||||
def __init_collections__(self):
|
||||
self.song_collection.contain_attribute_in_given = {
|
||||
"main_artist_collection": self.artist_collection
|
||||
}
|
||||
self.song_collection.append_object_to_attribute = {
|
||||
"album_collection": self
|
||||
}
|
||||
|
||||
self.artist_collection.append_object_to_attribute = {
|
||||
"main_album_collection": self
|
||||
}
|
||||
self.artist_collection.contain_given_in_attribute = {
|
||||
"label_collection": self.label_collection
|
||||
}
|
||||
|
||||
def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]):
|
||||
if object_type is Song:
|
||||
self.song_collection.extend(object_list)
|
||||
@ -647,15 +642,6 @@ class Label(Base):
|
||||
contact_list=contact_list, album_list=album_list, current_artist_list=current_artist_list,
|
||||
**kwargs)
|
||||
|
||||
def __init_collections__(self):
|
||||
self.album_collection.append_object_to_attribute = {
|
||||
"label_collection": self
|
||||
}
|
||||
|
||||
self.current_artist_collection.append_object_to_attribute = {
|
||||
"label_collection": self
|
||||
}
|
||||
|
||||
@property
|
||||
def indexing_values(self) -> List[Tuple[str, object]]:
|
||||
return [
|
||||
|
@ -1,7 +1,7 @@
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import List, Optional, Type, Union, Generator
|
||||
from typing import List, Optional, Type, Union
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import pycountry
|
||||
@ -1056,7 +1056,7 @@ class Musify(Page):
|
||||
date=date
|
||||
)
|
||||
|
||||
def _get_discography(self, url: MusifyUrl, artist_name: str = None, stop_at_level: int = 1) -> Generator[Album, None, None]:
|
||||
def _get_discography(self, url: MusifyUrl, artist_name: str = None, stop_at_level: int = 1) -> List[Album]:
|
||||
"""
|
||||
POST https://musify.club/artist/filteralbums
|
||||
ArtistID: 280348
|
||||
@ -1077,8 +1077,18 @@ class Musify(Page):
|
||||
return []
|
||||
soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")
|
||||
|
||||
discography: List[Album] = []
|
||||
for card_soup in soup.find_all("div", {"class": "card"}):
|
||||
yield self._parse_album_card(card_soup, artist_name)
|
||||
new_album: Album = self._parse_album_card(card_soup, artist_name)
|
||||
album_source: Source
|
||||
|
||||
if stop_at_level > 1:
|
||||
for album_source in new_album.source_collection.get_sources_from_page(self.SOURCE_TYPE):
|
||||
new_album.merge(self.fetch_album(album_source, stop_at_level=stop_at_level-1))
|
||||
|
||||
discography.append(new_album)
|
||||
|
||||
return discography
|
||||
|
||||
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
|
||||
"""
|
||||
@ -1100,7 +1110,8 @@ class Musify(Page):
|
||||
|
||||
artist = self._get_artist_attributes(url)
|
||||
|
||||
artist.main_album_collection.extend(self._get_discography(url, artist.name))
|
||||
discography: List[Album] = self._get_discography(url, artist.name)
|
||||
artist.main_album_collection.extend(discography)
|
||||
|
||||
return artist
|
||||
|
||||
|
@ -15,7 +15,7 @@ __stage__ = os.getenv("STAGE", "prod")
|
||||
DEBUG = (__stage__ == "dev") and True
|
||||
DEBUG_LOGGING = DEBUG and True
|
||||
DEBUG_TRACE = DEBUG and True
|
||||
DEBUG_OBJECT_TRACE = DEBUG and True
|
||||
DEBUG_OBJECT_TRACE = DEBUG and False
|
||||
DEBUG_YOUTUBE_INITIALIZING = DEBUG and False
|
||||
DEBUG_PAGES = DEBUG and False
|
||||
DEBUG_DUMP = DEBUG and True
|
||||
|
Loading…
Reference in New Issue
Block a user