From 301ff82bcf2efdc176aac1a601a153cd3223b4ae Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Fri, 19 Apr 2024 13:37:12 +0200 Subject: [PATCH] feat: implemented the merging from where it has been fetched from --- music_kraken/objects/parents.py | 8 +++++--- music_kraken/pages/bandcamp.py | 11 ++++------- music_kraken/utils/shared.py | 2 +- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index 7b71269..53bc5bc 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -32,6 +32,7 @@ class InnerData: def __init__(self, object_type, **kwargs): self._refers_to_instances = set() + self._fetched_from: dict = {} # collection : collection that is a collection of self self._is_collection_child: Dict[Collection, Collection] = {} @@ -52,6 +53,8 @@ class InnerData: :return: """ + self._fetched_from.update(__other._fetched_from) + for key, value in __other.__dict__.copy().items(): # just set the other value if self doesn't already have it if key not in self.__dict__ or (key in self.__dict__ and self.__dict__[key] == self._default_values.get(key)): @@ -109,7 +112,6 @@ class OuterProxy: del kwargs[name] - self._fetched_from: dict = {} self._inner: InnerData = InnerData(type(self), **kwargs) self._inner._refers_to_instances.add(self) @@ -220,13 +222,13 @@ class OuterProxy: def mark_as_fetched(self, *url_hash_list: List[str]): for url_hash in url_hash_list: - self._fetched_from[url_hash] = { + self._inner._fetched_from[url_hash] = { "time": get_unix_time(), "url": url_hash, } def already_fetched_from(self, url_hash: str) -> bool: - res = self._fetched_from.get(url_hash, None) + res = self._inner._fetched_from.get(url_hash, None) if res is None: return False diff --git a/music_kraken/pages/bandcamp.py b/music_kraken/pages/bandcamp.py index a670026..52142eb 100644 --- a/music_kraken/pages/bandcamp.py +++ b/music_kraken/pages/bandcamp.py @@ -352,10 +352,9 @@ class Bandcamp(Page): if len(other_data_list) > 0: other_data = json.loads(other_data_list[0]["data-tralbum"]) - if DEBUG: - dump_to_file("bandcamp_song_data.json", data_container.text, is_json=True, exit_after_dump=False) - dump_to_file("bandcamp_song_data_other.json", json.dumps(other_data), is_json=True, exit_after_dump=False) - dump_to_file("bandcamp_song_page.html", r.text, exit_after_dump=False) + dump_to_file("bandcamp_song_data.json", data_container.text, is_json=True, exit_after_dump=False) + dump_to_file("bandcamp_song_data_other.json", json.dumps(other_data), is_json=True, exit_after_dump=False) + dump_to_file("bandcamp_song_page.html", r.text, exit_after_dump=False) data = json.loads(data_container.text) album_data = data["inAlbum"] @@ -367,7 +366,7 @@ class Bandcamp(Page): song = Song( title=clean_song_title(data["name"], artist_name=artist_data["name"]), - source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), audio_url=mp3_url)], + source_list=[source, Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), audio_url=mp3_url)], album_list=[Album( title=album_data["name"].strip(), date=ID3Timestamp.strptime(data["datePublished"], "%d %b %Y %H:%M:%S %Z"), @@ -380,8 +379,6 @@ class Bandcamp(Page): lyrics_list=self._fetch_lyrics(soup=soup) ) - song.source_collection.append(source) - return song def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: diff --git a/music_kraken/utils/shared.py b/music_kraken/utils/shared.py index 6676393..b3f30e5 100644 --- a/music_kraken/utils/shared.py +++ b/music_kraken/utils/shared.py @@ -13,7 +13,7 @@ if not load_dotenv(Path(__file__).parent.parent.parent / ".env"): __stage__ = os.getenv("STAGE", "prod") DEBUG = (__stage__ == "dev") and True -DEBUG_LOGGING = DEBUG and True +DEBUG_LOGGING = DEBUG and False DEBUG_TRACE = DEBUG and True DEBUG_OBJECT_TRACE = DEBUG and False DEBUG_YOUTUBE_INITIALIZING = DEBUG and False