feat: implemented merging of the record of where an object has been fetched from
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
This commit is contained in:
parent
06ffae06a6
commit
301ff82bcf
@ -32,6 +32,7 @@ class InnerData:
|
|||||||
|
|
||||||
def __init__(self, object_type, **kwargs):
|
def __init__(self, object_type, **kwargs):
|
||||||
self._refers_to_instances = set()
|
self._refers_to_instances = set()
|
||||||
|
self._fetched_from: dict = {}
|
||||||
|
|
||||||
# collection : collection that is a collection of self
|
# collection : collection that is a collection of self
|
||||||
self._is_collection_child: Dict[Collection, Collection] = {}
|
self._is_collection_child: Dict[Collection, Collection] = {}
|
||||||
@ -52,6 +53,8 @@ class InnerData:
|
|||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
self._fetched_from.update(__other._fetched_from)
|
||||||
|
|
||||||
for key, value in __other.__dict__.copy().items():
|
for key, value in __other.__dict__.copy().items():
|
||||||
# just set the other value if self doesn't already have it
|
# just set the other value if self doesn't already have it
|
||||||
if key not in self.__dict__ or (key in self.__dict__ and self.__dict__[key] == self._default_values.get(key)):
|
if key not in self.__dict__ or (key in self.__dict__ and self.__dict__[key] == self._default_values.get(key)):
|
||||||
@ -109,7 +112,6 @@ class OuterProxy:
|
|||||||
|
|
||||||
del kwargs[name]
|
del kwargs[name]
|
||||||
|
|
||||||
self._fetched_from: dict = {}
|
|
||||||
self._inner: InnerData = InnerData(type(self), **kwargs)
|
self._inner: InnerData = InnerData(type(self), **kwargs)
|
||||||
self._inner._refers_to_instances.add(self)
|
self._inner._refers_to_instances.add(self)
|
||||||
|
|
||||||
@ -220,13 +222,13 @@ class OuterProxy:
|
|||||||
|
|
||||||
def mark_as_fetched(self, *url_hash_list: List[str]):
|
def mark_as_fetched(self, *url_hash_list: List[str]):
|
||||||
for url_hash in url_hash_list:
|
for url_hash in url_hash_list:
|
||||||
self._fetched_from[url_hash] = {
|
self._inner._fetched_from[url_hash] = {
|
||||||
"time": get_unix_time(),
|
"time": get_unix_time(),
|
||||||
"url": url_hash,
|
"url": url_hash,
|
||||||
}
|
}
|
||||||
|
|
||||||
def already_fetched_from(self, url_hash: str) -> bool:
|
def already_fetched_from(self, url_hash: str) -> bool:
|
||||||
res = self._fetched_from.get(url_hash, None)
|
res = self._inner._fetched_from.get(url_hash, None)
|
||||||
|
|
||||||
if res is None:
|
if res is None:
|
||||||
return False
|
return False
|
||||||
|
@ -352,10 +352,9 @@ class Bandcamp(Page):
|
|||||||
if len(other_data_list) > 0:
|
if len(other_data_list) > 0:
|
||||||
other_data = json.loads(other_data_list[0]["data-tralbum"])
|
other_data = json.loads(other_data_list[0]["data-tralbum"])
|
||||||
|
|
||||||
if DEBUG:
|
dump_to_file("bandcamp_song_data.json", data_container.text, is_json=True, exit_after_dump=False)
|
||||||
dump_to_file("bandcamp_song_data.json", data_container.text, is_json=True, exit_after_dump=False)
|
dump_to_file("bandcamp_song_data_other.json", json.dumps(other_data), is_json=True, exit_after_dump=False)
|
||||||
dump_to_file("bandcamp_song_data_other.json", json.dumps(other_data), is_json=True, exit_after_dump=False)
|
dump_to_file("bandcamp_song_page.html", r.text, exit_after_dump=False)
|
||||||
dump_to_file("bandcamp_song_page.html", r.text, exit_after_dump=False)
|
|
||||||
|
|
||||||
data = json.loads(data_container.text)
|
data = json.loads(data_container.text)
|
||||||
album_data = data["inAlbum"]
|
album_data = data["inAlbum"]
|
||||||
@ -367,7 +366,7 @@ class Bandcamp(Page):
|
|||||||
|
|
||||||
song = Song(
|
song = Song(
|
||||||
title=clean_song_title(data["name"], artist_name=artist_data["name"]),
|
title=clean_song_title(data["name"], artist_name=artist_data["name"]),
|
||||||
source_list=[Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), audio_url=mp3_url)],
|
source_list=[source, Source(self.SOURCE_TYPE, data.get("mainEntityOfPage", data["@id"]), audio_url=mp3_url)],
|
||||||
album_list=[Album(
|
album_list=[Album(
|
||||||
title=album_data["name"].strip(),
|
title=album_data["name"].strip(),
|
||||||
date=ID3Timestamp.strptime(data["datePublished"], "%d %b %Y %H:%M:%S %Z"),
|
date=ID3Timestamp.strptime(data["datePublished"], "%d %b %Y %H:%M:%S %Z"),
|
||||||
@ -380,8 +379,6 @@ class Bandcamp(Page):
|
|||||||
lyrics_list=self._fetch_lyrics(soup=soup)
|
lyrics_list=self._fetch_lyrics(soup=soup)
|
||||||
)
|
)
|
||||||
|
|
||||||
song.source_collection.append(source)
|
|
||||||
|
|
||||||
return song
|
return song
|
||||||
|
|
||||||
def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult:
|
def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult:
|
||||||
|
@ -13,7 +13,7 @@ if not load_dotenv(Path(__file__).parent.parent.parent / ".env"):
|
|||||||
__stage__ = os.getenv("STAGE", "prod")
|
__stage__ = os.getenv("STAGE", "prod")
|
||||||
|
|
||||||
DEBUG = (__stage__ == "dev") and True
|
DEBUG = (__stage__ == "dev") and True
|
||||||
DEBUG_LOGGING = DEBUG and True
|
DEBUG_LOGGING = DEBUG and False
|
||||||
DEBUG_TRACE = DEBUG and True
|
DEBUG_TRACE = DEBUG and True
|
||||||
DEBUG_OBJECT_TRACE = DEBUG and False
|
DEBUG_OBJECT_TRACE = DEBUG and False
|
||||||
DEBUG_YOUTUBE_INITIALIZING = DEBUG and False
|
DEBUG_YOUTUBE_INITIALIZING = DEBUG and False
|
||||||
|
Loading…
Reference in New Issue
Block a user