From bedd0fe819c068161794d2518567618233b90357 Mon Sep 17 00:00:00 2001
From: Lars Noack <lars@webcontact.de>
Date: Wed, 15 May 2024 13:16:11 +0200
Subject: [PATCH] fix: runtime errors

---
 development/actual_donwload.py                |   5 +-
 music_kraken/cli/main_downloader.py           |   2 +-
 music_kraken/download/__init__.py             |   1 +
 music_kraken/download/page_attributes.py      |  29 ++--
 music_kraken/download/results.py              |   1 -
 music_kraken/objects/parents.py               |   1 +
 music_kraken/objects/source.py                |  34 ++--
 music_kraken/pages/abstract.py                | 151 +-----------------
 music_kraken/pages/bandcamp.py                |   3 +-
 music_kraken/pages/encyclopaedia_metallum.py  |  10 +-
 .../pages/youtube_music/youtube_music.py      |  17 +-
 music_kraken/utils/enums/__init__.py          |   7 +-
 12 files changed, 65 insertions(+), 196 deletions(-)

diff --git a/development/actual_donwload.py b/development/actual_donwload.py
index a8eb732..d91876e 100644
--- a/development/actual_donwload.py
+++ b/development/actual_donwload.py
@@ -6,8 +6,9 @@ logging.getLogger().setLevel(logging.DEBUG)
 
 if __name__ == "__main__":
     commands = [
-        "s: #a Crystal F",
-        "d: 20",
+        "s: #a I'm in a coffin",
+        "0",
+        "d: 0",
     ]
 
     
diff --git a/music_kraken/cli/main_downloader.py b/music_kraken/cli/main_downloader.py
index ac91cab..e3fe2cb 100644
--- a/music_kraken/cli/main_downloader.py
+++ b/music_kraken/cli/main_downloader.py
@@ -317,7 +317,7 @@ class Downloader:
 
         for database_object in data_objects:
             r = self.pages.download(
-                music_object=database_object, 
+                data_object=database_object, 
                 genre=self.genre, 
                 **kwargs
             )
diff --git a/music_kraken/download/__init__.py b/music_kraken/download/__init__.py
index a52bd87..7ca0086 100644
--- a/music_kraken/download/__init__.py
+++ b/music_kraken/download/__init__.py
@@ -1,4 +1,5 @@
 from dataclasses import dataclass, field
+from typing import Set
 
 from ..utils.config import main_settings
 from ..utils.enums.album import AlbumType
diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py
index 6b52704..f6f7786 100644
--- a/music_kraken/download/page_attributes.py
+++ b/music_kraken/download/page_attributes.py
@@ -2,6 +2,7 @@ from typing import Tuple, Type, Dict, Set, Optional, List
 from collections import defaultdict
 from pathlib import Path
 import re
+import logging
 
 from . import FetchOptions, DownloadOptions
 from .results import SearchResults
@@ -17,6 +18,7 @@ from ..objects import (
     Label,
 )
 from ..audio import write_metadata_to_target, correct_codec
+from ..utils import output, BColors
 from ..utils.string_processing import fit_to_file_system
 from ..utils.config import youtube_settings, main_settings
 from ..utils.path_manager import LOCATIONS
@@ -69,6 +71,8 @@ if DEBUG_PAGES:
 
 class Pages:
     def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
+        self.LOGGER = logging.getLogger("download")
+        
         self.download_options: DownloadOptions = download_options or DownloadOptions()
         self.fetch_options: FetchOptions = fetch_options or FetchOptions()
 
@@ -118,7 +122,9 @@ class Pages:
             return data_object
         
         source: Source
-        for source in data_object.source_collection.get_sources():
+        for source in data_object.source_collection.get_sources(source_type_sorting={
+            "only_with_page": True,
+        }):
             new_data_object = self.fetch_from_source(source=source, stop_at_level=stop_at_level)
             if new_data_object is not None:
                 data_object.merge(new_data_object)
@@ -129,10 +135,15 @@ class Pages:
         if not source.has_page:
             return None
         
-        func = getattr(source.page, fetch_map[source_type])(source=source, **kwargs)
+        source_type = source.page.get_source_type(source=source)
+        if source_type is None:
+            self.LOGGER.debug(f"Could not determine source type for {source}.")
+            return None
+
+        func = getattr(source.page, fetch_map[source_type])
         
         # fetching the data object and marking it as fetched
-        data_object: DataObject = func(source=source)
+        data_object: DataObject = func(source=source, **kwargs)
         data_object.mark_as_fetched(source.hash_url)
         return data_object
 
@@ -175,7 +186,7 @@ class Pages:
         
         # download all children
         download_result: DownloadResult = DownloadResult()
-        for c in data_object.get_children():
+        for c in data_object.get_child_collections():
             for d in c:
                 if self._skip_object(d):
                     continue
@@ -209,7 +220,7 @@ class Pages:
 
             path_template = path_template.replace(f"{{{field}}}", naming[field][0])
 
-        return possible_parts
+        return path_template
 
     def _download_song(self, song: Song, naming: dict) -> DownloadOptions:
         """
@@ -235,7 +246,7 @@ class Pages:
         # removing duplicates from the naming, and process the strings
         for key, value in naming.items():
             # https://stackoverflow.com/a/17016257
-            naming[key] = list(dict.fromkeys(items))
+            naming[key] = list(dict.fromkeys(value))
 
         # manage the targets
         tmp: Target = Target.temp(file_extension=main_settings["audio_format"])
@@ -248,14 +259,14 @@ class Pages:
             )
         ))
         for target in song.target_collection:
-            if target.exists():
+            if target.exists:
                 output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY)
                 r.found_on_disk += 1
 
                 if self.download_options.download_again_if_found:
                     target.copy_content(tmp)
             else:
-                target.create_parent_directories()
+                target.create_path()
                 output(f'- {target.file_path}', color=BColors.GREY)
 
         # this streams from every available source until something succeeds, setting the skip intervals to the values of the according source
@@ -294,7 +305,7 @@ class Pages:
         if used_source is not None:
             used_source.page.post_process_hook(song=song, temp_target=tmp)
 
-        if not found_on_disc or self.download_options.process_metadata_if_found:
+        if not found_on_disk or self.download_options.process_metadata_if_found:
             write_metadata_to_target(metadata=song.metadata, target=tmp, song=song)
 
         tmp.delete()
diff --git a/music_kraken/download/results.py b/music_kraken/download/results.py
index 00afea9..2486c26 100644
--- a/music_kraken/download/results.py
+++ b/music_kraken/download/results.py
@@ -2,7 +2,6 @@ from typing import Tuple, Type, Dict, List, Generator, Union
 from dataclasses import dataclass
 
 from ..objects import DatabaseObject
-from ..utils.enums.source import SourceType
 from ..pages import Page, EncyclopaediaMetallum, Musify
 
 
diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py
index 51eb8e6..16ebe6a 100644
--- a/music_kraken/objects/parents.py
+++ b/music_kraken/objects/parents.py
@@ -8,6 +8,7 @@ from typing import Optional, Dict, Tuple, List, Type, Generic, Any, TypeVar, Set
 from pathlib import Path
 import inspect
 
+from .source import SourceCollection
 from .metadata import Metadata
 from ..utils import get_unix_time, object_trace, generate_id
 from ..utils.config import logging_settings, main_settings
diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py
index 3d0b492..b227cc8 100644
--- a/music_kraken/objects/source.py
+++ b/music_kraken/objects/source.py
@@ -20,13 +20,11 @@ from dataclasses import dataclass, field
 from functools import cached_property
 
 from ..utils import generate_id
-from ..utils.enums import SourceType
+from ..utils.enums import SourceType, ALL_SOURCE_TYPES
 from ..utils.config import youtube_settings
 from ..utils.string_processing import hash_url, shorten_display_url
 
 from .metadata import Mapping, Metadata
-from .parents import OuterProxy
-from .collection import Collection
 if TYPE_CHECKING:
     from ..pages.abstract import Page
 
@@ -54,38 +52,38 @@ class Source:
         url = parsed_url.geturl()
         
         if "musify" in parsed_url.netloc:
-            return cls(SourceType.MUSIFY, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.MUSIFY, url, referrer_page=referrer_page)
 
         if parsed_url.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]:
-            return cls(SourceType.YOUTUBE, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.YOUTUBE, url, referrer_page=referrer_page)
 
         if url.startswith("https://www.deezer"):
-            return cls(SourceType.DEEZER, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.DEEZER, url, referrer_page=referrer_page)
         
         if url.startswith("https://open.spotify.com"):
-            return cls(SourceType.SPOTIFY, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.SPOTIFY, url, referrer_page=referrer_page)
 
         if "bandcamp" in url:
-            return cls(SourceType.BANDCAMP, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.BANDCAMP, url, referrer_page=referrer_page)
 
         if "wikipedia" in parsed_url.netloc:
-            return cls(SourceType.WIKIPEDIA, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.WIKIPEDIA, url, referrer_page=referrer_page)
 
         if url.startswith("https://www.metal-archives.com/"):
-            return cls(SourceType.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page)
 
         # the less important once
         if url.startswith("https://www.facebook"):
-            return cls(SourceType.FACEBOOK, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.FACEBOOK, url, referrer_page=referrer_page)
 
         if url.startswith("https://www.instagram"):
-            return cls(SourceType.INSTAGRAM, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.INSTAGRAM, url, referrer_page=referrer_page)
 
         if url.startswith("https://twitter"):
-            return cls(SourceType.TWITTER, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.TWITTER, url, referrer_page=referrer_page)
 
         if url.startswith("https://myspace.com"):
-            return cls(SourceType.MYSPACE, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.MYSPACE, url, referrer_page=referrer_page)
 
     @property
     def has_page(self) -> bool:
@@ -134,7 +132,7 @@ class SourceCollection:
     _sources_by_type: Dict[SourceType, List[Source]]
 
     def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs):
-        self._page_to_source_list = defaultdict(list)
+        self._sources_by_type = defaultdict(list)
         self._indexed_sources = {}
 
         self.extend(data or [])
@@ -157,7 +155,7 @@ class SourceCollection:
             Iterable[SourceType]: A list of source types.
         """
 
-        source_types: List[SourceType] = self._page_to_source_list.keys()
+        source_types: List[SourceType] = self._sources_by_type.keys()
         if only_with_page:
             source_types = filter(lambda st: st.has_page, source_types)
 
@@ -186,7 +184,7 @@ class SourceCollection:
                 source_types = self.source_types(**source_type_sorting)
 
             for source_type in source_types:
-                yield from self._page_to_source_list[source_type]
+                yield from self._sources_by_type[source_type]
 
     def append(self, source: Source):
         if source is None:
@@ -202,7 +200,7 @@ class SourceCollection:
             existing_source.__merge__(source)
             source = existing_source
         else:
-            self._page_to_source_list[source.source_type].append(source)
+            self._sources_by_type[source.source_type].append(source)
 
         changed = False
         for key in source.indexing_values:
diff --git a/music_kraken/pages/abstract.py b/music_kraken/pages/abstract.py
index f542671..8783dbb 100644
--- a/music_kraken/pages/abstract.py
+++ b/music_kraken/pages/abstract.py
@@ -49,15 +49,16 @@ class DownloadOptions:
 
 class Page:
     SOURCE_TYPE: SourceType
-    LOGGER: LOGGER
+    LOGGER: logging.Logger
 
     def __new__(cls, *args, **kwargs):
-        cls.SOURCE_TYPE.register_page(cls)
         cls.LOGGER = logging.getLogger(cls.__name__)
 
         return super().__new__(cls)
 
     def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
+        self.SOURCE_TYPE.register_page(self)
+        
         self.download_options: DownloadOptions = download_options or DownloadOptions()
         self.fetch_options: FetchOptions = fetch_options or FetchOptions()
 
@@ -145,151 +146,7 @@ class Page:
     def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:
         return Label()
 
-    def download(
-        self, 
-        music_object: DatabaseObject, 
-        genre: str, 
-    ) -> DownloadResult:
-        naming_dict: NamingDict = NamingDict({"genre": genre})
-
-        def fill_naming_objects(naming_music_object: DatabaseObject):
-            nonlocal naming_dict
-
-            for collection_name in naming_music_object.UPWARDS_COLLECTION_STRING_ATTRIBUTES:
-                collection: Collection = getattr(naming_music_object, collection_name)
-
-                if collection.empty:
-                    continue
-                
-                dom_ordered_music_object: DatabaseObject = collection[0]
-                naming_dict.add_object(dom_ordered_music_object)
-                return fill_naming_objects(dom_ordered_music_object)
-
-        fill_naming_objects(music_object)
-
-        return self._download(music_object, naming_dict)
-
-    def _download(
-        self, 
-        music_object: DatabaseObject, 
-        naming_dict: NamingDict, 
-        **kwargs
-    ) -> DownloadResult:
-        if isinstance(music_object, Song):
-            output(f"Downloading {music_object.option_string} to:", color=BColors.BOLD)
-        else:
-            output(f"Downloading {music_object.option_string}...", color=BColors.BOLD)
-
-        # Skips all releases, that are defined in shared.ALBUM_TYPE_BLACKLIST, if download_all is False
-        if isinstance(music_object, Album):
-            if not self.download_options.download_all and music_object.album_type in self.download_options.album_type_blacklist:
-                return DownloadResult()
-
-        if not (isinstance(music_object, Song) and self.NO_ADDITIONAL_DATA_FROM_SONG):
-            self.fetch_details(music_object=music_object, stop_at_level=1)
-
-        if isinstance(music_object, Album):
-            music_object.update_tracksort()
-            
-        naming_dict.add_object(music_object)
-
-        if isinstance(music_object, Song):
-            return self._download_song(music_object, naming_dict)
-
-        download_result: DownloadResult = DownloadResult()
-
-        for collection_name in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
-            collection: Collection = getattr(music_object, collection_name)
-
-            sub_ordered_music_object: DatabaseObject
-            for sub_ordered_music_object in collection:
-                download_result.merge(self._download(sub_ordered_music_object, naming_dict.copy()))
-
-        return download_result
-
-    def _download_song(self, song: Song, naming_dict: NamingDict):
-        song.compile()
-        if "genre" not in naming_dict and song.genre is not None:
-            naming_dict["genre"] = song.genre
-
-        if song.genre is None:
-            song.genre = naming_dict["genre"]
-
-        path_parts = Formatter().parse(main_settings["download_path"])
-        file_parts = Formatter().parse(main_settings["download_file"])
-        new_target = Target(
-            relative_to_music_dir=True,
-            file_path=Path(
-                main_settings["download_path"].format(**{part[1]: naming_dict[part[1]] for part in path_parts}),
-                main_settings["download_file"].format(**{part[1]: naming_dict[part[1]] for part in file_parts})
-            )
-        )
-
-        if song.target_collection.empty:
-            song.target_collection.append(new_target)
-
-        r = DownloadResult(1)
-        temp_target: Target = Target.temp(file_extension=main_settings["audio_format"])
-
-        found_on_disc = False
-        target: Target
-        for target in song.target_collection:
-            current_exists = target.exists
-
-            if current_exists:
-                output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY)
-                target.copy_content(temp_target)
-                found_on_disc = True
-
-                r.found_on_disk += 1
-                r.add_target(target)
-            else:
-                output(f'- {target.file_path}', color=BColors.GREY)
-
-
-        sources = song.source_collection.get_sources(self.SOURCE_TYPE)
-
-        skip_intervals = []
-        if not found_on_disc:
-            for source in sources:
-                r = self.download_song_to_target(source=source, target=temp_target, desc="downloading")
-
-                if not r.is_fatal_error:
-                    skip_intervals = self.get_skip_intervals(song, source)
-                    break
-        
-        if temp_target.exists:
-            r.merge(self._post_process_targets(
-                song=song, 
-                temp_target=temp_target,
-                skip_intervals=skip_intervals,
-                found_on_disc=found_on_disc,
-            ))
-
-        return r
-
-    def _post_process_targets(self, song: Song, temp_target: Target, skip_intervals: List, found_on_disc: bool) -> DownloadResult:
-        if not found_on_disc or self.download_options.process_audio_if_found:
-            correct_codec(temp_target, skip_intervals=skip_intervals)
-
-        self.post_process_hook(song, temp_target)
-
-        if not found_on_disc or self.download_options.process_metadata_if_found:
-            write_metadata_to_target(song.metadata, temp_target, song)
-
-        r = DownloadResult()
-
-        target: Target
-        for target in song.target_collection:
-            if temp_target is not target:
-                temp_target.copy_content(target)
-            r.add_target(target)
-
-        temp_target.delete()
-        r.sponsor_segments += len(skip_intervals)
-
-        return r
-
+    # methods used while downloading a song
     def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]:
         return []
 
diff --git a/music_kraken/pages/bandcamp.py b/music_kraken/pages/bandcamp.py
index 30dbbb0..c938189 100644
--- a/music_kraken/pages/bandcamp.py
+++ b/music_kraken/pages/bandcamp.py
@@ -62,8 +62,7 @@ class Bandcamp(Page):
         super().__init__(*args, **kwargs)
 
     def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
-        parsed_url = urlparse(source.url)
-        path = parsed_url.path.replace("/", "")
+        path = source.parsed_url.path.replace("/", "")
 
         if path == "" or path.startswith("music"):
             return Artist
diff --git a/music_kraken/pages/encyclopaedia_metallum.py b/music_kraken/pages/encyclopaedia_metallum.py
index 6ebd1d7..9c1fefe 100644
--- a/music_kraken/pages/encyclopaedia_metallum.py
+++ b/music_kraken/pages/encyclopaedia_metallum.py
@@ -7,7 +7,7 @@ from urllib.parse import urlparse, urlencode
 from ..connection import Connection
 from ..utils.config import logging_settings
 from .abstract import Page
-from ..utils.enums.source import SourceType
+from ..utils.enums import SourceType, ALL_SOURCE_TYPES
 from ..utils.enums.album import AlbumType
 from ..utils.support_classes.query import Query
 from ..objects import (
@@ -59,7 +59,7 @@ def _song_from_json(artist_html=None, album_html=None, release_type=None, title=
             _album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html)
         ],
         source_list=[
-            Source(SourceType.ENCYCLOPAEDIA_METALLUM, song_id)
+            Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, song_id)
         ]
     )
 
@@ -85,7 +85,7 @@ def _artist_from_json(artist_html=None, genre=None, country=None) -> Artist:
     return Artist(
         name=artist_name,
         source_list=[
-            Source(SourceType.ENCYCLOPAEDIA_METALLUM, artist_url)
+            Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, artist_url)
         ]
     )
 
@@ -105,7 +105,7 @@ def _album_from_json(album_html=None, release_type=None, artist_html=None) -> Al
         title=album_name,
         album_type=album_type,
         source_list=[
-            Source(SourceType.ENCYCLOPAEDIA_METALLUM, album_url)
+            Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, album_url)
         ],
         artist_list=[
             _artist_from_json(artist_html=artist_html)
@@ -207,7 +207,7 @@ def create_grid(
 
 
 class EncyclopaediaMetallum(Page):
-    SOURCE_TYPE = SourceType.ENCYCLOPAEDIA_METALLUM
+    SOURCE_TYPE = ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM
     LOGGER = logging_settings["metal_archives_logger"]
     
     def __init__(self, **kwargs):
diff --git a/music_kraken/pages/youtube_music/youtube_music.py b/music_kraken/pages/youtube_music/youtube_music.py
index d62df42..2e01805 100644
--- a/music_kraken/pages/youtube_music/youtube_music.py
+++ b/music_kraken/pages/youtube_music/youtube_music.py
@@ -22,20 +22,22 @@ from ...utils import get_current_millis, traverse_json_path
 
 from ...utils import dump_to_file
 
-from ...objects import Source, DatabaseObject, ID3Timestamp, Artwork
 from ..abstract import Page
 from ...objects import (
-    Artist,
+    DatabaseObject as DataObject,
     Source,
-    SourceType,
+    FormattedText,
+    ID3Timestamp,
+    Artwork,
+    Artist,
     Song,
     Album,
     Label,
     Target,
     Lyrics,
-    FormattedText
 )
 from ...connection import Connection
+from ...utils.enums import SourceType, ALL_SOURCE_TYPES
 from ...utils.enums.album import AlbumType
 from ...utils.support_classes.download_result import DownloadResult
 
@@ -176,8 +178,7 @@ ALBUM_TYPE_MAP = {
 
 class YoutubeMusic(SuperYouTube):
     # CHANGE
-    SOURCE_TYPE = SourceType.YOUTUBE_MUSIC
-    LOGGER = logging_settings["youtube_music_logger"]
+    SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE
 
     def __init__(self, *args, ydl_opts: dict = None, **kwargs):
         self.yt_music_connection: YoutubeMusicConnection = YoutubeMusicConnection(
@@ -348,10 +349,10 @@ class YoutubeMusic(SuperYouTube):
             default='{}'
         )) or {}
 
-    def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
+    def get_source_type(self, source: Source) -> Optional[Type[DataObject]]:
         return super().get_source_type(source)
 
-    def general_search(self, search_query: str) -> List[DatabaseObject]:
+    def general_search(self, search_query: str) -> List[DataObject]:
         search_query = search_query.strip()
 
         urlescaped_query: str = quote(search_query.strip().replace(" ", "+"))
diff --git a/music_kraken/utils/enums/__init__.py b/music_kraken/utils/enums/__init__.py
index e460a03..28f0b9f 100644
--- a/music_kraken/utils/enums/__init__.py
+++ b/music_kraken/utils/enums/__init__.py
@@ -14,10 +14,11 @@ class SourceType:
     page_type: Type[Page] = None
     page: Page = None
 
+    def register_page(self, page: Page):
+        self.page = page
 
-    def register_page(self, page_type: Type[Page]):
-        self.page_type = page
-        self.page = page_type()
+    def __hash__(self):
+        return hash(self.name)
 
     @property
     def has_page(self) -> bool: