fix: stream retry

fix: saving streaming progress on retry
fix: setting the genre of the song
2024-05-15 17:14:01 +02:00 · 2024-05-15 15:04:00 +02:00 · 2024-05-15 14:51:30 +02:00 · 2024-05-15 14:26:19 +02:00 · 2024-05-15 14:21:15 +02:00 · 2024-05-15 14:10:32 +02:00
20 changed files with 225 additions and 274 deletions
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -25,6 +25,7 @@
        "encyclopaedia",
        "ENDC",
        "Gitea",
+        "iframe",
        "isrc",
        "levenshtein",
        "metallum",
--- a/development/actual_donwload.py
+++ b/development/actual_donwload.py
@@ -6,8 +6,9 @@ logging.getLogger().setLevel(logging.DEBUG)

 if __name__ == "__main__":
    commands = [
-        "s: #a Crystal F",
-        "d: 20",
+        "s: #a I'm in a coffin",
+        "0",
+        "d: 0",
    ]

    
--- a/music_kraken/audio/codec.py
+++ b/music_kraken/audio/codec.py
@@ -10,12 +10,12 @@ from ..objects import Target
 LOGGER = logging_settings["codex_logger"]


-def correct_codec(target: Target, bitrate_kb: int = main_settings["bitrate"], audio_format: str = main_settings["audio_format"], interval_list: List[Tuple[float, float]] = None):
+def correct_codec(target: Target, bitrate_kb: int = main_settings["bitrate"], audio_format: str = main_settings["audio_format"], skip_intervals: List[Tuple[float, float]] = None):
    if not target.exists:
        LOGGER.warning(f"Target doesn't exist: {target.file_path}")
        return
    
-    interval_list = interval_list or []
+    skip_intervals = skip_intervals or []

    bitrate_b = int(bitrate_kb / 1024)

@@ -29,7 +29,7 @@ def correct_codec(target: Target, bitrate_kb: int = main_settings["bitrate"], au
    
    start = 0
    next_start = 0
-    for end, next_start in interval_list:
+    for end, next_start in skip_intervals:
        aselect_list.append(f"between(t,{start},{end})")
        start = next_start
    aselect_list.append(f"gte(t,{next_start})")
--- a/music_kraken/cli/main_downloader.py
+++ b/music_kraken/cli/main_downloader.py
@@ -178,8 +178,6 @@ class Downloader:
        page_count = 0
        for option in self.current_results.formatted_generator():
            if isinstance(option, Option):
-                _downloadable = self.pages.is_downloadable(option.music_object)
-
                r = f"{BColors.GREY.value}{option.index:0{self.option_digits}}{BColors.ENDC.value} {option.music_object.option_string}"
                print(r)
            else:
@@ -319,7 +317,7 @@ class Downloader:

        for database_object in data_objects:
            r = self.pages.download(
-                music_object=database_object, 
+                data_object=database_object, 
                genre=self.genre, 
                **kwargs
            )
--- a/music_kraken/connection/connection.py
+++ b/music_kraken/connection/connection.py
@@ -317,7 +317,7 @@ class Connection:
            name = kwargs.pop("description")

        if progress > 0:
-            headers = dict() if headers is None else headers
+            headers = kwargs.get("headers", dict())
            headers["Range"] = f"bytes={target.size}-"

        r = self.request(
@@ -366,6 +366,7 @@ class Connection:
            if retry:
                self.LOGGER.warning(f"Retrying stream...")
                accepted_response_codes.add(206)
+                stream_kwargs["progress"] = progress
                return Connection.stream_into(**stream_kwargs)

            return DownloadResult()
--- a/music_kraken/download/init.py
+++ b/music_kraken/download/init.py
@@ -1,4 +1,5 @@
 from dataclasses import dataclass, field
+from typing import Set

 from ..utils.config import main_settings
 from ..utils.enums.album import AlbumType
--- a/music_kraken/download/page_attributes.py
+++ b/music_kraken/download/page_attributes.py
@@ -2,6 +2,7 @@ from typing import Tuple, Type, Dict, Set, Optional, List
 from collections import defaultdict
 from pathlib import Path
 import re
+import logging

 from . import FetchOptions, DownloadOptions
 from .results import SearchResults
@@ -16,10 +17,12 @@ from ..objects import (
    Artist,
    Label,
 )
+from ..audio import write_metadata_to_target, correct_codec
+from ..utils import output, BColors
 from ..utils.string_processing import fit_to_file_system
 from ..utils.config import youtube_settings, main_settings
 from ..utils.path_manager import LOCATIONS
-from ..utils.enums import SourceType
+from ..utils.enums import SourceType, ALL_SOURCE_TYPES
 from ..utils.support_classes.download_result import DownloadResult
 from ..utils.support_classes.query import Query
 from ..utils.support_classes.download_result import DownloadResult
@@ -68,6 +71,8 @@ if DEBUG_PAGES:

 class Pages:
    def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
+        self.LOGGER = logging.getLogger("download")
+        
        self.download_options: DownloadOptions = download_options or DownloadOptions()
        self.fetch_options: FetchOptions = fetch_options or FetchOptions()

@@ -117,7 +122,9 @@ class Pages:
            return data_object
        
        source: Source
-        for source in data_object.source_collection.get_sources():
+        for source in data_object.source_collection.get_sources(source_type_sorting={
+            "only_with_page": True,
+        }):
            new_data_object = self.fetch_from_source(source=source, stop_at_level=stop_at_level)
            if new_data_object is not None:
                data_object.merge(new_data_object)
@@ -125,36 +132,27 @@ class Pages:
        return data_object

    def fetch_from_source(self, source: Source, **kwargs) -> Optional[DataObject]:
-        page: Page = self._get_page_from_enum(source.source_type)
-        if page is None:
+        if not source.has_page:
            return None
        
-        # getting the appropriate function for the page and the object type
-        source_type = page.get_source_type(source)
-        if not hasattr(page, fetch_map[source_type]):
+        source_type = source.page.get_source_type(source=source)
+        if source_type is None:
+            self.LOGGER.debug(f"Could not determine source type for {source}.")
            return None
-        func = getattr(page, fetch_map[source_type])(source=source, **kwargs)
+
+        func = getattr(source.page, fetch_map[source_type])
        
        # fetching the data object and marking it as fetched
-        data_object: DataObject = func(source=source)
+        data_object: DataObject = func(source=source, **kwargs)
        data_object.mark_as_fetched(source.hash_url)
        return data_object

    def fetch_from_url(self, url: str) -> Optional[DataObject]:
-        source = Source.match_url(url, SourceType.MANUAL)
+        source = Source.match_url(url, ALL_SOURCE_TYPES.MANUAL)
        if source is None:
            return None
        
        return self.fetch_from_source(source=source)
-
-    def is_downloadable(self, music_object: DataObject) -> bool:
-        _page_types = set(self._source_to_page)
-        for src in music_object.source_collection.source_pages:
-            if src in self._source_to_page:
-                _page_types.add(self._source_to_page[src])
-
-        audio_pages = self._audio_pages_set.intersection(_page_types)
-        return len(audio_pages) > 0
    
    def _skip_object(self, data_object: DataObject) -> bool:
        if isinstance(data_object, Album):
@@ -166,6 +164,7 @@ class Pages:
    def download(self, data_object: DataObject, genre: str, **kwargs) -> DownloadResult:
        # fetch the given object
        self.fetch_details(data_object)
+        output(f"\nDownloading {data_object.option_string}...", color=BColors.BOLD)
        
        # fetching all parent objects (e.g. if you only download a song)
        if not kwargs.get("fetched_upwards", False):
@@ -188,7 +187,7 @@ class Pages:
        
        # download all children
        download_result: DownloadResult = DownloadResult()
-        for c in data_object.get_children():
+        for c in data_object.get_child_collections():
            for d in c:
                if self._skip_object(d):
                    continue
@@ -205,7 +204,7 @@ class Pages:

            self._download_song(data_object, naming={
                "genre": [genre],
-                "audio_format": main_settings["audio_format"],
+                "audio_format": [main_settings["audio_format"]],
            })

        return download_result
@@ -222,12 +221,7 @@ class Pages:

            path_template = path_template.replace(f"{{{field}}}", naming[field][0])

-        return possible_parts
-
-    def _get_pages_with_source(self, data_object: DataObject, sort_by_attribute: str = "DOWNLOAD_PRIORITY") -> List[Page]:
-        pages = [self._get_page_from_enum(s.source_type) for s in data_object.source_collection.get_sources()]
-        pages.sort(key=lambda p: getattr(p, sort_by_attribute), reverse=True)
-        return list(pages)
+        return path_template

    def _download_song(self, song: Song, naming: dict) -> DownloadOptions:
        """
@@ -242,7 +236,6 @@ class Pages:
        # manage the naming
        naming: Dict[str, List[str]] = defaultdict(list, naming)
        naming["song"].append(song.title_string)
-        naming["genre"].append(song.genre)
        naming["isrc"].append(song.isrc)
        naming["album"].extend(a.title_string for a in song.album_collection)
        naming["album_type"].extend(a.album_type.value for a in song.album_collection)
@@ -253,11 +246,11 @@ class Pages:
        # removing duplicates from the naming, and process the strings
        for key, value in naming.items():
            # https://stackoverflow.com/a/17016257
-            naming[key] = list(dict.fromkeys(items))
+            naming[key] = list(dict.fromkeys(value))
+        song.genre = naming["genre"][0]

        # manage the targets
        tmp: Target = Target.temp(file_extension=main_settings["audio_format"])
-        found_on_disc = False

        song.target_collection.append(Target(
            relative_to_music_dir=True,
@@ -267,24 +260,64 @@ class Pages:
            )
        ))
        for target in song.target_collection:
-            if target.exists():
-                output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY)
-
-                found_on_disc = True
+            if target.exists:
+                output(f'{target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY)
                r.found_on_disk += 1
-                target.copy_content(tmp)
+
+                if not self.download_options.download_again_if_found:
+                    target.copy_content(tmp)
            else:
-                target.create_parent_directories()
-                output(f'- {target.file_path}', color=BColors.GREY)
+                target.create_path()
+                output(f'{target.file_path}', color=BColors.GREY)

-        # actually download
-        for page in self._get_pages_with_source(song, sort_by_attribute="DOWNLOAD_PRIORITY"):
-            r = page.download_song_to_target(song, tmp, r)
+        # stream from each available source in priority order until a download succeeds, taking the skip intervals from the source that was used
+        used_source: Optional[Source] = None
+        skip_intervals: List[Tuple[float, float]] = []
+        for source in song.source_collection.get_sources(source_type_sorting={
+            "only_with_page": True,
+            "sort_key": lambda page: page.download_priority,
+            "reverse": True,
+        }):
+            if tmp.exists:
+                break

+            used_source = source
+            streaming_results = source.page.download_song_to_target(source=source, target=tmp, desc="download")
+            skip_intervals = source.page.get_skip_intervals(song=song, source=source)
+
+            # if something has been downloaded but it somehow failed, delete the file
+            if streaming_results.is_fatal_error and tmp.exists:
+                tmp.delete()
+
+        # if everything went right, the file should exist now
+        if not tmp.exists:
+            if used_source is None:
+                r.error_message = f"No source found for {song.option_string}."
+            else:
+                r.error_message = f"Something went wrong downloading {song.option_string}."
+            return r
+
+        # post process the audio
+        found_on_disk = used_source is None
+        if not found_on_disk or self.download_options.process_audio_if_found:
+            correct_codec(target=tmp, skip_intervals=skip_intervals)
+            r.sponsor_segments = len(skip_intervals)
+
+        if used_source is not None:
+            used_source.page.post_process_hook(song=song, temp_target=tmp)
+
+        if not found_on_disk or self.download_options.process_metadata_if_found:
+            write_metadata_to_target(metadata=song.metadata, target=tmp, song=song)
+
+        # copy the tmp target to the final locations
+        for target in song.target_collection:
+            tmp.copy_content(target)
+
+        tmp.delete()
        return r

    def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]:
-        source = Source.match_url(url, SourceType.MANUAL)
+        source = Source.match_url(url, ALL_SOURCE_TYPES.MANUAL)
        
        if source is None:
            raise UrlNotFoundException(url=url)
--- a/music_kraken/download/results.py
+++ b/music_kraken/download/results.py
@@ -2,7 +2,6 @@ from typing import Tuple, Type, Dict, List, Generator, Union
 from dataclasses import dataclass

 from ..objects import DatabaseObject
-from ..utils.enums.source import SourceType
 from ..pages import Page, EncyclopaediaMetallum, Musify


--- a/music_kraken/objects/formatted_text.py
+++ b/music_kraken/objects/formatted_text.py
@@ -38,8 +38,13 @@ class FormattedText:
    def markdown(self) -> str:
        return md(self.html).strip()

+    @property
+    def plain(self) -> str:
+        md = self.markdown
+        return md.replace("\n\n", "\n")
+
    def __str__(self) -> str:
        return self.markdown

-    plaintext = markdown
+    plaintext = plain
    
--- a/music_kraken/objects/lyrics.py
+++ b/music_kraken/objects/lyrics.py
@@ -34,6 +34,6 @@ class Lyrics(OuterProxy):
    @property
    def metadata(self) -> Metadata:
        return Metadata({
-            id3Mapping.UNSYNCED_LYRICS: [self.text.markdown]
+            id3Mapping.UNSYNCED_LYRICS: [self.text.plaintext]
        })

--- a/music_kraken/objects/parents.py
+++ b/music_kraken/objects/parents.py
@@ -8,6 +8,7 @@ from typing import Optional, Dict, Tuple, List, Type, Generic, Any, TypeVar, Set
 from pathlib import Path
 import inspect

+from .source import SourceCollection
 from .metadata import Metadata
 from ..utils import get_unix_time, object_trace, generate_id
 from ..utils.config import logging_settings, main_settings
--- a/music_kraken/objects/song.py
+++ b/music_kraken/objects/song.py
@@ -155,9 +155,6 @@ class Song(Base):
        self.main_artist_collection.extend_object_to_attribute = {
            "main_album_collection": self.album_collection
        }
-        self.feature_artist_collection.append_object_to_attribute = {
-            "feature_song_collection": self
-        }

        self.feature_artist_collection.push_to = [self.main_artist_collection]
        self.main_artist_collection.pull_from = [self.feature_artist_collection]
@@ -464,7 +461,6 @@ class Artist(Base):
    source_collection: SourceCollection
    contact_collection: Collection[Contact]

-    feature_song_collection: Collection[Song]
    main_album_collection: Collection[Album]
    label_collection: Collection[Label]

@@ -479,7 +475,6 @@ class Artist(Base):
        "general_genre": lambda: "",

        "source_collection": SourceCollection,
-        "feature_song_collection": Collection,
        "main_album_collection": Collection,
        "contact_collection": Collection,
        "label_collection": Collection,
@@ -511,14 +506,10 @@ class Artist(Base):
        Base.__init__(**real_kwargs)


-    DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("main_album_collection", "feature_song_collection")
+    DOWNWARDS_COLLECTION_STRING_ATTRIBUTES = ("main_album_collection",)
    UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("label_collection",)

    def __init_collections__(self):
-        self.feature_song_collection.append_object_to_attribute = {
-            "feature_artist_collection": self
-        }
-
        self.main_album_collection.append_object_to_attribute = {
            "artist_collection": self
        }
@@ -530,7 +521,6 @@ class Artist(Base):
    def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]):
        if object_type is Song:
            # this doesn't really make sense
-            # self.feature_song_collection.extend(object_list)
            return

        if object_type is Artist:
@@ -628,8 +618,6 @@ class Artist(Base):
        if len(self.main_album_collection) > 0:
            r += f" with {len(self.main_album_collection)} albums"
        
-        if len(self.feature_song_collection) > 0:
-            r += f" featured in {len(self.feature_song_collection)} songs"
        r += BColors.ENDC.value

        return r
--- a/music_kraken/objects/source.py
+++ b/music_kraken/objects/source.py
@@ -2,19 +2,31 @@ from __future__ import annotations

 from collections import defaultdict
 from enum import Enum
-from typing import List, Dict, Set, Tuple, Optional, Iterable, Generator
+from typing import (
+    List, 
+    Dict, 
+    Set, 
+    Tuple, 
+    Optional, 
+    Iterable, 
+    Generator, 
+    TypedDict, 
+    Callable, 
+    Any,
+    TYPE_CHECKING
+)
 from urllib.parse import urlparse, ParseResult
 from dataclasses import dataclass, field
 from functools import cached_property

 from ..utils import generate_id
-from ..utils.enums import SourceType
+from ..utils.enums import SourceType, ALL_SOURCE_TYPES
 from ..utils.config import youtube_settings
 from ..utils.string_processing import hash_url, shorten_display_url

 from .metadata import Mapping, Metadata
-from .parents import OuterProxy
-from .collection import Collection
+if TYPE_CHECKING:
+    from ..pages.abstract import Page



@@ -29,10 +41,6 @@ class Source:

    def __post_init__(self):
        self.referrer_page = self.referrer_page or self.source_type
-    
-    @property
-    def parsed_url(self) -> ParseResult:
-        return urlparse(self.url)

    @classmethod
    def match_url(cls, url: str, referrer_page: SourceType) -> Optional[Source]:
@@ -44,38 +52,50 @@ class Source:
        url = parsed_url.geturl()
        
        if "musify" in parsed_url.netloc:
-            return cls(SourceType.MUSIFY, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.MUSIFY, url, referrer_page=referrer_page)

        if parsed_url.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]:
-            return cls(SourceType.YOUTUBE, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.YOUTUBE, url, referrer_page=referrer_page)

        if url.startswith("https://www.deezer"):
-            return cls(SourceType.DEEZER, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.DEEZER, url, referrer_page=referrer_page)
        
        if url.startswith("https://open.spotify.com"):
-            return cls(SourceType.SPOTIFY, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.SPOTIFY, url, referrer_page=referrer_page)

        if "bandcamp" in url:
-            return cls(SourceType.BANDCAMP, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.BANDCAMP, url, referrer_page=referrer_page)

        if "wikipedia" in parsed_url.netloc:
-            return cls(SourceType.WIKIPEDIA, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.WIKIPEDIA, url, referrer_page=referrer_page)

        if url.startswith("https://www.metal-archives.com/"):
-            return cls(SourceType.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page)

        # the less important once
        if url.startswith("https://www.facebook"):
-            return cls(SourceType.FACEBOOK, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.FACEBOOK, url, referrer_page=referrer_page)

        if url.startswith("https://www.instagram"):
-            return cls(SourceType.INSTAGRAM, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.INSTAGRAM, url, referrer_page=referrer_page)

        if url.startswith("https://twitter"):
-            return cls(SourceType.TWITTER, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.TWITTER, url, referrer_page=referrer_page)

        if url.startswith("https://myspace.com"):
-            return cls(SourceType.MYSPACE, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.MYSPACE, url, referrer_page=referrer_page)
+
+    @property
+    def has_page(self) -> bool:
+        return self.source_type.page is not None
+    
+    @property
+    def page(self) -> Page:
+        return self.source_type.page
+
+    @property
+    def parsed_url(self) -> ParseResult:
+        return urlparse(self.url)

    @property
    def hash_url(self) -> str:
@@ -99,27 +119,72 @@ class Source:
    page_str = property(fget=lambda self: self.source_type.value)


+class SourceTypeSorting(TypedDict):
+    sort_key: Callable[[SourceType], Any]
+    reverse: bool
+    only_with_page: bool
+
+
 class SourceCollection:
    __change_version__ = generate_id()

    _indexed_sources: Dict[str, Source]
-    _page_to_source_list: Dict[SourceType, List[Source]]
+    _sources_by_type: Dict[SourceType, List[Source]]

    def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs):
-        self._page_to_source_list = defaultdict(list)
+        self._sources_by_type = defaultdict(list)
        self._indexed_sources = {}

        self.extend(data or [])

-    def has_source_page(self, *source_pages: SourceType) -> bool:
-        return any(source_page in self._page_to_source_list for source_page in source_pages)
+    def source_types(
+        self, 
+        only_with_page: bool = False, 
+        sort_key = lambda page: page.name, 
+        reverse: bool = False
+    ) -> Iterable[SourceType]:
+        """
+        Returns a list of all source types contained in this source collection.

-    def get_sources(self, *source_pages: List[Source]) -> Generator[Source]:
-        if not len(source_pages):
-            source_pages = self.source_pages
+        Args:
+            only_with_page (bool, optional): If True, only returns source types that have a page, meaning you can download from them.
+            sort_key (function, optional): A function that defines the sorting key for the source types. Defaults to lambda page: page.name.
+            reverse (bool, optional): If True, sorts the source types in reverse order. Defaults to False.

-        for page in source_pages:
-            yield from self._page_to_source_list[page]
+        Returns:
+            Iterable[SourceType]: A list of source types.
+        """
+
+        source_types: List[SourceType] = self._sources_by_type.keys()
+        if only_with_page:
+            source_types = filter(lambda st: st.has_page, source_types)
+
+        return sorted(
+            source_types, 
+            key=sort_key, 
+            reverse=reverse
+        )
+
+    def get_sources(self, *source_types: List[SourceType], source_type_sorting: SourceTypeSorting = None) -> Generator[Source]:
+            """
+            Retrieves sources based on the provided source types and source type sorting.
+
+            Args:
+                *source_types (SourceType): Variable number of source types to filter the sources by.
+                source_type_sorting (SourceTypeSorting): Sorting criteria for the source types. This is only relevant if no source types are provided.
+
+            Yields:
+                Source: Each source belonging to one of the selected source types.
+
+            Returns:
+                None
+            """
+            if not len(source_types):
+                source_type_sorting = source_type_sorting or {}
+                source_types = self.source_types(**source_type_sorting)
+
+            for source_type in source_types:
+                yield from self._sources_by_type[source_type]

    def append(self, source: Source):
        if source is None:
@@ -135,7 +200,7 @@ class SourceCollection:
            existing_source.__merge__(source)
            source = existing_source
        else:
-            self._page_to_source_list[source.source_type].append(source)
+            self._sources_by_type[source.source_type].append(source)

        changed = False
        for key in source.indexing_values:
@@ -156,10 +221,6 @@ class SourceCollection:
    def __merge__(self, other: SourceCollection, **kwargs):
        self.extend(other)
        
-    @property
-    def source_pages(self) -> Iterable[SourceType]:
-        return sorted(self._page_to_source_list.keys(), key=lambda page: page.value)
-
    @property
    def hash_url_list(self) -> List[str]:
        return [hash_url(source.url) for source in self.get_sources()]
@@ -170,7 +231,7 @@ class SourceCollection:

    @property
    def homepage_list(self) -> List[str]:
-        return [source.homepage for source in self.source_pages]
+        return [source_type.homepage for source_type in self._sources_by_type.keys()]

    def indexing_values(self) -> Generator[Tuple[str, str], None, None]:
        for index in self._indexed_sources:
--- a/music_kraken/pages/abstract.py
+++ b/music_kraken/pages/abstract.py
@@ -49,15 +49,16 @@ class DownloadOptions:

 class Page:
    SOURCE_TYPE: SourceType
-    LOGGER: LOGGER
+    LOGGER: logging.Logger

    def __new__(cls, *args, **kwargs):
-        cls.SOURCE_TYPE.register_page(cls)
        cls.LOGGER = logging.getLogger(cls.__name__)

        return super().__new__(cls)

    def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
+        self.SOURCE_TYPE.register_page(self)
+        
        self.download_options: DownloadOptions = download_options or DownloadOptions()
        self.fetch_options: FetchOptions = fetch_options or FetchOptions()

@@ -145,153 +146,7 @@ class Page:
    def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:
        return Label()

-    def download(
-        self, 
-        music_object: DatabaseObject, 
-        genre: str, 
-    ) -> DownloadResult:
-        naming_dict: NamingDict = NamingDict({"genre": genre})
-
-        def fill_naming_objects(naming_music_object: DatabaseObject):
-            nonlocal naming_dict
-
-            for collection_name in naming_music_object.UPWARDS_COLLECTION_STRING_ATTRIBUTES:
-                collection: Collection = getattr(naming_music_object, collection_name)
-
-                if collection.empty:
-                    continue
-                
-                dom_ordered_music_object: DatabaseObject = collection[0]
-                naming_dict.add_object(dom_ordered_music_object)
-                return fill_naming_objects(dom_ordered_music_object)
-
-        fill_naming_objects(music_object)
-
-        return self._download(music_object, naming_dict)
-
-    def _download(
-        self, 
-        music_object: DatabaseObject, 
-        naming_dict: NamingDict, 
-        **kwargs
-    ) -> DownloadResult:
-        if isinstance(music_object, Song):
-            output(f"Downloading {music_object.option_string} to:", color=BColors.BOLD)
-        else:
-            output(f"Downloading {music_object.option_string}...", color=BColors.BOLD)
-
-        # Skips all releases, that are defined in shared.ALBUM_TYPE_BLACKLIST, if download_all is False
-        if isinstance(music_object, Album):
-            if not self.download_options.download_all and music_object.album_type in self.download_options.album_type_blacklist:
-                return DownloadResult()
-
-        if not (isinstance(music_object, Song) and self.NO_ADDITIONAL_DATA_FROM_SONG):
-            self.fetch_details(music_object=music_object, stop_at_level=1)
-
-        if isinstance(music_object, Album):
-            music_object.update_tracksort()
-            
-        naming_dict.add_object(music_object)
-
-        if isinstance(music_object, Song):
-            return self._download_song(music_object, naming_dict)
-
-        download_result: DownloadResult = DownloadResult()
-
-        for collection_name in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
-            collection: Collection = getattr(music_object, collection_name)
-
-            sub_ordered_music_object: DatabaseObject
-            for sub_ordered_music_object in collection:
-                download_result.merge(self._download(sub_ordered_music_object, naming_dict.copy()))
-
-        return download_result
-
-    def _download_song(self, song: Song, naming_dict: NamingDict):
-        song.compile()
-        if "genre" not in naming_dict and song.genre is not None:
-            naming_dict["genre"] = song.genre
-
-        if song.genre is None:
-            song.genre = naming_dict["genre"]
-
-        path_parts = Formatter().parse(main_settings["download_path"])
-        file_parts = Formatter().parse(main_settings["download_file"])
-        new_target = Target(
-            relative_to_music_dir=True,
-            file_path=Path(
-                main_settings["download_path"].format(**{part[1]: naming_dict[part[1]] for part in path_parts}),
-                main_settings["download_file"].format(**{part[1]: naming_dict[part[1]] for part in file_parts})
-            )
-        )
-
-        if song.target_collection.empty:
-            song.target_collection.append(new_target)
-
-        r = DownloadResult(1)
-        temp_target: Target = Target.temp(file_extension=main_settings["audio_format"])
-
-        found_on_disc = False
-        target: Target
-        for target in song.target_collection:
-            current_exists = target.exists
-
-            if current_exists:
-                output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY)
-                target.copy_content(temp_target)
-                found_on_disc = True
-
-                r.found_on_disk += 1
-                r.add_target(target)
-            else:
-                output(f'- {target.file_path}', color=BColors.GREY)
-
-        if not song.source_collection.has_source_page(self.SOURCE_TYPE):
-            return DownloadResult(error_message=f"No {self.__class__.__name__} source found for {song.option_string}.")
-
-        sources = song.source_collection.get_sources(self.SOURCE_TYPE)
-
-        skip_intervals = []
-        if not found_on_disc:
-            for source in sources:
-                r = self.download_song_to_target(source=source, target=temp_target, desc="downloading")
-
-                if not r.is_fatal_error:
-                    skip_intervals = self.get_skip_intervals(song, source)
-                    break
-        
-        if temp_target.exists:
-            r.merge(self._post_process_targets(
-                song=song, 
-                temp_target=temp_target,
-                interval_list=skip_intervals,
-                found_on_disc=found_on_disc,
-            ))
-
-        return r
-
-    def _post_process_targets(self, song: Song, temp_target: Target, interval_list: List, found_on_disc: bool) -> DownloadResult:
-        if not found_on_disc or self.download_options.process_audio_if_found:
-            correct_codec(temp_target, interval_list=interval_list)
-
-        self.post_process_hook(song, temp_target)
-
-        if not found_on_disc or self.download_options.process_metadata_if_found:
-            write_metadata_to_target(song.metadata, temp_target, song)
-
-        r = DownloadResult()
-
-        target: Target
-        for target in song.target_collection:
-            if temp_target is not target:
-                temp_target.copy_content(target)
-            r.add_target(target)
-
-        temp_target.delete()
-        r.sponsor_segments += len(interval_list)
-
-        return r
-
+    # to download stuff
    def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]:
        return []

--- a/music_kraken/pages/bandcamp.py
+++ b/music_kraken/pages/bandcamp.py
@@ -51,7 +51,6 @@ class BandcampTypes(Enum):

 class Bandcamp(Page):
    SOURCE_TYPE = ALL_SOURCE_TYPES.BANDCAMP
-    LOGGER = logging_settings["bandcamp_logger"]

    def __init__(self, *args, **kwargs):
        self.connection: Connection = Connection(
@@ -63,8 +62,7 @@ class Bandcamp(Page):
        super().__init__(*args, **kwargs)

    def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
-        parsed_url = urlparse(source.url)
-        path = parsed_url.path.replace("/", "")
+        path = source.parsed_url.path.replace("/", "")

        if path == "" or path.startswith("music"):
            return Artist
--- a/music_kraken/pages/encyclopaedia_metallum.py
+++ b/music_kraken/pages/encyclopaedia_metallum.py
@@ -7,7 +7,7 @@ from urllib.parse import urlparse, urlencode
 from ..connection import Connection
 from ..utils.config import logging_settings
 from .abstract import Page
-from ..utils.enums.source import SourceType
+from ..utils.enums import SourceType, ALL_SOURCE_TYPES
 from ..utils.enums.album import AlbumType
 from ..utils.support_classes.query import Query
 from ..objects import (
@@ -59,7 +59,7 @@ def _song_from_json(artist_html=None, album_html=None, release_type=None, title=
            _album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html)
        ],
        source_list=[
-            Source(SourceType.ENCYCLOPAEDIA_METALLUM, song_id)
+            Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, song_id)
        ]
    )

@@ -85,7 +85,7 @@ def _artist_from_json(artist_html=None, genre=None, country=None) -> Artist:
    return Artist(
        name=artist_name,
        source_list=[
-            Source(SourceType.ENCYCLOPAEDIA_METALLUM, artist_url)
+            Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, artist_url)
        ]
    )

@@ -105,7 +105,7 @@ def _album_from_json(album_html=None, release_type=None, artist_html=None) -> Al
        title=album_name,
        album_type=album_type,
        source_list=[
-            Source(SourceType.ENCYCLOPAEDIA_METALLUM, album_url)
+            Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, album_url)
        ],
        artist_list=[
            _artist_from_json(artist_html=artist_html)
@@ -207,7 +207,7 @@ def create_grid(


 class EncyclopaediaMetallum(Page):
-    SOURCE_TYPE = SourceType.ENCYCLOPAEDIA_METALLUM
+    SOURCE_TYPE = ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM
    LOGGER = logging_settings["metal_archives_logger"]
    
    def __init__(self, **kwargs):
--- a/music_kraken/pages/musify.py
+++ b/music_kraken/pages/musify.py
@@ -502,9 +502,18 @@ class Musify(Page):
        for video_container in video_container_list:
            iframe_list: List[BeautifulSoup] = video_container.findAll("iframe")
            for iframe in iframe_list:
+                """
+                the url could look like this
+                https://www.youtube.com/embed/sNObCkhzOYA?si=dNVgnZMBNVlNb0P_
+                """
+                parsed_url = urlparse(iframe["src"])
+                path_parts = parsed_url.path.strip("/").split("/")
+                if path_parts[0] != "embed" or len(path_parts) < 2:
+                    continue
+                
                source_list.append(Source(
-                    SourceType.YOUTUBE,
-                    iframe["src"],
+                    ALL_SOURCE_TYPES.YOUTUBE,
+                    f"https://music.youtube.com/watch?v={path_parts[1]}",
                    referrer_page=self.SOURCE_TYPE
                ))
        
--- a/music_kraken/pages/youtube.py
+++ b/music_kraken/pages/youtube.py
@@ -41,8 +41,6 @@ class YouTube(SuperYouTube):
    # CHANGE
    SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE

-    NO_ADDITIONAL_DATA_FROM_SONG = False
-
    def __init__(self, *args, **kwargs):
        self.connection: Connection = Connection(
            host=get_invidious_url(),
--- a/music_kraken/pages/youtube_music/youtube_music.py
+++ b/music_kraken/pages/youtube_music/youtube_music.py
@@ -22,20 +22,22 @@ from ...utils import get_current_millis, traverse_json_path

 from ...utils import dump_to_file

-from ...objects import Source, DatabaseObject, ID3Timestamp, Artwork
 from ..abstract import Page
 from ...objects import (
-    Artist,
+    DatabaseObject as DataObject,
    Source,
-    SourceType,
+    FormattedText,
+    ID3Timestamp,
+    Artwork,
+    Artist,
    Song,
    Album,
    Label,
    Target,
    Lyrics,
-    FormattedText
 )
 from ...connection import Connection
+from ...utils.enums import SourceType, ALL_SOURCE_TYPES
 from ...utils.enums.album import AlbumType
 from ...utils.support_classes.download_result import DownloadResult

@@ -176,8 +178,7 @@ ALBUM_TYPE_MAP = {

 class YoutubeMusic(SuperYouTube):
    # CHANGE
-    SOURCE_TYPE = SourceType.YOUTUBE_MUSIC
-    LOGGER = logging_settings["youtube_music_logger"]
+    SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE

    def __init__(self, *args, ydl_opts: dict = None, **kwargs):
        self.yt_music_connection: YoutubeMusicConnection = YoutubeMusicConnection(
@@ -348,10 +349,10 @@ class YoutubeMusic(SuperYouTube):
            default='{}'
        )) or {}

-    def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
+    def get_source_type(self, source: Source) -> Optional[Type[DataObject]]:
        return super().get_source_type(source)

-    def general_search(self, search_query: str) -> List[DatabaseObject]:
+    def general_search(self, search_query: str) -> List[DataObject]:
        search_query = search_query.strip()

        urlescaped_query: str = quote(search_query.strip().replace(" ", "+"))
@@ -619,7 +620,7 @@ class YoutubeMusic(SuperYouTube):
            Artist(
                name=name,
                source_list=[Source(
-                    SourceType.YOUTUBE_MUSIC, 
+                    self.SOURCE_TYPE, 
                    f"https://music.youtube.com/channel/{ydl_res.get('channel_id', ydl_res.get('uploader_id', ''))}"
            )]
        ) for name in artist_names]
@@ -640,7 +641,7 @@ class YoutubeMusic(SuperYouTube):
            artwork=Artwork(*ydl_res.get("thumbnails", [])),
            main_artist_list=artist_list,
            source_list=[Source(
-                SourceType.YOUTUBE_MUSIC,
+                self.SOURCE_TYPE,
                f"https://music.youtube.com/watch?v={ydl_res.get('id')}"
            ), source],
        )
--- a/music_kraken/utils/enums/init.py
+++ b/music_kraken/utils/enums/init.py
@@ -14,10 +14,11 @@ class SourceType:
    page_type: Type[Page] = None
    page: Page = None

+    def register_page(self, page: Page):
+        self.page = page

-    def register_page(self, page_type: Type[Page]):
-        self.page_type = page
-        self.page = page_type()
+    def __hash__(self):
+        return hash(self.name)

    @property
    def has_page(self) -> bool:
Author	SHA1	Message	Date
Lars Noack	80ad2727de	fix: stream retry All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details ci/woodpecker/pr/woodpecker Pipeline was successful Details ci/woodpecker/pull_request_closed/woodpecker Pipeline was successful Details	2024-05-15 17:14:01 +02:00
Lars Noack	19b83ce880	fix: saving streaming progress on retry All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details ci/woodpecker/pr/woodpecker Pipeline was successful Details	2024-05-15 15:04:00 +02:00
Lars Noack	1bf04439f0	fix: setting the genre of the song All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details ci/woodpecker/pr/woodpecker Pipeline was successful Details	2024-05-15 14:51:30 +02:00
Lars Noack	bab6aeb45d	fix: removed double linebreaks from formated text, plaintext All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details ci/woodpecker/pr/woodpecker Pipeline was successful Details	2024-05-15 14:26:19 +02:00
Lars Noack	98afe5047d	fix: wrong creation of source types All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details ci/woodpecker/pr/woodpecker Pipeline was successful Details	2024-05-15 14:21:15 +02:00
Lars Noack	017752c4d0	feat: better download output All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details ci/woodpecker/pr/woodpecker Pipeline was successful Details	2024-05-15 14:10:32 +02:00
Lars Noack	ea4c73158e	fix: audio format is replaced completely All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details ci/woodpecker/pr/woodpecker Pipeline was successful Details	2024-05-15 13:58:44 +02:00
Lars Noack	0096dfe5cb	feat: copying the downloaded music into the final locations All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details ci/woodpecker/pr/woodpecker Pipeline was successful Details	2024-05-15 13:17:36 +02:00
Lars Noack	bedd0fe819	fix: runtime errors All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2024-05-15 13:16:11 +02:00
Lars Noack	ac6c513d56	draft: post process song All checks were successful ci/woodpecker/push/woodpecker Pipeline was successful Details	2024-05-15 12:30:54 +02:00
Lars Noack	cc14253239	draft: streaming the audio	2024-05-15 12:18:08 +02:00
Lars Noack	14f986a497	draft: rewrote sources	2024-05-15 11:44:39 +02:00