draft: rewriting source

2024-05-14 15:18:17 +02:00
parent bb32fc7647
commit da8887b279
19 changed files with 198 additions and 242 deletions
--- a/music_kraken/download/init.py
+++ b/music_kraken/download/init.py
@@ -15,5 +15,6 @@ class DownloadOptions:
    download_all: bool = False
    album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"]))

+    download_again_if_found: bool = False
    process_audio_if_found: bool = False
    process_metadata_if_found: bool = True
--- a/music_kraken/download/page_attributes.py
+++ b/music_kraken/download/page_attributes.py
@@ -1,17 +1,29 @@
 from typing import Tuple, Type, Dict, Set, Optional, List
 from collections import defaultdict
 from pathlib import Path
+import re

 from . import FetchOptions, DownloadOptions
 from .results import SearchResults
-from ..objects import DatabaseObject as DataObject, Source, Album, Song, Artist, Label
-
+from ..objects import (
+    DatabaseObject as DataObject,
+    Collection,
+    Target,
+    Source,
+    Options,
+    Song,
+    Album,
+    Artist,
+    Label,
+)
 from ..utils.string_processing import fit_to_file_system
-from ..utils.config import youtube_settings
-from ..utils.enums.source import SourcePages
+from ..utils.config import youtube_settings, main_settings
+from ..utils.path_manager import LOCATIONS
+from ..utils.enums import SourceType
 from ..utils.support_classes.download_result import DownloadResult
 from ..utils.support_classes.query import Query
 from ..utils.support_classes.download_result import DownloadResult
+from ..utils.exception import MKMissingNameException
 from ..utils.exception.download import UrlNotFoundException
 from ..utils.shared import DEBUG_PAGES

@@ -61,7 +73,7 @@ class Pages:

        # initialize all page instances
        self._page_instances: Dict[Type[Page], Page] = dict()
-        self._source_to_page: Dict[SourcePages, Type[Page]] = dict()
+        self._source_to_page: Dict[SourceType, Type[Page]] = dict()
        
        exclude_pages = exclude_pages if exclude_pages is not None else set()
        
@@ -84,7 +96,7 @@ class Pages:
            self._page_instances[page_type] = page_type(fetch_options=self.fetch_options, download_options=self.download_options)
            self._source_to_page[page_type.SOURCE_TYPE] = page_type

-    def _get_page_from_enum(self, source_page: SourcePages) -> Page:
+    def _get_page_from_enum(self, source_page: SourceType) -> Page:
        if source_page not in self._source_to_page:
            return None
        return self._page_instances[self._source_to_page[source_page]]
@@ -113,7 +125,7 @@ class Pages:
        return data_object

    def fetch_from_source(self, source: Source, **kwargs) -> Optional[DataObject]:
-        page: Page = self._get_page_from_enum(source.page_enum)
+        page: Page = self._get_page_from_enum(source.source_type)
        if page is None:
            return None
        
@@ -129,7 +141,7 @@ class Pages:
        return data_object

    def fetch_from_url(self, url: str) -> Optional[DataObject]:
-        source = Source.match_url(url, SourcePages.MANUAL)
+        source = Source.match_url(url, SourceType.MANUAL)
        if source is None:
            return None
        
@@ -198,13 +210,39 @@ class Pages:

        return download_result

+    def _extract_fields_from_template(self, path_template: str) -> Set[str]:
+        """Collect the distinct field names written as ``{name}`` in ``path_template``."""
+        placeholder_matches = re.finditer(r"{([^}]+)}", path_template)
+        return {match.group(1) for match in placeholder_matches}
+
+    def _parse_path_template(self, path_template: str, naming: Dict[str, List[str]]) -> str:
+        """
+        Fill every ``{field}`` placeholder of ``path_template`` with a name.
+
+        :param path_template: template containing ``{field}`` placeholders.
+        :param naming: maps a field name to its candidate values; the first
+            candidate that is not ``None`` is used.
+        :return: the template with all placeholders substituted.
+        :raises MKMissingNameException: if a placeholder has no usable candidate.
+        """
+        field_names: Set[str] = self._extract_fields_from_template(path_template)
+
+        for field in field_names:
+            # .get() also works for a plain dict that lacks the key; None
+            # candidates are skipped because str.replace() cannot take them
+            candidates = [name for name in naming.get(field, ()) if name is not None]
+            if not candidates:
+                raise MKMissingNameException(f"Missing field for {field}.")
+
+            path_template = path_template.replace(f"{{{field}}}", candidates[0])
+
+        # the substituted template is the result
+        return path_template
+
+    def _get_pages_with_source(self, data_object: DataObject, sort_by_attribute: str = "DOWNLOAD_PRIORITY") -> List[Page]:
+        """
+        Return the registered pages that ``data_object`` has a source for.
+
+        Sources whose page is not registered (``_get_page_from_enum`` returns
+        ``None`` for them) are skipped, so the sort never touches ``None``.
+
+        :param data_object: object whose ``source_collection`` is inspected.
+        :param sort_by_attribute: name of the page attribute used as sort key.
+        :return: the matching pages, highest ``sort_by_attribute`` first.
+        """
+        candidates = (self._get_page_from_enum(s.source_type) for s in data_object.source_collection.get_sources())
+        return sorted(
+            (page for page in candidates if page is not None),
+            key=lambda page: getattr(page, sort_by_attribute),
+            reverse=True,
+        )
+
    def _download_song(self, song: Song, naming: dict) -> DownloadOptions:
+        """
+        TODO
+        Search the song in the file system.
+        """
+        r = DownloadResult(total=1)
+        
        # pre process the data recursively
        song.compile()
        
        # manage the naming
        naming: Dict[str, List[str]] = defaultdict(list, naming)
        naming["song"].append(song.title_string)
+        naming["genre"].append(song.genre)
        naming["isrc"].append(song.isrc)
        naming["album"].extend(a.title_string for a in song.album_collection)
        naming["album_type"].extend(a.album_type.value for a in song.album_collection)
@@ -216,21 +254,41 @@ class Pages:
        for key, value in naming.items():
            # https://stackoverflow.com/a/17016257
            naming[key] = list(dict.fromkeys(items))
-            naming[key] = [fit_to_file_system(i) for i in naming[key] if i is not None]

-        # get every possible path
-        path_format = Path(main_settings["download_path"], main_settings["download_file"])
-        for part in path_format.parts:
-            pass
+        # manage the targets
+        tmp: Target = Target.temp(file_extension=main_settings["audio_format"])
+        found_on_disc = False

-        return DownloadOptions()
+        song.target_collection.append(Target(
+            relative_to_music_dir=True,
+            file_path=Path(
+                self._parse_path_template(main_settings["download_path"], naming=naming), 
+                self._parse_path_template(main_settings["download_file"], naming=naming),
+            )
+        ))
+        for target in song.target_collection:
+            if target.exists():
+                output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY)
+
+                found_on_disc = True
+                r.found_on_disk += 1
+                target.copy_content(tmp)
+            else:
+                target.create_parent_directories()
+                output(f'- {target.file_path}', color=BColors.GREY)
+
+        # actually download
+        for page in self._get_pages_with_source(song, sort_by_attribute="DOWNLOAD_PRIORITY"):
+            r = page.download_song_to_target(song, tmp, r)
+
+        return r

    def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]:
-        source = Source.match_url(url, SourcePages.MANUAL)
+        source = Source.match_url(url, SourceType.MANUAL)
        
        if source is None:
            raise UrlNotFoundException(url=url)
        
-        _actual_page = self._source_to_page[source.page_enum]
+        _actual_page = self._source_to_page[source.source_type]
        
        return _actual_page, self._page_instances[_actual_page].fetch_object_from_source(source=source, stop_at_level=stop_at_level)
--- a/music_kraken/download/results.py
+++ b/music_kraken/download/results.py
@@ -2,7 +2,7 @@ from typing import Tuple, Type, Dict, List, Generator, Union
 from dataclasses import dataclass

 from ..objects import DatabaseObject
-from ..utils.enums.source import SourcePages
+from ..utils.enums.source import SourceType
 from ..pages import Page, EncyclopaediaMetallum, Musify