draft: rewriting source
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful

This commit is contained in:
2024-05-14 15:18:17 +02:00
parent bb32fc7647
commit da8887b279
19 changed files with 198 additions and 242 deletions

View File

@@ -15,5 +15,6 @@ class DownloadOptions:
download_all: bool = False
album_type_blacklist: Set[AlbumType] = field(default_factory=lambda: set(AlbumType(a) for a in main_settings["album_type_blacklist"]))
download_again_if_found: bool = False
process_audio_if_found: bool = False
process_metadata_if_found: bool = True

View File

@@ -1,17 +1,29 @@
from typing import Tuple, Type, Dict, Set, Optional, List
from collections import defaultdict
from pathlib import Path
import re
from . import FetchOptions, DownloadOptions
from .results import SearchResults
from ..objects import DatabaseObject as DataObject, Source, Album, Song, Artist, Label
from ..objects import (
DatabaseObject as DataObject,
Collection,
Target,
Source,
Options,
Song,
Album,
Artist,
Label,
)
from ..utils.string_processing import fit_to_file_system
from ..utils.config import youtube_settings
from ..utils.enums.source import SourcePages
from ..utils.config import youtube_settings, main_settings
from ..utils.path_manager import LOCATIONS
from ..utils.enums import SourceType
from ..utils.support_classes.download_result import DownloadResult
from ..utils.support_classes.query import Query
from ..utils.support_classes.download_result import DownloadResult
from ..utils.exception import MKMissingNameException
from ..utils.exception.download import UrlNotFoundException
from ..utils.shared import DEBUG_PAGES
@@ -61,7 +73,7 @@ class Pages:
# initialize all page instances
self._page_instances: Dict[Type[Page], Page] = dict()
self._source_to_page: Dict[SourcePages, Type[Page]] = dict()
self._source_to_page: Dict[SourceType, Type[Page]] = dict()
exclude_pages = exclude_pages if exclude_pages is not None else set()
@@ -84,7 +96,7 @@ class Pages:
self._page_instances[page_type] = page_type(fetch_options=self.fetch_options, download_options=self.download_options)
self._source_to_page[page_type.SOURCE_TYPE] = page_type
def _get_page_from_enum(self, source_page: SourceType) -> Optional[Page]:
    """
    Look up the page instance that is registered for a source type.

    :param source_page: the source type to resolve.
    :return: the instance stored in ``self._page_instances`` for the page class
        that ``self._source_to_page`` maps ``source_page`` to, or ``None`` if
        no page class is mapped to that source type.
    """
    page_type = self._source_to_page.get(source_page)
    if page_type is None:
        # nothing registered for this source type; callers check for None
        return None

    return self._page_instances[page_type]
@@ -113,7 +125,7 @@ class Pages:
return data_object
def fetch_from_source(self, source: Source, **kwargs) -> Optional[DataObject]:
page: Page = self._get_page_from_enum(source.page_enum)
page: Page = self._get_page_from_enum(source.source_type)
if page is None:
return None
@@ -129,7 +141,7 @@ class Pages:
return data_object
def fetch_from_url(self, url: str) -> Optional[DataObject]:
source = Source.match_url(url, SourcePages.MANUAL)
source = Source.match_url(url, SourceType.MANUAL)
if source is None:
return None
@@ -198,13 +210,39 @@ class Pages:
return download_result
def _extract_fields_from_template(self, path_template: str) -> Set[str]:
    """
    Collect the placeholder names that occur in a path template.

    A placeholder is a non-empty run of characters other than ``}`` that is
    enclosed in curly braces, for example ``{album}``.

    :param path_template: the template string to scan.
    :return: the distinct names found between ``{`` and ``}``.
    """
    placeholders: Set[str] = set()
    for match in re.finditer(r"{([^}]+)}", path_template):
        placeholders.add(match.group(1))
    return placeholders
def _parse_path_template(self, path_template: str, naming: Dict[str, List[str]]) -> str:
    """
    Fill every ``{field}`` placeholder of a path template.

    Each placeholder is replaced by the first value listed for it in ``naming``.

    :param path_template: template containing ``{field}`` placeholders.
    :param naming: maps a field name to its candidate values; only the first
        candidate of each field is used.
    :return: the template with all placeholders substituted.
    :raises MKMissingNameException: if a placeholder has no candidate value.
    """
    for field_name in self._extract_fields_from_template(path_template):
        # .get() so a field that is absent from a plain dict is reported the
        # same way as a field with an empty candidate list
        candidates = naming.get(field_name)
        if not candidates:
            raise MKMissingNameException(f"Missing field for {field_name}.")

        path_template = path_template.replace(f"{{{field_name}}}", candidates[0])

    # hand back the substituted template itself
    return path_template
def _get_pages_with_source(self, data_object: DataObject, sort_by_attribute: str = "DOWNLOAD_PRIORITY") -> List[Page]:
    """
    List the pages belonging to the sources of a data object, best first.

    :param data_object: object whose ``source_collection.get_sources()`` is read.
    :param sort_by_attribute: name of the page attribute to order by; pages
        with a higher value come first.
    :return: a new list with one page per source that resolves to a page,
        sorted descending by ``sort_by_attribute``.
    """
    resolved = (
        self._get_page_from_enum(source.source_type)
        for source in data_object.source_collection.get_sources()
    )
    # _get_page_from_enum returns None for source types without a page;
    # those would break the attribute lookup in the sort key below
    pages = [page for page in resolved if page is not None]
    pages.sort(key=lambda page: getattr(page, sort_by_attribute), reverse=True)
    return pages
def _download_song(self, song: Song, naming: dict) -> DownloadOptions:
"""
TODO
Search the song in the file system.
"""
r = DownloadResult(total=1)
# pre process the data recursively
song.compile()
# manage the naming
naming: Dict[str, List[str]] = defaultdict(list, naming)
naming["song"].append(song.title_string)
naming["genre"].append(song.genre)
naming["isrc"].append(song.isrc)
naming["album"].extend(a.title_string for a in song.album_collection)
naming["album_type"].extend(a.album_type.value for a in song.album_collection)
@@ -216,21 +254,41 @@ class Pages:
for key, value in naming.items():
# https://stackoverflow.com/a/17016257
naming[key] = list(dict.fromkeys(items))
naming[key] = [fit_to_file_system(i) for i in naming[key] if i is not None]
# get every possible path
path_format = Path(main_settings["download_path"], main_settings["download_file"])
for part in path_format.parts:
pass
# manage the targets
tmp: Target = Target.temp(file_extension=main_settings["audio_format"])
found_on_disc = False
return DownloadOptions()
song.target_collection.append(Target(
relative_to_music_dir=True,
file_path=Path(
self._parse_path_template(main_settings["download_path"], naming=naming),
self._parse_path_template(main_settings["download_file"], naming=naming),
)
))
for target in song.target_collection:
if target.exists():
output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY)
found_on_disc = True
r.found_on_disk += 1
target.copy_content(tmp)
else:
target.create_parent_directories()
output(f'- {target.file_path}', color=BColors.GREY)
# actually download
for page in self._get_pages_with_source(song, sort_by_attribute="DOWNLOAD_PRIORITY"):
r = page.download_song_to_target(song, tmp, r)
return r
def fetch_url(self, url: str, stop_at_level: int = 2) -> Tuple[Type[Page], DataObject]:
    """
    Resolve a url to its page class and fetch the object it points to.

    :param url: the url to match against the known sources.
    :param stop_at_level: passed through unchanged to the page's
        ``fetch_object_from_source``.
    :return: the page class mapped to the matched source type, together with
        whatever that page's ``fetch_object_from_source`` returns.
    :raises UrlNotFoundException: if ``Source.match_url`` returns ``None``.
    """
    source = Source.match_url(url, SourceType.MANUAL)
    if source is None:
        raise UrlNotFoundException(url=url)

    _actual_page = self._source_to_page[source.source_type]

    return _actual_page, self._page_instances[_actual_page].fetch_object_from_source(source=source, stop_at_level=stop_at_level)

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Type, Dict, List, Generator, Union
from dataclasses import dataclass
from ..objects import DatabaseObject
from ..utils.enums.source import SourcePages
from ..utils.enums.source import SourceType
from ..pages import Page, EncyclopaediaMetallum, Musify