From cc142532394a64d8556074a2bcf506b77628167d Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 15 May 2024 12:18:08 +0200 Subject: [PATCH] draft: streaming the audio --- music_kraken/download/page_attributes.py | 50 +++++++++++++++--------- music_kraken/objects/source.py | 18 ++++++++- music_kraken/pages/bandcamp.py | 1 - 3 files changed, 47 insertions(+), 22 deletions(-) diff --git a/music_kraken/download/page_attributes.py b/music_kraken/download/page_attributes.py index 6cef729..2a4297d 100644 --- a/music_kraken/download/page_attributes.py +++ b/music_kraken/download/page_attributes.py @@ -16,6 +16,7 @@ from ..objects import ( Artist, Label, ) +from ..audio import write_metadata_to_target, correct_codec from ..utils.string_processing import fit_to_file_system from ..utils.config import youtube_settings, main_settings from ..utils.path_manager import LOCATIONS @@ -125,15 +126,10 @@ class Pages: return data_object def fetch_from_source(self, source: Source, **kwargs) -> Optional[DataObject]: - page: Page = self._get_page_from_enum(source.source_type) - if page is None: + if not source.has_page: return None - # getting the appropriate function for the page and the object type - source_type = page.get_source_type(source) - if not hasattr(page, fetch_map[source_type]): - return None - func = getattr(page, fetch_map[source_type])(source=source, **kwargs) + func = getattr(source.page, fetch_map[source_type])(source=source, **kwargs) # fetching the data object and marking it as fetched data_object: DataObject = func(source=source) @@ -215,11 +211,6 @@ class Pages: return possible_parts - def _get_pages_with_source(self, data_object: DataObject, sort_by_attribute: str = "DOWNLOAD_PRIORITY") -> List[Page]: - pages = [self._get_page_from_enum(s.source_type) for s in data_object.source_collection.get_sources()] - pages.sort(key=lambda p: getattr(p, sort_by_attribute), reverse=True) - return list(pages) - def _download_song(self, song: Song, naming: dict) -> DownloadOptions: """ TODO @@ -248,7 +239,6 @@ class Pages: # manage the targets tmp: Target = Target.temp(file_extension=main_settings["audio_format"]) - found_on_disc = False song.target_collection.append(Target( relative_to_music_dir=True, @@ -260,17 +250,39 @@ class Pages: for target in song.target_collection: if target.exists(): output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY) - - found_on_disc = True r.found_on_disk += 1 - target.copy_content(tmp) + + if self.download_options.download_again_if_found: + target.copy_content(tmp) else: target.create_parent_directories() output(f'- {target.file_path}', color=BColors.GREY) - # actually download - for page in self._get_pages_with_source(song, sort_by_attribute="DOWNLOAD_PRIORITY"): - r = page.download_song_to_target(song, tmp, r) + # this streams from every available source until something succeeds, setting the skip intervals to the values of the according source + used_source: Optional[Source] = None + skip_intervals: List[Tuple[float, float]] = [] + for source in song.source_collection.get_sources(source_type_sorting={ + "only_with_page": True, + "sort_key": lambda page: page.download_priority, + "reverse": True, + }): + if tmp.exists: + break + + used_source = source + streaming_results = source.page.download_song_to_target(source=source, target=tmp, desc="download") + skip_intervals = source.page.get_skip_intervals(song=song, source=source) + + # if something has been downloaded but it somehow failed, delete the file + if streaming_results.is_fatal_error and tmp.exists: + tmp.delete() + + # if everything went right, the file should exist now + if not tmp.exists: + r.error_message = f"The song {song.option_string} couldn't be downloaded." + return r + + # post process the audio return r diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py index 6258468..3d0b492 100644 --- a/music_kraken/objects/source.py +++ b/music_kraken/objects/source.py @@ -2,7 +2,19 @@ from __future__ import annotations from collections import defaultdict from enum import Enum -from typing import List, Dict, Set, Tuple, Optional, Iterable, Generator, TypedDict, Callable, Any +from typing import ( + List, + Dict, + Set, + Tuple, + Optional, + Iterable, + Generator, + TypedDict, + Callable, + Any, + TYPE_CHECKING +) from urllib.parse import urlparse, ParseResult from dataclasses import dataclass, field from functools import cached_property @@ -15,6 +27,8 @@ from ..utils.string_processing import hash_url, shorten_display_url from .metadata import Mapping, Metadata from .parents import OuterProxy from .collection import Collection +if TYPE_CHECKING: + from ..pages.abstract import Page @@ -78,7 +92,7 @@ class Source: return self.source_type.page is not None @property - def page(self) -> OuterProxy: + def page(self) -> Page: return self.source_type.page @property diff --git a/music_kraken/pages/bandcamp.py b/music_kraken/pages/bandcamp.py index 44bc6a1..30dbbb0 100644 --- a/music_kraken/pages/bandcamp.py +++ b/music_kraken/pages/bandcamp.py @@ -51,7 +51,6 @@ class BandcampTypes(Enum): class Bandcamp(Page): SOURCE_TYPE = ALL_SOURCE_TYPES.BANDCAMP - LOGGER = logging_settings["bandcamp_logger"] def __init__(self, *args, **kwargs): self.connection: Connection = Connection(