From c93c469576d33564cf11dbf9ac310e39a5c97093 Mon Sep 17 00:00:00 2001 From: Hellow2 Date: Fri, 31 Mar 2023 09:47:03 +0200 Subject: [PATCH] implemented stream --- .../metadata/metadata_fetch.py | 2 +- src/music_kraken/objects/metadata.py | 3 +++ src/music_kraken/objects/target.py | 25 +++++++++++++++++++ src/music_kraken/pages/abstract.py | 19 ++++++++++---- src/music_kraken/pages/musify.py | 10 +++----- src/music_kraken/utils/shared.py | 10 ++++---- 6 files changed, 51 insertions(+), 18 deletions(-) diff --git a/src/music_kraken/not_used_anymore/metadata/metadata_fetch.py b/src/music_kraken/not_used_anymore/metadata/metadata_fetch.py index 6ac2c90..7f71c86 100644 --- a/src/music_kraken/not_used_anymore/metadata/metadata_fetch.py +++ b/src/music_kraken/not_used_anymore/metadata/metadata_fetch.py @@ -332,7 +332,7 @@ if __name__ == "__main__": level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", handlers=[ - logging.FileHandler(os.path.join(temp_dir, LOG_FILE)), + logging.FileHandler(os.path.join(TEMP_DIR, LOG_FILE)), logging.StreamHandler() ] ) diff --git a/src/music_kraken/objects/metadata.py b/src/music_kraken/objects/metadata.py index 0a41832..3d98d35 100644 --- a/src/music_kraken/objects/metadata.py +++ b/src/music_kraken/objects/metadata.py @@ -140,6 +140,9 @@ class ID3Timestamp: minute=minute, second=second ) + + def __hash__(self): + return self.date_obj.__hash__() def __lt__(self, other): return self.date_obj < other.date_obj diff --git a/src/music_kraken/objects/target.py b/src/music_kraken/objects/target.py index 1d03a3b..76734ab 100644 --- a/src/music_kraken/objects/target.py +++ b/src/music_kraken/objects/target.py @@ -1,6 +1,8 @@ from typing import Optional, List, Tuple from pathlib import Path from collections import defaultdict +import requests +# from tqdm import tqdm from ..utils import shared from .parents import DatabaseObject @@ -60,3 +62,26 @@ class Target(DatabaseObject): copy_to.create_path() with open(self.file_path, "wb") as write_to: write_to.write(read_from.read()) + + def stream_into(self, r: requests.Response): + self.create_path() + + chunk_size = 1024 + total_size = int(r.headers.get('content-length')) + initial_pos = 0 + + with open(self.file_path,'wb') as f: + for chunk in r.iter_content(chunk_size=chunk_size): + size = f.write(chunk) + + """ + # doesn't work yet due to + # https://github.com/tqdm/tqdm/issues/261 + + + with open(self.file_path,'wb') as f, \ + tqdm(desc=self._file, total=total_size, unit='iB', unit_scale=True, unit_divisor=chunk_size) as pbar: + for chunk in r.iter_content(chunk_size=chunk_size): + size = f.write(chunk) + pbar.update(size) + """ diff --git a/src/music_kraken/pages/abstract.py b/src/music_kraken/pages/abstract.py index c0f74b1..b0a538a 100644 --- a/src/music_kraken/pages/abstract.py +++ b/src/music_kraken/pages/abstract.py @@ -1,3 +1,4 @@ +import random from typing import Optional, Union, Type, Dict, List from bs4 import BeautifulSoup import requests @@ -35,19 +36,22 @@ class Page: SOURCE_TYPE: SourcePages @classmethod - def get_request(cls, url: str, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[ + def get_request(cls, url: str, stream: bool = False, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[ requests.Response]: retry = False try: - r = cls.API_SESSION.get(url, timeout=cls.TIMEOUT) + r = cls.API_SESSION.get(url, timeout=cls.TIMEOUT, stream=stream) except requests.exceptions.Timeout: retry = True + except requests.exceptions.ConnectionError: + retry = True if not retry and r.status_code in accepted_response_codes: return r - LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at GET:{url}. ({trie}-{cls.TRIES})") - LOGGER.debug(r.content) + if not retry: + LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at GET:{url}. ({trie}-{cls.TRIES})") + LOGGER.debug(r.content) if trie >= cls.TRIES: LOGGER.warning("to many tries. Aborting.") @@ -330,7 +334,12 @@ class Page: if len(sources) == 0: return - temp_target = cls._download_song_to_targets(source=sources[0], target_list=song.target_collection.shallow_list) + temp_target: Target = Target( + path=shared.TEMP_DIR, + file=str(random.randint(0, 999999)) + ) + + cls._download_song_to_targets(source=sources[0], target=temp_target) cls._post_process_targets(song, temp_target) @classmethod diff --git a/src/music_kraken/pages/musify.py b/src/music_kraken/pages/musify.py index 1a25629..d3520ac 100644 --- a/src/music_kraken/pages/musify.py +++ b/src/music_kraken/pages/musify.py @@ -897,7 +897,7 @@ class Musify(Page): return None @classmethod - def _download_song_to_targets(cls, source: Source) -> Path: + def _download_song_to_targets(cls, source: Source, target: Target) -> Path: """ https://musify.club/track/im-in-a-coffin-life-never-was-waste-of-skin-16360302 https://musify.club/track/dl/16360302/im-in-a-coffin-life-never-was-waste-of-skin.mp3 @@ -906,12 +906,8 @@ class Musify(Page): if url.source_type != MusifyTypes.SONG: return - target: Target = Target( - path=TEMP_FOLDER, - file=str(random.randint(0, 999999)) - ) + endpoint = f"https://musify.club/track/dl/{url.musify_id}/{url.name_without_id}.mp3" print(endpoint) - - return target + target.stream_into(cls.get_request(endpoint, stream=True)) diff --git a/src/music_kraken/utils/shared.py b/src/music_kraken/utils/shared.py index 7c0b37c..f8ea941 100644 --- a/src/music_kraken/utils/shared.py +++ b/src/music_kraken/utils/shared.py @@ -11,18 +11,18 @@ LOG_FILE = "download_logs.log" TEMP_DATABASE_FILE = "metadata.db" DATABASE_STRUCTURE_FILE = "database_structure.sql" DATABASE_STRUCTURE_FALLBACK = "https://raw.githubusercontent.com/HeIIow2/music-downloader/master/assets/database_structure.sql" -temp_dir = os.path.join(tempfile.gettempdir(), TEMP_FOLDER) -if not os.path.exists(temp_dir): - os.mkdir(temp_dir) +TEMP_DIR = os.path.join(tempfile.gettempdir(), TEMP_FOLDER) +if not os.path.exists(TEMP_DIR): + os.mkdir(TEMP_DIR) -TEMP_DATABASE_PATH = os.path.join(temp_dir, TEMP_DATABASE_FILE) +TEMP_DATABASE_PATH = os.path.join(TEMP_DIR, TEMP_DATABASE_FILE) # configure logger default logging.basicConfig( level=logging.INFO, format=logging.BASIC_FORMAT, handlers=[ - logging.FileHandler(os.path.join(temp_dir, LOG_FILE)), + logging.FileHandler(os.path.join(TEMP_DIR, LOG_FILE)), logging.StreamHandler() ] )