implemented stream

This commit is contained in:
Hellow2 2023-03-31 09:47:03 +02:00
parent 5067b84f48
commit c93c469576
6 changed files with 51 additions and 18 deletions

View File

@ -332,7 +332,7 @@ if __name__ == "__main__":
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
handlers=[
logging.FileHandler(os.path.join(temp_dir, LOG_FILE)),
logging.FileHandler(os.path.join(TEMP_DIR, LOG_FILE)),
logging.StreamHandler()
]
)

View File

@ -141,6 +141,9 @@ class ID3Timestamp:
second=second
)
def __hash__(self):
    """Hash by the wrapped datetime object, so equal timestamps collide."""
    return hash(self.date_obj)
def __lt__(self, other):
    """Order timestamps chronologically via their underlying datetime objects."""
    mine, theirs = self.date_obj, other.date_obj
    return mine < theirs

View File

@ -1,6 +1,8 @@
from typing import Optional, List, Tuple
from pathlib import Path
from collections import defaultdict
import requests
# from tqdm import tqdm
from ..utils import shared
from .parents import DatabaseObject
@ -60,3 +62,26 @@ class Target(DatabaseObject):
copy_to.create_path()
with open(self.file_path, "wb") as write_to:
write_to.write(read_from.read())
def stream_into(self, r: requests.Response):
    """
    Stream an HTTP response body into this target's file.

    Writes the body in fixed-size chunks so large downloads never have to
    fit in memory.

    :param r: a requests.Response obtained with ``stream=True`` — otherwise
        ``iter_content`` has already consumed the body into memory.
    """
    self.create_path()

    chunk_size = 1024

    with open(self.file_path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=chunk_size):
            f.write(chunk)

    """
    # progress-bar variant; doesn't work yet due to
    # https://github.com/tqdm/tqdm/issues/261
    # NOTE: guard the header lookup — 'content-length' may be missing
    # (chunked transfer encoding), in which case get() returns None.
    total_size = int(r.headers.get('content-length', 0))
    with open(self.file_path,'wb') as f, \
            tqdm(desc=self._file, total=total_size, unit='iB', unit_scale=True, unit_divisor=chunk_size) as pbar:
        for chunk in r.iter_content(chunk_size=chunk_size):
            size = f.write(chunk)
            pbar.update(size)
    """

View File

@ -1,3 +1,4 @@
import random
from typing import Optional, Union, Type, Dict, List
from bs4 import BeautifulSoup
import requests
@ -35,17 +36,20 @@ class Page:
SOURCE_TYPE: SourcePages
@classmethod
def get_request(cls, url: str, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[
def get_request(cls, url: str, stream: bool = False, accepted_response_codes: set = set((200,)), trie: int = 0) -> Optional[
requests.Response]:
retry = False
try:
r = cls.API_SESSION.get(url, timeout=cls.TIMEOUT)
r = cls.API_SESSION.get(url, timeout=cls.TIMEOUT, stream=stream)
except requests.exceptions.Timeout:
retry = True
except requests.exceptions.ConnectionError:
retry = True
if not retry and r.status_code in accepted_response_codes:
return r
if not retry:
LOGGER.warning(f"{cls.__name__} responded wit {r.status_code} at GET:{url}. ({trie}-{cls.TRIES})")
LOGGER.debug(r.content)
@ -330,7 +334,12 @@ class Page:
if len(sources) == 0:
return
temp_target = cls._download_song_to_targets(source=sources[0], target_list=song.target_collection.shallow_list)
temp_target: Target = Target(
path=shared.TEMP_DIR,
file=str(random.randint(0, 999999))
)
cls._download_song_to_targets(source=sources[0], target=temp_target)
cls._post_process_targets(song, temp_target)
@classmethod

View File

@ -897,7 +897,7 @@ class Musify(Page):
return None
@classmethod
def _download_song_to_targets(cls, source: Source) -> Path:
def _download_song_to_targets(cls, source: Source, target: Target) -> Path:
"""
https://musify.club/track/im-in-a-coffin-life-never-was-waste-of-skin-16360302
https://musify.club/track/dl/16360302/im-in-a-coffin-life-never-was-waste-of-skin.mp3
@ -906,12 +906,8 @@ class Musify(Page):
if url.source_type != MusifyTypes.SONG:
return
target: Target = Target(
path=TEMP_FOLDER,
file=str(random.randint(0, 999999))
)
endpoint = f"https://musify.club/track/dl/{url.musify_id}/{url.name_without_id}.mp3"
print(endpoint)
return target
target.stream_into(cls.get_request(endpoint, stream=True))

View File

@ -11,18 +11,18 @@ LOG_FILE = "download_logs.log"
TEMP_DATABASE_FILE = "metadata.db"
DATABASE_STRUCTURE_FILE = "database_structure.sql"
DATABASE_STRUCTURE_FALLBACK = "https://raw.githubusercontent.com/HeIIow2/music-downloader/master/assets/database_structure.sql"

# Working directory for downloads, the temp database and the log file,
# rooted in the OS temp dir. makedirs(exist_ok=True) replaces the
# exists()+mkdir pair: it is race-free and also creates missing parents.
TEMP_DIR = os.path.join(tempfile.gettempdir(), TEMP_FOLDER)
os.makedirs(TEMP_DIR, exist_ok=True)

TEMP_DATABASE_PATH = os.path.join(TEMP_DIR, TEMP_DATABASE_FILE)

# configure logger default: mirror every record to a log file in TEMP_DIR
# and to stderr.
logging.basicConfig(
    level=logging.INFO,
    format=logging.BASIC_FORMAT,
    handlers=[
        logging.FileHandler(os.path.join(TEMP_DIR, LOG_FILE)),
        logging.StreamHandler()
    ]
)