Merge remote-tracking branch 'origin/source/bandcamp' into source/bandcamp

This commit is contained in:
Hellow 2024-01-22 18:36:26 +01:00
commit 3f14f933c0
13 changed files with 371 additions and 76 deletions

View File

@ -2,7 +2,5 @@
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
<mapping directory="$PROJECT_DIR$/../forks/sponsorblock.py" vcs="Git" />
<mapping directory="$PROJECT_DIR$/../rythmbox-id3-lyrics-support" vcs="Git" />
</component>
</project>

View File

@ -1,4 +1,4 @@
requests~=2.28.1
requests~=2.31.0
mutagen~=1.46.0
musicbrainzngs~=0.7.1
jellyfish~=0.9.0
@ -7,7 +7,7 @@ pycountry~=22.3.5
python-dateutil~=2.8.2
pandoc~=2.3
SQLAlchemy~=2.0.7
setuptools~=60.2.0
setuptools~=68.2.0
tqdm~=4.65.0
ffmpeg-python~=0.2.0
platformdirs~=3.2.0
@ -18,3 +18,7 @@ pyffmpeg~=2.4.2.18
ffmpeg-progress-yield~=0.7.8
pathvalidate~=2.5.2
guppy3~=3.1.3
toml~=0.10.2
typing_extensions~=4.7.1
responses~=0.24.1

View File

@ -4,12 +4,13 @@ import sys
from .utils.shared import DEBUG, DEBUG_LOGGING
from .utils.config import logging_settings, main_settings, read_config
read_config()
from . import cli
if DEBUG:
import sys
sys.setrecursionlimit(100)

View File

@ -79,6 +79,18 @@ def cli():
action="store_true"
)
parser.add_argument(
"--clear-cache",
help="Deletes the cache.",
action="store_true"
)
parser.add_argument(
"--clean-cache",
help="Deletes the outdated cache. (all expired cached files, and not indexed files)",
action="store_true"
)
arguments = parser.parse_args()
if arguments.verbose or arguments.test:
@ -112,6 +124,12 @@ def cli():
if arguments.frontend:
cli.set_frontend(silent=False)
if arguments.clear_cache:
cli.clear_cache()
if arguments.clean_cache:
cli.clean_cache()
# getting the genre
genre: str = arguments.genre
if arguments.test:

View File

@ -0,0 +1,21 @@
from logging import getLogger
from ...connection.cache import Cache
def clear_cache():
"""
Deletes the cache.
:return:
"""
Cache("main", getLogger("cache")).clear()
def clean_cache():
"""
Deletes the outdated cache. (all expired cached files, and not indexed files)
:return:
"""
Cache("main", getLogger("cache")).clean()

View File

@ -0,0 +1,193 @@
import json
from pathlib import Path
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import List, Optional
from functools import lru_cache
import logging
from ..utils.config import main_settings
@dataclass
class CacheAttribute:
module: str
name: str
created: datetime
expires: datetime
@property
def id(self):
return f"{self.module}_{self.name}"
@property
def is_valid(self):
return datetime.now() < self.expires
def __eq__(self, other):
return self.__dict__ == other.__dict__
class Cache:
def __init__(self, module: str, logger: logging.Logger):
self.module = module
self.logger: logging.Logger = logger
self._dir = main_settings["cache_directory"]
self.index = Path(self._dir, "index.json")
if not self.index.is_file():
with self.index.open("w") as i:
i.write(json.dumps([]))
self.cached_attributes: List[CacheAttribute] = []
self._id_to_attribute = {}
self._time_fields = {"created", "expires"}
with self.index.open("r") as i:
for c in json.loads(i.read()):
for key in self._time_fields:
c[key] = datetime.fromisoformat(c[key])
ca = CacheAttribute(**c)
self.cached_attributes.append(ca)
self._id_to_attribute[ca.id] = ca
@lru_cache()
def _init_module(self, module: str) -> Path:
"""
:param module:
:return: the module path
"""
r = Path(self._dir, module)
r.mkdir(exist_ok=True)
return r
def _write_index(self, indent: int = 4):
_json = []
for c in self.cached_attributes:
d = c.__dict__
for key in self._time_fields:
d[key] = d[key].isoformat()
_json.append(d)
with self.index.open("w") as f:
f.write(json.dumps(_json, indent=indent))
def _write_attribute(self, cached_attribute: CacheAttribute, write: bool = True) -> bool:
existing_attribute: Optional[CacheAttribute] = self._id_to_attribute.get(cached_attribute.id)
if existing_attribute is not None:
# the attribute exists
if existing_attribute == cached_attribute:
return True
if existing_attribute.is_valid:
return False
existing_attribute.__dict__ = cached_attribute.__dict__
else:
self.cached_attributes.append(cached_attribute)
self._id_to_attribute[cached_attribute.id] = cached_attribute
if write:
self._write_index()
return True
def set(self, content: bytes, name: str, expires_in: float = 10):
"""
:param content:
:param module:
:param name:
:param expires_in: the unit is days
:return:
"""
if name == "":
return
module_path = self._init_module(self.module)
cache_attribute = CacheAttribute(
module=self.module,
name=name,
created=datetime.now(),
expires=datetime.now() + timedelta(days=expires_in),
)
self._write_attribute(cache_attribute)
cache_path = Path(module_path, name)
with cache_path.open("wb") as content_file:
self.logger.debug(f"writing cache to {cache_path}")
content_file.write(content)
def get(self, name: str) -> Optional[bytes]:
path = Path(self._dir, self.module, name)
if not path.is_file():
return None
# check if it is outdated
existing_attribute: CacheAttribute = self._id_to_attribute[f"{self.module}_{name}"]
if not existing_attribute.is_valid:
return
with path.open("rb") as f:
return f.read()
def clean(self):
keep = set()
for ca in self.cached_attributes.copy():
file = Path(self._dir, ca.module, ca.name)
if not ca.is_valid:
self.logger.debug(f"deleting cache {ca.id}")
file.unlink()
self.cached_attributes.remove(ca)
del self._id_to_attribute[ca.id]
else:
keep.add(file)
# iterate through every module (folder)
for module_path in self._dir.iterdir():
if not module_path.is_dir():
continue
# delete all files not in keep
for path in module_path.iterdir():
if path not in keep:
self.logger.info(f"Deleting cache {path}")
path.unlink()
# delete all empty directories
for path in module_path.iterdir():
if path.is_dir() and not list(path.iterdir()):
self.logger.debug(f"Deleting cache directory {path}")
path.rmdir()
self._write_index()
def clear(self):
"""
delete every file in the cache directory
:return:
"""
for path in self._dir.iterdir():
if path.is_dir():
for file in path.iterdir():
file.unlink()
path.rmdir()
else:
path.unlink()
self.cached_attributes.clear()
self._id_to_attribute.clear()
self._write_index()
def __repr__(self):
return f"<Cache {self.module}>"

View File

@ -1,16 +1,18 @@
import time
from typing import List, Dict, Callable, Optional, Set
from urllib.parse import urlparse, urlunsplit, ParseResult
import logging
import threading
import time
from typing import List, Dict, Optional, Set
from urllib.parse import urlparse, urlunsplit, ParseResult
import requests
import responses
from tqdm import tqdm
from .cache import Cache
from .rotating import RotatingProxy
from ..objects import Target
from ..utils.config import main_settings
from ..utils.support_classes.download_result import DownloadResult
from ..objects import Target
class Connection:
@ -25,13 +27,18 @@ class Connection:
accepted_response_codes: Set[int] = None,
semantic_not_found: bool = True,
sleep_after_404: float = 0.0,
heartbeat_interval = 0,
heartbeat_interval=0,
module: str = "general",
cache_expiring_duration: float = 10
):
if proxies is None:
proxies = main_settings["proxies"]
if header_values is None:
header_values = dict()
self.cache: Cache = Cache(module=module, logger=logger)
self.cache_expiring_duration = cache_expiring_duration
self.HEADER_VALUES = header_values
self.LOGGER = logger
@ -55,23 +62,24 @@ class Connection:
@property
def user_agent(self) -> str:
return self.session.headers.get("user-agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36")
return self.session.headers.get("user-agent",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36")
def start_heartbeat(self):
if self.heartbeat_interval <= 0:
self.LOGGER.warning(f"Can't start a heartbeat with {self.heartbeat_interval}s in between.")
self.heartbeat_thread = threading.Thread(target=self._heartbeat_loop, args=(self.heartbeat_interval, ), daemon=True)
self.heartbeat_thread = threading.Thread(target=self._heartbeat_loop, args=(self.heartbeat_interval,),
daemon=True)
self.heartbeat_thread.start()
def heartbeat_failed(self):
self.LOGGER.warning(f"I just died... (The heartbeat failed)")
def heartbeat(self):
# Your code to send heartbeat requests goes here
print("the hearth is beating, but it needs to be implemented ;-;\nFuck youuuu for setting heartbeat in the constructor to true, but not implementing the method Connection.hearbeat()")
print(
"the hearth is beating, but it needs to be implemented ;-;\nFuck youuuu for setting heartbeat in the constructor to true, but not implementing the method Connection.hearbeat()")
def _heartbeat_loop(self, interval: float):
def heartbeat_wrapper():
@ -85,8 +93,6 @@ class Connection:
heartbeat_wrapper()
time.sleep(interval)
def base_url(self, url: ParseResult = None):
if url is None:
url = self.HOST
@ -119,9 +125,12 @@ class Connection:
return headers
def _request(
def save(self, r: requests.Response, name: str, **kwargs):
self.cache.set(r.content, name, expires_in=kwargs.get("expires_in", self.cache_expiring_duration))
def request(
self,
request: Callable,
method: str,
try_count: int,
accepted_response_codes: set,
url: str,
@ -131,8 +140,20 @@ class Connection:
raw_url: bool = False,
sleep_after_404: float = None,
is_heartbeat: bool = False,
name: str = "",
**kwargs
) -> Optional[requests.Response]:
if name != "":
cached = self.cache.get(name)
with responses.RequestsMock() as resp:
resp.add(
method=method,
url=url,
body=cached,
)
return requests.request(method=method, url=url, timeout=timeout, headers=headers, **kwargs)
if sleep_after_404 is None:
sleep_after_404 = self.sleep_after_404
if try_count >= self.TRIES:
@ -158,9 +179,10 @@ class Connection:
while self.session_is_occupied and not is_heartbeat:
pass
r: requests.Response = request(request_url, timeout=timeout, headers=headers, **kwargs)
r: requests.Response = requests.request(method=method, url=url, timeout=timeout, headers=headers, **kwargs)
if r.status_code in accepted_response_codes:
self.save(r, name, **kwargs)
return r
if self.SEMANTIC_NOT_FOUND and r.status_code == 404:
@ -187,15 +209,16 @@ class Connection:
if self.heartbeat_interval > 0 and self.heartbeat_thread is None:
self.start_heartbeat()
return self._request(
request=request,
try_count=try_count+1,
return self.request(
method=method,
try_count=try_count + 1,
accepted_response_codes=accepted_response_codes,
url=url,
timeout=timeout,
headers=headers,
sleep_after_404=sleep_after_404,
is_heartbeat=is_heartbeat,
name=name,
**kwargs
)
@ -213,8 +236,8 @@ class Connection:
if accepted_response_codes is None:
accepted_response_codes = self.ACCEPTED_RESPONSE_CODES
r = self._request(
request=self.session.get,
r = self.request(
method="GET",
try_count=0,
accepted_response_codes=accepted_response_codes,
url=url,
@ -241,8 +264,8 @@ class Connection:
raw_url: bool = False,
**kwargs
) -> Optional[requests.Response]:
r = self._request(
request=self.session.post,
r = self.request(
method="POST",
try_count=0,
accepted_response_codes=accepted_response_codes or self.ACCEPTED_RESPONSE_CODES,
url=url,
@ -282,9 +305,9 @@ class Connection:
if accepted_response_codes is None:
accepted_response_codes = self.ACCEPTED_RESPONSE_CODES
r = self._request(
request=self.session.get,
r = self.request(
method="GET",
try_count=0,
accepted_response_codes=accepted_response_codes,
url=url,
@ -310,8 +333,9 @@ class Connection:
https://en.wikipedia.org/wiki/Kilobyte
> The internationally recommended unit symbol for the kilobyte is kB.
"""
with tqdm(total=total_size-target.size, unit='B', unit_scale=True, unit_divisor=1024, desc=description) as t:
with tqdm(total=total_size - target.size, unit='B', unit_scale=True, unit_divisor=1024,
desc=description) as t:
try:
for chunk in r.iter_content(chunk_size=chunk_size):
size = f.write(chunk)
@ -321,7 +345,8 @@ class Connection:
except requests.exceptions.ConnectionError:
if try_count >= self.TRIES:
self.LOGGER.warning(f"Stream timed out at \"{url}\": to many retries, aborting.")
return DownloadResult(error_message=f"Stream timed out from {url}, reducing the chunksize might help.")
return DownloadResult(
error_message=f"Stream timed out from {url}, reducing the chunksize might help.")
self.LOGGER.warning(f"Stream timed out at \"{url}\": ({try_count}-{self.TRIES})")
retry = True
@ -329,15 +354,14 @@ class Connection:
if total_size > progress:
retry = True
if retry:
self.LOGGER.warning(f"Retrying stream...")
accepted_response_codes.add(206)
return self.stream_into(
url = url,
target = target,
description = description,
try_count=try_count+1,
url=url,
target=target,
description=description,
try_count=try_count + 1,
progress=progress,
accepted_response_codes=accepted_response_codes,
timeout=timeout,

View File

@ -53,7 +53,8 @@ class Bandcamp(Page):
def __init__(self, *args, **kwargs):
self.connection: Connection = Connection(
host="https://bandcamp.com/",
logger=self.LOGGER
logger=self.LOGGER,
module="bandcamp",
)
super().__init__(*args, **kwargs)
@ -356,6 +357,5 @@ class Bandcamp(Page):
def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult:
if source.audio_url is None:
print(source)
return DownloadResult(error_message="Couldn't find download link.")
return self.connection.stream_into(url=source.audio_url, target=target, description=desc)

View File

@ -7,6 +7,9 @@ from dataclasses import dataclass
import re
from functools import lru_cache
from youtube_dl.jsinterp import JSInterpreter
from youtube_dl.extractor.youtube import YoutubeIE
from ...utils.exception.config import SettingValueError
from ...utils.config import main_settings, youtube_settings, logging_settings
from ...utils.shared import DEBUG, DEBUG_YOUTUBE_INITIALIZING
@ -60,7 +63,8 @@ class YoutubeMusicConnection(Connection):
heartbeat_interval=113.25,
header_values={
"Accept-Language": accept_language
}
},
module="youtube_music",
)
# cookie consent for youtube
@ -189,8 +193,10 @@ class YoutubeMusic(SuperYouTube):
# save cookies in settings
youtube_settings["youtube_music_consent_cookies"] = cookie_dict
else:
self.connection.save(r, "index.html")
r = self.connection.get("https://music.youtube.com/")
r = self.connection.get("https://music.youtube.com/", name="index.html")
if r is None:
return

View File

@ -12,7 +12,7 @@ from ..attributes.special_attributes import (
AudioFormatAttribute,
)
config = Config([
config = Config((
Attribute(name="hasnt_yet_started", default_value=False, description="This will be set automatically, to look if it needs to run the scripts that run on start."),
Attribute(name="bitrate", default_value=125, description="Streams the audio with given bitrate [kB/s]. Can't stream with a higher Bitrate, than the audio source provides."),
AudioFormatAttribute(name="audio_format", default_value="mp3", description="""Music Kraken will stream the audio into this format.
@ -64,6 +64,8 @@ all the error messages are shown."""),
PathAttribute(name="temp_directory", default_value=LOCATIONS.TEMP_DIRECTORY.resolve(), description="All temporary stuff is gonna be dumped in this directory."),
PathAttribute(name="log_file", default_value=LOCATIONS.get_log_file("download_logs.log").resolve()),
PathAttribute(name="ffmpeg_binary", default_value=LOCATIONS.FFMPEG_BIN.resolve(), description="Set the path to the ffmpeg binary."),
PathAttribute(name="cache_directory", default_value=LOCATIONS.CACHE_DIRECTORY.resolve(),
description="Set the path of the cache directory."),
Attribute(
name="not_a_genre_regex",
description="These regular expressions tell music-kraken, which sub-folders of the music-directory\n"
@ -93,7 +95,7 @@ But anyways... Freedom of thought, so go ahead and change the messages."""),
Attribute(name="id_bits", default_value=64, description="I really dunno why I even made this a setting.. Modifying this is a REALLY dumb idea."),
Description("🏳️‍⚧️🏳️‍⚧️ Protect trans youth. 🏳️‍⚧️🏳️‍⚧️\n"),
], LOCATIONS.get_config_file("main"))
), LOCATIONS.get_config_file("main"))
class SettingsStructure(TypedDict):
@ -126,4 +128,4 @@ class SettingsStructure(TypedDict):
log_file: Path
not_a_genre_regex: List[str]
ffmpeg_binary: Path
cache_directory: Path

View File

@ -1,29 +0,0 @@
from typing import List, Iterable, Dict, TypeVar, Generic, Iterator, Any, Type
from enum import Enum
from dataclasses import dataclass
from collections import defaultdict
class HookEventTypes(Enum):
pass
@dataclass
class Event:
target: Any
class Hooks:
def __init__(self, target) -> None:
self.target = target
self._callbacks: Dict[HookEventTypes, List[callable]] = defaultdict(list)
def add_event_listener(self, event_type: HookEventTypes, callback: callable):
self._callbacks[event_type].append(callback)
def trigger_event(self, event_type: HookEventTypes, *args, **kwargs):
event: Event = Event(target=self.target)
for callback in self._callbacks[event_type]:
callback(event, *args, **kwargs)

View File

@ -1,14 +1,63 @@
import configparser
from pathlib import Path
import os
from os.path import expandvars
import logging
from sys import platform
import tempfile
from typing import Optional
from pyffmpeg import FFmpeg
from .music_directory import get_music_directory
from .config_directory import get_config_directory
class Locations:
@staticmethod
def _get_env(key: str, default: Path, default_for_windows: bool = True) -> Optional[Path]:
res = os.environ.get(key.upper())
if res is not None:
return res
xdg_user_dirs_file = os.environ.get("XDG_CONFIG_HOME") or Path(Path.home(), ".config", "user-dirs.dirs")
xdg_user_dirs_default_file = Path("/etc/xdg/user-dirs.defaults")
def get_dir_from_xdg_file(xdg_file_path: os.PathLike) -> Optional[Path]:
nonlocal key
try:
with open(xdg_file_path, 'r') as f:
data = "[XDG_USER_DIRS]\n" + f.read()
config = configparser.ConfigParser(allow_no_value=True)
config.read_string(data)
xdg_config = config['XDG_USER_DIRS']
return Path(expandvars(xdg_config[key.lower()].strip('"')))
except (FileNotFoundError, KeyError) as e:
logging.warning(
f"Missing file or No entry found for \"{key}\" in: \"{xdg_file_path}\".\n"
)
logging.debug(str(e))
res = get_dir_from_xdg_file(xdg_user_dirs_file)
if res is not None:
return res
res = get_dir_from_xdg_file(xdg_user_dirs_default_file)
if res is not None:
return res
logging.warning(f"couldn't find a {key}, falling back to: {default}")
if not default_for_windows and platform == "linux":
return
return default
def __init__(self, application_name: os.PathLike = "music-kraken"):
self.FILE_ENCODING: str = "utf-8"
@ -21,6 +70,14 @@ class Locations:
self.CONFIG_DIRECTORY.mkdir(exist_ok=True, parents=True)
self.CONFIG_FILE = Path(self.CONFIG_DIRECTORY, f"{application_name}.conf")
self.LEGACY_CONFIG_FILE = Path(self.CONFIG_DIRECTORY, f"{application_name}.conf")
self.CACHE_DIRECTORY = self._get_env("XDG_CACHE_HOME", Path(Path.home(), ".cache"))
if self.CACHE_DIRECTORY is None:
logging.warning(f"Could not find a cache dir. Falling back to the temp dir: {self.TEMP_DIRECTORY}")
self.CACHE_DIRECTORY = self.TEMP_DIRECTORY
else:
self.CACHE_DIRECTORY = Path(self.CACHE_DIRECTORY, application_name)
self.CACHE_DIRECTORY.mkdir(parents=True, exist_ok=True)
self.FFMPEG_BIN = Path(FFmpeg(enable_log=False).get_ffmpeg_bin())

View File

@ -4,7 +4,7 @@ from .config import main_settings
DEBUG = True
DEBUG_LOGGING = DEBUG and True
DEBUG_YOUTUBE_INITIALIZING = DEBUG and False
DEBUG_YOUTUBE_INITIALIZING = DEBUG and True
DEBUG_PAGES = DEBUG and False
if DEBUG: