From 5ba38916d67cff99088a67c0731618f8f50eaaf6 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 8 Apr 2024 18:10:42 +0200 Subject: [PATCH] fix: youtube downloader works now --- src/music_kraken/connection/connection.py | 36 +++++-- .../pages/youtube_music/super_youtube.py | 11 +- .../pages/youtube_music/youtube_music.py | 101 +++++++++++------- 3 files changed, 94 insertions(+), 54 deletions(-) diff --git a/src/music_kraken/connection/connection.py b/src/music_kraken/connection/connection.py index ce98326..c3d3e02 100644 --- a/src/music_kraken/connection/connection.py +++ b/src/music_kraken/connection/connection.py @@ -134,6 +134,7 @@ class Connection: accepted_response_codes: set = None, refer_from_origin: bool = True, raw_url: bool = False, + raw_headers: bool = False, sleep_after_404: float = None, is_heartbeat: bool = False, disable_cache: bool = None, @@ -154,14 +155,20 @@ class Connection: parsed_url = urlparse(url) - _headers = copy.copy(self.HEADER_VALUES) - _headers.update(headers) + + if not raw_headers: + _headers = copy.copy(self.HEADER_VALUES) + _headers.update(headers) - headers = self._update_headers( - headers=_headers, - refer_from_origin=refer_from_origin, - url=parsed_url - ) + headers = self._update_headers( + headers=_headers, + refer_from_origin=refer_from_origin, + url=parsed_url + ) + else: + headers = headers or {} + + request_url = parsed_url.geturl() if not raw_url else url if name != "" and not disable_cache: cached = self.cache.get(name) @@ -170,7 +177,7 @@ class Connection: with responses.RequestsMock() as resp: resp.add( method=method, - url=url, + url=request_url, body=cached, ) return requests.request(method=method, url=url, timeout=timeout, headers=headers, **kwargs) @@ -183,8 +190,6 @@ class Connection: if timeout is None: timeout = self.TIMEOUT - request_url = parsed_url.geturl() if not raw_url else url - r = None connection_failed = False try: @@ -270,9 +275,16 @@ class Connection: chunk_size: int = main_settings["chunk_size"], progress: int = 0, method: str = "GET", + try_count: int = 0, + accepted_response_codes: set = None, **kwargs ) -> DownloadResult: + accepted_response_codes = self.ACCEPTED_RESPONSE_CODES if accepted_response_codes is None else accepted_response_codes stream_kwargs = copy.copy(locals()) + stream_kwargs.update(stream_kwargs.pop("kwargs")) + + if "description" in kwargs: + name = kwargs.pop("description") if progress > 0: headers = dict() if headers is None else headers @@ -283,6 +295,7 @@ class Connection: name=name, method=method, stream=True, + accepted_response_codes=accepted_response_codes, **kwargs ) @@ -308,13 +321,14 @@ class Connection: progress += size t.update(size) - except requests.exceptions.ConnectionError: + except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, requests.exceptions.ChunkedEncodingError): if try_count >= self.TRIES: self.LOGGER.warning(f"Stream timed out at \"{url}\": to many retries, aborting.") return DownloadResult(error_message=f"Stream timed out from {url}, reducing the chunk_size might help.") self.LOGGER.warning(f"Stream timed out at \"{url}\": ({try_count}-{self.TRIES})") retry = True + try_count += 1 if total_size > progress: retry = True diff --git a/src/music_kraken/pages/youtube_music/super_youtube.py b/src/music_kraken/pages/youtube_music/super_youtube.py index e18a473..a169eb4 100644 --- a/src/music_kraken/pages/youtube_music/super_youtube.py +++ b/src/music_kraken/pages/youtube_music/super_youtube.py @@ -1,6 +1,7 @@ from typing import List, Optional, Type, Tuple from urllib.parse import urlparse, urlunparse, parse_qs from enum import Enum +import requests import sponsorblock from sponsorblock.errors import HTTPException, NotFoundException @@ -135,6 +136,11 @@ class SuperYouTube(Page): logger=self.LOGGER, sleep_after_404=youtube_settings["sleep_after_youtube_403"] ) + + self.connection: Connection = Connection( + host=get_invidious_url(), + logger=self.LOGGER + ) # the stuff with the connection is, to ensure sponsorblock uses the proxies, my programm does _sponsorblock_connection: Connection = Connection(host="https://sponsor.ajay.app/") @@ -165,10 +171,11 @@ class SuperYouTube(Page): :param desc: :return: """ - r = self.connection.get(YouTubeUrl(source.url).api) + r: requests.Response = self.connection.get(YouTubeUrl(source.url).api) if r is None: return DownloadResult(error_message="Api didn't even respond, maybe try another invidious Instance") + audio_format = None best_bitrate = 0 @@ -193,7 +200,7 @@ class SuperYouTube(Page): endpoint = audio_format["url"] - return self.download_connection.stream_into(endpoint, target, description=desc, raw_url=True) + return self.download_connection.stream_into(endpoint, target, name=desc, raw_url=True) def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]: if not youtube_settings["use_sponsor_block"]: diff --git a/src/music_kraken/pages/youtube_music/youtube_music.py b/src/music_kraken/pages/youtube_music/youtube_music.py index cf0bd0d..516bafd 100644 --- a/src/music_kraken/pages/youtube_music/youtube_music.py +++ b/src/music_kraken/pages/youtube_music/youtube_music.py @@ -168,7 +168,7 @@ class YoutubeMusic(SuperYouTube): LOGGER = logging_settings["youtube_music_logger"] def __init__(self, *args, ydl_opts: dict = None, **kwargs): - self.connection: YoutubeMusicConnection = YoutubeMusicConnection( + self.yt_music_connection: YoutubeMusicConnection = YoutubeMusicConnection( logger=self.LOGGER, accept_language="en-US,en;q=0.5" ) @@ -191,8 +191,8 @@ class YoutubeMusic(SuperYouTube): logger=self.LOGGER, sleep_after_404=youtube_settings["sleep_after_youtube_403"], header_values={ - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8", "Referer": "https://music.youtube.com/", + 'Origin': 'https://music.youtube.com', } ) @@ -208,13 +208,13 @@ class YoutubeMusic(SuperYouTube): search for: "innertubeApiKey" """ - r = self.connection.get("https://music.youtube.com/") + r = self.yt_music_connection.get("https://music.youtube.com/") if r is None: return if urlparse(r.url).netloc == "consent.youtube.com": self.LOGGER.info(f"Making cookie consent request for {type(self).__name__}.") - r = self.connection.post("https://consent.youtube.com/save", data={ + r = self.yt_music_connection.post("https://consent.youtube.com/save", data={ 'gl': 'DE', 'm': '0', 'app': '0', @@ -237,15 +237,15 @@ class YoutubeMusic(SuperYouTube): for cookie in r.cookies: cookie_dict[cookie.name] = cookie.value - for cookie in self.connection.session.cookies: + for cookie in self.yt_music_connection.session.cookies: cookie_dict[cookie.name] = cookie.value # save cookies in settings youtube_settings["youtube_music_consent_cookies"] = cookie_dict else: - self.connection.save(r, "index.html") + self.yt_music_connection.save(r, "index.html") - r = self.connection.get("https://music.youtube.com/", name="index.html") + r = self.yt_music_connection.get("https://music.youtube.com/", name="index.html") if r is None: return @@ -349,7 +349,7 @@ class YoutubeMusic(SuperYouTube): query_continue = "" if self.credentials.ctoken == "" else f"&ctoken={self.credentials.ctoken}&continuation={self.credentials.ctoken}" # construct the request - r = self.connection.post( + r = self.yt_music_connection.post( url=get_youtube_url(path="/youtubei/v1/search", query=f"key={self.credentials.api_key}&prettyPrint=false" + query_continue), json={ @@ -402,7 +402,7 @@ class YoutubeMusic(SuperYouTube): url = urlparse(source.url) browse_id = url.path.replace("/channel/", "") - r = self.connection.post( + r = self.yt_music_connection.post( url=get_youtube_url(path="/youtubei/v1/browse", query=f"key={self.credentials.api_key}&prettyPrint=false"), json={ "browseId": browse_id, @@ -445,7 +445,7 @@ class YoutubeMusic(SuperYouTube): return album browse_id = list_id_list[0] - r = self.connection.post( + r = self.yt_music_connection.post( url=get_youtube_url(path="/youtubei/v1/browse", query=f"key={self.credentials.api_key}&prettyPrint=false"), json={ "browseId": browse_id, @@ -479,45 +479,64 @@ class YoutubeMusic(SuperYouTube): return album - def _get_best_format(self, format_list: List[Dict]) -> str: - def _calc_score(_f: dict): - s = 0 - - _url = _f.get("url", "") - if "mime=audio" in _url: - s += 100 - - return s - - highest_score = 0 - best_format = {} - for _format in format_list: - _s = _calc_score(_format) - if _s >= highest_score: - highest_score = _s - best_format = _format - - return best_format.get("url") def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: - # implement the functionality yt_dl provides - ydl_res = self.yt_ie._real_extract(source.url) + song = Song() - source.audio_url = self._get_best_format(ydl_res.get("formats", [{}])) - song = Song( - title=ydl_res.get("title"), - source_list=[source], - ) return song + + def fetch_media_url(self, source: Source) -> dict: + def _get_best_format(format_list: List[Dict]) -> dict: + def _calc_score(_f: dict): + s = 0 + + _url = _f.get("url", "") + if "mime=audio" in _url: + s += 100 + + return s + + highest_score = 0 + best_format = {} + for _format in format_list: + _s = _calc_score(_format) + if _s >= highest_score: + highest_score = _s + best_format = _format + + return best_format + + ydl_res = self.ydl.extract_info(url=source.url, download=False) + _best_format = _get_best_format(ydl_res.get("formats", [{}])) + + print(_best_format) + + return { + "url": _best_format.get("url"), + "chunk_size": _best_format.get("downloader_options", {}).get("http_chunk_size", main_settings["chunk_size"]), + "headers": _best_format.get("http_headers", {}), + } + def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: - self.fetch_song(source) + media = self.fetch_media_url(source) - if source.audio_url is None: - self.LOGGER.warning(f"Couldn't fetch the audio source with the innertube api, falling back to invidious.") - return super().download_song_to_target(source, target) + result = self.download_connection.stream_into( + media["url"], + target, + name=desc, + raw_url=True, + raw_headers=True, + disable_cache=True, + headers=media.get("headers", {}), + # chunk_size=media.get("chunk_size", main_settings["chunk_size"]), + method="GET", + ) - return self.download_connection.stream_into(source.audio_url, target, name=desc, raw_url=True, disable_cache=True) + if result.is_fatal_error: + result.merge(super().download_song_to_target(source=source, target=target, desc=desc)) + + return result def __del__(self): self.ydl.__exit__()