fix: youtube downloader works now

This commit is contained in:
Hazel 2024-04-08 18:10:42 +02:00
parent 86f1f96034
commit 5ba38916d6
3 changed files with 94 additions and 54 deletions

View File

@ -134,6 +134,7 @@ class Connection:
accepted_response_codes: set = None, accepted_response_codes: set = None,
refer_from_origin: bool = True, refer_from_origin: bool = True,
raw_url: bool = False, raw_url: bool = False,
raw_headers: bool = False,
sleep_after_404: float = None, sleep_after_404: float = None,
is_heartbeat: bool = False, is_heartbeat: bool = False,
disable_cache: bool = None, disable_cache: bool = None,
@ -154,14 +155,20 @@ class Connection:
parsed_url = urlparse(url) parsed_url = urlparse(url)
_headers = copy.copy(self.HEADER_VALUES)
_headers.update(headers) if not raw_headers:
_headers = copy.copy(self.HEADER_VALUES)
_headers.update(headers)
headers = self._update_headers( headers = self._update_headers(
headers=_headers, headers=_headers,
refer_from_origin=refer_from_origin, refer_from_origin=refer_from_origin,
url=parsed_url url=parsed_url
) )
else:
headers = headers or {}
request_url = parsed_url.geturl() if not raw_url else url
if name != "" and not disable_cache: if name != "" and not disable_cache:
cached = self.cache.get(name) cached = self.cache.get(name)
@ -170,7 +177,7 @@ class Connection:
with responses.RequestsMock() as resp: with responses.RequestsMock() as resp:
resp.add( resp.add(
method=method, method=method,
url=url, url=request_url,
body=cached, body=cached,
) )
return requests.request(method=method, url=url, timeout=timeout, headers=headers, **kwargs) return requests.request(method=method, url=url, timeout=timeout, headers=headers, **kwargs)
@ -183,8 +190,6 @@ class Connection:
if timeout is None: if timeout is None:
timeout = self.TIMEOUT timeout = self.TIMEOUT
request_url = parsed_url.geturl() if not raw_url else url
r = None r = None
connection_failed = False connection_failed = False
try: try:
@ -270,9 +275,16 @@ class Connection:
chunk_size: int = main_settings["chunk_size"], chunk_size: int = main_settings["chunk_size"],
progress: int = 0, progress: int = 0,
method: str = "GET", method: str = "GET",
try_count: int = 0,
accepted_response_codes: set = None,
**kwargs **kwargs
) -> DownloadResult: ) -> DownloadResult:
accepted_response_codes = self.ACCEPTED_RESPONSE_CODES if accepted_response_codes is None else accepted_response_codes
stream_kwargs = copy.copy(locals()) stream_kwargs = copy.copy(locals())
stream_kwargs.update(stream_kwargs.pop("kwargs"))
if "description" in kwargs:
name = kwargs.pop("description")
if progress > 0: if progress > 0:
headers = dict() if headers is None else headers headers = dict() if headers is None else headers
@ -283,6 +295,7 @@ class Connection:
name=name, name=name,
method=method, method=method,
stream=True, stream=True,
accepted_response_codes=accepted_response_codes,
**kwargs **kwargs
) )
@ -308,13 +321,14 @@ class Connection:
progress += size progress += size
t.update(size) t.update(size)
except requests.exceptions.ConnectionError: except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, requests.exceptions.ChunkedEncodingError):
if try_count >= self.TRIES: if try_count >= self.TRIES:
self.LOGGER.warning(f"Stream timed out at \"{url}\": too many retries, aborting.") self.LOGGER.warning(f"Stream timed out at \"{url}\": too many retries, aborting.")
return DownloadResult(error_message=f"Stream timed out from {url}, reducing the chunk_size might help.") return DownloadResult(error_message=f"Stream timed out from {url}, reducing the chunk_size might help.")
self.LOGGER.warning(f"Stream timed out at \"{url}\": ({try_count}-{self.TRIES})") self.LOGGER.warning(f"Stream timed out at \"{url}\": ({try_count}-{self.TRIES})")
retry = True retry = True
try_count += 1
if total_size > progress: if total_size > progress:
retry = True retry = True

View File

@ -1,6 +1,7 @@
from typing import List, Optional, Type, Tuple from typing import List, Optional, Type, Tuple
from urllib.parse import urlparse, urlunparse, parse_qs from urllib.parse import urlparse, urlunparse, parse_qs
from enum import Enum from enum import Enum
import requests
import sponsorblock import sponsorblock
from sponsorblock.errors import HTTPException, NotFoundException from sponsorblock.errors import HTTPException, NotFoundException
@ -135,6 +136,11 @@ class SuperYouTube(Page):
logger=self.LOGGER, logger=self.LOGGER,
sleep_after_404=youtube_settings["sleep_after_youtube_403"] sleep_after_404=youtube_settings["sleep_after_youtube_403"]
) )
self.connection: Connection = Connection(
host=get_invidious_url(),
logger=self.LOGGER
)
# the stuff with the connection is, to ensure sponsorblock uses the proxies, my program does # the stuff with the connection is, to ensure sponsorblock uses the proxies, my program does
_sponsorblock_connection: Connection = Connection(host="https://sponsor.ajay.app/") _sponsorblock_connection: Connection = Connection(host="https://sponsor.ajay.app/")
@ -165,10 +171,11 @@ class SuperYouTube(Page):
:param desc: :param desc:
:return: :return:
""" """
r = self.connection.get(YouTubeUrl(source.url).api) r: requests.Response = self.connection.get(YouTubeUrl(source.url).api)
if r is None: if r is None:
return DownloadResult(error_message="Api didn't even respond, maybe try another invidious Instance") return DownloadResult(error_message="Api didn't even respond, maybe try another invidious Instance")
audio_format = None audio_format = None
best_bitrate = 0 best_bitrate = 0
@ -193,7 +200,7 @@ class SuperYouTube(Page):
endpoint = audio_format["url"] endpoint = audio_format["url"]
return self.download_connection.stream_into(endpoint, target, description=desc, raw_url=True) return self.download_connection.stream_into(endpoint, target, name=desc, raw_url=True)
def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]: def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]:
if not youtube_settings["use_sponsor_block"]: if not youtube_settings["use_sponsor_block"]:

View File

@ -168,7 +168,7 @@ class YoutubeMusic(SuperYouTube):
LOGGER = logging_settings["youtube_music_logger"] LOGGER = logging_settings["youtube_music_logger"]
def __init__(self, *args, ydl_opts: dict = None, **kwargs): def __init__(self, *args, ydl_opts: dict = None, **kwargs):
self.connection: YoutubeMusicConnection = YoutubeMusicConnection( self.yt_music_connection: YoutubeMusicConnection = YoutubeMusicConnection(
logger=self.LOGGER, logger=self.LOGGER,
accept_language="en-US,en;q=0.5" accept_language="en-US,en;q=0.5"
) )
@ -191,8 +191,8 @@ class YoutubeMusic(SuperYouTube):
logger=self.LOGGER, logger=self.LOGGER,
sleep_after_404=youtube_settings["sleep_after_youtube_403"], sleep_after_404=youtube_settings["sleep_after_youtube_403"],
header_values={ header_values={
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
"Referer": "https://music.youtube.com/", "Referer": "https://music.youtube.com/",
'Origin': 'https://music.youtube.com',
} }
) )
@ -208,13 +208,13 @@ class YoutubeMusic(SuperYouTube):
search for: "innertubeApiKey" search for: "innertubeApiKey"
""" """
r = self.connection.get("https://music.youtube.com/") r = self.yt_music_connection.get("https://music.youtube.com/")
if r is None: if r is None:
return return
if urlparse(r.url).netloc == "consent.youtube.com": if urlparse(r.url).netloc == "consent.youtube.com":
self.LOGGER.info(f"Making cookie consent request for {type(self).__name__}.") self.LOGGER.info(f"Making cookie consent request for {type(self).__name__}.")
r = self.connection.post("https://consent.youtube.com/save", data={ r = self.yt_music_connection.post("https://consent.youtube.com/save", data={
'gl': 'DE', 'gl': 'DE',
'm': '0', 'm': '0',
'app': '0', 'app': '0',
@ -237,15 +237,15 @@ class YoutubeMusic(SuperYouTube):
for cookie in r.cookies: for cookie in r.cookies:
cookie_dict[cookie.name] = cookie.value cookie_dict[cookie.name] = cookie.value
for cookie in self.connection.session.cookies: for cookie in self.yt_music_connection.session.cookies:
cookie_dict[cookie.name] = cookie.value cookie_dict[cookie.name] = cookie.value
# save cookies in settings # save cookies in settings
youtube_settings["youtube_music_consent_cookies"] = cookie_dict youtube_settings["youtube_music_consent_cookies"] = cookie_dict
else: else:
self.connection.save(r, "index.html") self.yt_music_connection.save(r, "index.html")
r = self.connection.get("https://music.youtube.com/", name="index.html") r = self.yt_music_connection.get("https://music.youtube.com/", name="index.html")
if r is None: if r is None:
return return
@ -349,7 +349,7 @@ class YoutubeMusic(SuperYouTube):
query_continue = "" if self.credentials.ctoken == "" else f"&ctoken={self.credentials.ctoken}&continuation={self.credentials.ctoken}" query_continue = "" if self.credentials.ctoken == "" else f"&ctoken={self.credentials.ctoken}&continuation={self.credentials.ctoken}"
# construct the request # construct the request
r = self.connection.post( r = self.yt_music_connection.post(
url=get_youtube_url(path="/youtubei/v1/search", url=get_youtube_url(path="/youtubei/v1/search",
query=f"key={self.credentials.api_key}&prettyPrint=false" + query_continue), query=f"key={self.credentials.api_key}&prettyPrint=false" + query_continue),
json={ json={
@ -402,7 +402,7 @@ class YoutubeMusic(SuperYouTube):
url = urlparse(source.url) url = urlparse(source.url)
browse_id = url.path.replace("/channel/", "") browse_id = url.path.replace("/channel/", "")
r = self.connection.post( r = self.yt_music_connection.post(
url=get_youtube_url(path="/youtubei/v1/browse", query=f"key={self.credentials.api_key}&prettyPrint=false"), url=get_youtube_url(path="/youtubei/v1/browse", query=f"key={self.credentials.api_key}&prettyPrint=false"),
json={ json={
"browseId": browse_id, "browseId": browse_id,
@ -445,7 +445,7 @@ class YoutubeMusic(SuperYouTube):
return album return album
browse_id = list_id_list[0] browse_id = list_id_list[0]
r = self.connection.post( r = self.yt_music_connection.post(
url=get_youtube_url(path="/youtubei/v1/browse", query=f"key={self.credentials.api_key}&prettyPrint=false"), url=get_youtube_url(path="/youtubei/v1/browse", query=f"key={self.credentials.api_key}&prettyPrint=false"),
json={ json={
"browseId": browse_id, "browseId": browse_id,
@ -479,45 +479,64 @@ class YoutubeMusic(SuperYouTube):
return album return album
def _get_best_format(self, format_list: List[Dict]) -> str:
def _calc_score(_f: dict):
s = 0
_url = _f.get("url", "")
if "mime=audio" in _url:
s += 100
return s
highest_score = 0
best_format = {}
for _format in format_list:
_s = _calc_score(_format)
if _s >= highest_score:
highest_score = _s
best_format = _format
return best_format.get("url")
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song: def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
# implement the functionality yt_dl provides song = Song()
ydl_res = self.yt_ie._real_extract(source.url)
source.audio_url = self._get_best_format(ydl_res.get("formats", [{}]))
song = Song(
title=ydl_res.get("title"),
source_list=[source],
)
return song return song
def fetch_media_url(self, source: Source) -> dict:
def _get_best_format(format_list: List[Dict]) -> dict:
def _calc_score(_f: dict):
s = 0
_url = _f.get("url", "")
if "mime=audio" in _url:
s += 100
return s
highest_score = 0
best_format = {}
for _format in format_list:
_s = _calc_score(_format)
if _s >= highest_score:
highest_score = _s
best_format = _format
return best_format
ydl_res = self.ydl.extract_info(url=source.url, download=False)
_best_format = _get_best_format(ydl_res.get("formats", [{}]))
print(_best_format)
return {
"url": _best_format.get("url"),
"chunk_size": _best_format.get("downloader_options", {}).get("http_chunk_size", main_settings["chunk_size"]),
"headers": _best_format.get("http_headers", {}),
}
def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult: def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult:
self.fetch_song(source) media = self.fetch_media_url(source)
if source.audio_url is None: result = self.download_connection.stream_into(
self.LOGGER.warning(f"Couldn't fetch the audio source with the innertube api, falling back to invidious.") media["url"],
return super().download_song_to_target(source, target) target,
name=desc,
raw_url=True,
raw_headers=True,
disable_cache=True,
headers=media.get("headers", {}),
# chunk_size=media.get("chunk_size", main_settings["chunk_size"]),
method="GET",
)
return self.download_connection.stream_into(source.audio_url, target, name=desc, raw_url=True, disable_cache=True) if result.is_fatal_error:
result.merge(super().download_song_to_target(source=source, target=target, desc=desc))
return result
def __del__(self): def __del__(self):
self.ydl.__exit__() self.ydl.__exit__()