2023-06-13 18:10:11 +00:00
from typing import List , Optional , Type , Tuple
2023-06-13 11:20:49 +00:00
from urllib . parse import urlparse , urlunparse , parse_qs
from enum import Enum
2023-02-06 08:44:11 +00:00
2024-04-25 18:35:36 +00:00
import python_sponsorblock
2023-06-14 13:25:28 +00:00
2023-06-19 14:27:59 +00:00
from . . objects import Source , DatabaseObject , Song , Target
from . abstract import Page
from . . objects import (
2023-02-06 08:44:11 +00:00
Artist ,
Source ,
Song ,
Album ,
2023-06-12 19:53:40 +00:00
Label ,
2023-06-13 18:10:11 +00:00
Target ,
FormattedText ,
ID3Timestamp
2023-02-06 08:44:11 +00:00
)
2023-06-19 14:27:59 +00:00
from . . connection import Connection
2023-07-27 18:44:24 +00:00
from . . utils . string_processing import clean_song_title
2024-05-14 13:18:17 +00:00
from . . utils . enums import SourceType , ALL_SOURCE_TYPES
2023-10-23 14:21:44 +00:00
from . . utils . support_classes . download_result import DownloadResult
2023-09-10 14:27:09 +00:00
from . . utils . config import youtube_settings , main_settings , logging_settings
2023-06-12 19:53:40 +00:00
2023-07-31 17:12:09 +00:00
from . youtube_music . super_youtube import SuperYouTube , YouTubeUrl , get_invidious_url , YouTubeUrlType
2023-06-12 19:53:40 +00:00
2023-02-06 08:44:11 +00:00
2023-06-12 19:53:40 +00:00
"""
2023-06-13 11:20:49 +00:00
- https://yt.artemislena.eu/api/v1/search?q=Zombiez+-+Topic&page=1&date=none&type=channel&duration=none&sort=relevance
- https://yt.artemislena.eu/api/v1/channels/playlists/UCV0Ntl3lVR7xDXKoCU6uUXA
- https://yt.artemislena.eu/api/v1/playlists/OLAK5uy_kcUBiDv5ATbl-R20OjNaZ5G28XFanQOmM
- https://yt.artemislena.eu/api/v1/videos/SULFl39UjgY
2023-06-12 19:53:40 +00:00
"""
2023-02-06 08:44:11 +00:00
2023-06-19 14:27:59 +00:00
def get_piped_url(path: str = "", params: str = "", query: str = "", fragment: str = "") -> str:
    """
    Build a url that points to the configured piped instance.

    Scheme and host are read from ``youtube_settings["piped_instance"]``,
    every other url component is taken from the arguments.

    :param path: path component of the url
    :param params: params component of the url
    :param query: query string, without the leading "?"
    :param fragment: fragment, without the leading "#"
    :return: the assembled url
    """
    instance = youtube_settings["piped_instance"]
    components = (instance.scheme, instance.netloc, path, params, query, fragment)
    return urlunparse(components)
2023-06-13 11:20:49 +00:00
2023-07-31 17:12:09 +00:00
class YouTube ( SuperYouTube ) :
2023-06-12 19:53:40 +00:00
# CHANGE
2024-05-14 13:18:17 +00:00
SOURCE_TYPE = ALL_SOURCE_TYPES . YOUTUBE
2023-02-06 08:44:11 +00:00
2023-06-12 19:53:40 +00:00
def __init__(self, *args, **kwargs):
    """
    Create the connections used by this page, then initialise the parent class.

    All positional and keyword arguments are handed to the parent constructor unchanged.
    """
    # connection with the invidious instance as host
    self.connection: Connection = Connection(
        host=get_invidious_url(),
        logger=self.LOGGER,
    )

    # connection with the piped instance as host
    self.piped_connection: Connection = Connection(
        host=get_piped_url(),
        logger=self.LOGGER,
    )

    # connection with youtube itself as host
    self.download_connection: Connection = Connection(
        host="https://www.youtube.com/",
        logger=self.LOGGER,
        sleep_after_404=youtube_settings["sleep_after_youtube_403"],
    )

    # SponsorBlock receives the session of one of our own connections,
    # so that it uses the same proxies as the rest of the program.
    proxied_connection: Connection = Connection()
    self.sponsorblock = python_sponsorblock.SponsorBlock(
        silent=True,
        session=proxied_connection.session,
    )

    super().__init__(*args, **kwargs)
2023-02-06 08:44:11 +00:00
2023-06-12 19:53:40 +00:00
def general_search(self, search_query: str) -> List[DatabaseObject]:
    """
    Search for a free text query.

    The query is wrapped in a dynamic ``Artist`` and forwarded to ``artist_search``.

    :param search_query: the text to search for
    :return: whatever ``artist_search`` returns for that artist
    """
    query_artist = Artist(name=search_query, dynamic=True)
    return self.artist_search(query_artist)
2023-06-13 13:03:11 +00:00
def _json_to_artist(self, artist_json: dict) -> Artist:
    """
    Turn one channel entry of a search result into an ``Artist``.

    :param artist_json: dict which has to contain the keys "author" and "authorUrl"
    :return: artist named after the channel, with " - Topic" removed from the name,
        and a source built from "authorUrl" on the invidious instance
    """
    display_name = artist_json["author"].replace(" - Topic", "")
    channel_source = Source(self.SOURCE_TYPE, get_invidious_url(path=artist_json["authorUrl"]))

    return Artist(name=display_name, source_list=[channel_source])
2023-06-12 19:53:40 +00:00
def artist_search(self, artist: Artist) -> List[Artist]:
    """
    Search the invidious instance for the topic channels matching an artist.

    Only results of the type "channel", whose author name ends with " - Topic", are kept.

    :param artist: the artist whose name is searched for
    :return: one artist per matching channel, an empty list if the request failed
    """
    from urllib.parse import quote_plus

    # https://yt.artemislena.eu/api/v1/search?q=Zombiez+-+Topic&page=1&date=none&type=channel&duration=none&sort=relevance
    # quote_plus encodes spaces as "+", like the plain str.replace did before, but it also
    # escapes characters like "&" or "#", which would cut the query short otherwise.
    endpoint = get_invidious_url(
        path="/api/v1/search",
        query=f"q={quote_plus(artist.name)}+-+Topic&page=1&date=none&type=channel&duration=none&sort=relevance",
    )

    r = self.connection.get(endpoint)
    if r is None:
        return []

    return [
        self._json_to_artist(search_result)
        for search_result in r.json()
        if search_result["type"] == "channel" and search_result["author"].endswith(" - Topic")
    ]
2023-06-12 19:53:40 +00:00
2023-06-13 18:10:11 +00:00
def _fetch_song_from_id(self, youtube_id: str) -> Tuple[Song, Optional[int]]:
    """
    Fetch the metadata of a single video from the invidious api.

    :param youtube_id: the id of the video
    :return: the song, and the value of the "published" field as int.
        If the request failed, an empty song and None.
    """
    # https://yt.artemislena.eu/api/v1/videos/SULFl39UjgY
    r = self.connection.get(get_invidious_url(path=f"/api/v1/videos/{youtube_id}"))
    if r is None:
        return Song(), None

    data = r.json()
    if data["genre"] != "Music":
        self.LOGGER.warning("Genre has to be music, trying anyways")

    title = data["title"]
    license_str = None

    artist_list: List[Artist] = []

    _author: str = data["author"]
    if _author.endswith(" - Topic"):
        artist_list.append(Artist(
            name=_author.replace(" - Topic", ""),
            source_list=[Source(
                self.SOURCE_TYPE, get_invidious_url(path=f"/channel/{data['authorId']}")
            )]
        ))
    else:
        # Only uploads which don't come from a topic channel need their title cleaned.
        # If the api returns clean data in "musicTracks", the title is overridden below anyways.
        title = clean_song_title(title, _author)

    for music_track in data.get("musicTracks", []):
        title = music_track["song"]
        license_str = music_track["license"]

        for artist_name in music_track["artist"].split(" x "):
            artist_list.append(Artist(name=artist_name))

    # If every attempt to get a clean artist name failed (stripping " - Topic" or reading
    # "musicTracks"), fall back to the name of the uploader channel.
    if not artist_list:
        artist_list.append(Artist(name=_author))

    # Only add the license paragraph if a license exists, else the notes would end with "<p>None</p>".
    notes_html = data["descriptionHtml"]
    if license_str is not None:
        notes_html += f"\n<p>{license_str}</p>"

    return Song(
        title=title,
        source_list=[Source(
            self.SOURCE_TYPE, get_invidious_url(path="/watch", query=f"v={data['videoId']}")
        )],
        notes=FormattedText(html=notes_html),
        artist_list=artist_list
    ), int(data["published"])
2023-06-12 19:53:40 +00:00
def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
    """
    Fetch the song a video url points to.

    :param source: source whose url is parsed as youtube url
    :param stop_at_level: not used by this method
    :return: the fetched song, or an empty song if the url is no video url
    """
    video_url = YouTubeUrl(source.url)

    if video_url.url_type != YouTubeUrlType.VIDEO:
        return Song()

    # the second element (timestamp) is not needed here
    return self._fetch_song_from_id(video_url.id)[0]
2023-06-12 19:53:40 +00:00
def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
    """
    Fetch a playlist from the invidious api and return it as album.

    Every video of the playlist is fetched individually, which is why this may take a while.
    The album date is the rounded mean of the timestamps returned for the single videos.
    It is left out, if no timestamp could be fetched.

    :param source: source whose url is parsed as youtube url
    :param stop_at_level: not used by this method
    :return: the album, or an empty album if the url is no playlist url,
        the request failed or the response is no playlist
    """
    self.LOGGER.info(f"Getting the metadata of an album may take slightly longer, only panic in a couple minutes <333")

    parsed = YouTubeUrl(source.url)
    if parsed.url_type != YouTubeUrlType.PLAYLIST:
        return Album()

    # https://yt.artemislena.eu/api/v1/playlists/OLAK5uy_kcUBiDv5ATbl-R20OjNaZ5G28XFanQOmM
    r = self.connection.get(get_invidious_url(path=f"/api/v1/playlists/{parsed.id}"))
    if r is None:
        return Album()

    data = r.json()
    if data["type"] != "playlist":
        return Album()

    song_list = []
    timestamps: List[int] = []

    # TODO: fetch the song and don't get it from there
    for video in data["videos"]:
        other_song = Song(
            source_list=[
                Source(
                    self.SOURCE_TYPE, get_invidious_url(path="/watch", query=f"v={video['videoId']}")
                )
            ],
            # presumably "index" is zero based and tracksort starts at 1 - TODO confirm
            tracksort=video["index"] + 1
        )

        song, utc_timestamp = self._fetch_song_from_id(video["videoId"])
        song.merge(other_song)

        if utc_timestamp is not None:
            timestamps.append(utc_timestamp)
        song_list.append(song)

    album_kwargs = {
        "title": data["title"],
        "source_list": [source],
        "notes": FormattedText(html=data["descriptionHtml"]),
        "song_list": song_list,
    }
    # An empty playlist, or one where every video request failed, has no timestamps.
    # Calculating the mean would then raise a ZeroDivisionError.
    if timestamps:
        album_kwargs["date"] = ID3Timestamp.fromtimestamp(round(sum(timestamps) / len(timestamps)))

    return Album(**album_kwargs)
2023-06-12 19:53:40 +00:00
2023-06-19 14:27:59 +00:00
def fetch_invidious_album_list(self, yt_id: str) -> Tuple[List[Album], Optional[str]]:
    """
    Fetch the playlists of a channel from the invidious api.

    :param yt_id: the id of the channel
    :return: tuple of the albums and the artist name (with " - Topic" removed).
        The name is taken from the last playlist and is None if there was none.
        If the request failed ``([], None)`` is returned, so that the result can always
        be unpacked the same way as the one of ``fetch_piped_album_list``.
    """
    artist_name = None
    album_list = []

    # https://yt.artemislena.eu/api/v1/channels/playlists/UCV0Ntl3lVR7xDXKoCU6uUXA
    r = self.connection.get(get_invidious_url(path=f"/api/v1/channels/playlists/{yt_id}"))
    if r is None:
        return [], None

    for playlist_json in r.json()["playlists"]:
        if playlist_json["type"] != "playlist":
            continue

        artist_name = playlist_json["author"].replace(" - Topic", "")

        # /playlist?list=OLAK5uy_nbvQeskr8nbIuzeLxoceNLuCL_KjAmzVw
        album_list.append(Album(
            title=playlist_json["title"],
            source_list=[Source(
                self.SOURCE_TYPE, get_invidious_url(path="/playlist", query=f"list={playlist_json['playlistId']}")
            )],
            artist_list=[Artist(
                name=artist_name,
                source_list=[
                    Source(self.SOURCE_TYPE, get_invidious_url(path=playlist_json["authorUrl"]))
                ]
            )]
        ))

    return album_list, artist_name
def fetch_piped_album_list(self, yt_id: str):
    """
    Fetch the playlists of a channel from the piped instance.

    :param yt_id: the id of the channel
    :return: tuple of the albums and the artist name (with " - Topic" removed).
        The name is taken from the last playlist and is None if there was none.
        If the request failed ``([], None)`` is returned.
    """
    # NOTE(review): the second url has no "/" between the id and "playlists".
    # It looks unintended, but is kept as is - confirm against the piped api.
    tab_query = "".join((
        'data={"originalUrl":"https://www.youtube.com/', yt_id, '/playlists",',
        '"url":"https://www.youtube.com/', yt_id, 'playlists",',
        '"id":"', yt_id, '",',
        '"contentFilters":["playlists"],"sortFilter":"","baseUrl":"https://www.youtube.com"}',
    ))

    r = self.piped_connection.get(get_piped_url(path="/channels/tabs", query=tab_query))
    if r is None:
        return [], None

    artist_name = None
    album_list = []

    for entry in r.json()["content"]:
        if entry["type"] == "playlist":
            artist_name = entry["uploaderName"].replace(" - Topic", "")

            uploader = Artist(
                name=artist_name,
                source_list=[
                    Source(self.SOURCE_TYPE, get_invidious_url(path=entry["uploaderUrl"]))
                ]
            )
            album_list.append(Album(
                title=entry["name"],
                source_list=[Source(
                    self.SOURCE_TYPE, get_invidious_url() + entry["url"]
                )],
                artist_list=[uploader]
            ))

    return album_list, artist_name
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
    """
    Fetch the artist belonging to a channel url, including the playlists of the channel as albums.

    Piped is asked first. Invidious is only used, if piped didn't return any playlist.

    :param source: source whose url is parsed as youtube url
    :param stop_at_level: not used by this method
    :return: the artist. If the url is no channel url, an artist which only contains the source.
    """
    channel_url = YouTubeUrl(source.url)
    if channel_url.url_type != YouTubeUrlType.CHANNEL:
        return Artist(source_list=[source])

    albums, name = self.fetch_piped_album_list(channel_url.id)
    if not albums:
        self.LOGGER.warning(f"didn't found any playlists with piped, falling back to invidious. (it is unusual)")
        albums, name = self.fetch_invidious_album_list(channel_url.id)

    return Artist(name=name, album_list=albums, source_list=[source])
2023-06-12 19:53:40 +00:00
def download_song_to_target(self, source: Source, target: Target, desc: str = None) -> DownloadResult:
    """
    Pick an audio stream of the video and stream it into the target.

    1. choosing the format
        Only formats whose type starts with "audio" are considered.
        The first one with a bitrate of at least ``main_settings["bitrate"]`` is taken.
        If there is none, the one with the highest bitrate is taken.
    2. downloading it

    :param source: source whose url is parsed as youtube url
    :param target: where the audio is streamed into
    :param desc: passed on to the download as description
    :return: the result of the download, or a result with an error message
        if the api didn't respond or no audio format was found
    """
    r = self.connection.get(YouTubeUrl(source.url).api)
    if r is None:
        return DownloadResult(error_message="Api didn't even respond, maybe try another invidious Instance")

    chosen_format = None
    highest_bitrate = 0

    for candidate in r.json()["adaptiveFormats"]:
        mime_type: str = candidate["type"]
        if not mime_type.startswith("audio"):
            continue

        candidate_bitrate = int(candidate.get("bitrate", 0))
        good_enough = candidate_bitrate >= main_settings["bitrate"]

        if good_enough or candidate_bitrate > highest_bitrate:
            highest_bitrate = candidate_bitrate
            chosen_format = candidate

        # the first format which satisfies the configured bitrate ends the search
        if good_enough:
            break

    if chosen_format is None:
        return DownloadResult(error_message="Couldn't find the download link.")

    return self.download_connection.stream_into(chosen_format["url"], target, description=desc, raw_url=True)
2023-06-13 18:10:11 +00:00
2023-06-14 13:25:28 +00:00
2023-06-15 07:58:48 +00:00
def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]:
    """
    Get the intervals of a video which should be skipped, according to SponsorBlock.

    :param song: not used by this method
    :param source: source whose url is parsed as youtube url
    :return: list of (start, end) tuples, taken from the first two values of every segment.
        Empty if SponsorBlock is disabled in the settings, the url is no video url,
        or fetching the segments raised one of the handled exceptions.
    """
    if not youtube_settings["use_sponsor_block"]:
        return []

    video_url = YouTubeUrl(source.url)
    if video_url.url_type != YouTubeUrlType.VIDEO:
        self.LOGGER.warning(f"{source.url} is no video url.")
        return []

    # NOTE(review): NotFoundException and HTTPException are not imported in the visible
    # part of this file. If that is really the case, raising either of them ends in a
    # NameError - confirm where python_sponsorblock exposes them and import them.
    try:
        fetched_segments = self.sponsorblock.get_segments(video_url.id)
    except NotFoundException:
        self.LOGGER.debug(f"No sponsor found for the video {video_url.id}.")
        return []
    except HTTPException as e:
        self.LOGGER.warning(f"{e}")
        return []

    intervals = []
    for entry in fetched_segments:
        intervals.append((entry.segment[0], entry.segment[1]))
    return intervals