Huge progress on the YouTube scraper

This commit is contained in:
Hellow2 2023-06-13 15:03:11 +02:00
parent 65f7121837
commit 79c38387c8
2 changed files with 62 additions and 8 deletions

View File

@@ -12,4 +12,9 @@ if __name__ == "__main__":
"d: https://musify.club/release/crystal-f-x-2012-795181" "d: https://musify.club/release/crystal-f-x-2012-795181"
] ]
music_kraken.cli(genre="test", command_list=direct_download) youtube_search = [
"s: #a Zombiez",
"10"
]
music_kraken.cli(genre="test", command_list=youtube_search)

View File

@@ -28,9 +28,8 @@ from ..utils.shared import YOUTUBE_LOGGER, INVIDIOUS_INSTANCE
""" """
def get_invidious_url(path: str = "", params: str = "", query: str = "", fragment: str = "") -> str:
    """Build an absolute URL on the configured Invidious instance.

    The scheme and network location are read from ``INVIDIOUS_INSTANCE``; the
    remaining components are handed to ``urlunparse`` in the order it expects:
    ``(scheme, netloc, path, params, query, fragment)``.

    :param path: URL path, e.g. ``"/api/v1/search"``.
    :param params: parameters of the last path segment.
    :param query: query string without the leading ``?``.
    :param fragment: fragment without the leading ``#``.
    :return: the assembled URL string.
    """
    # The earlier variant passed (path, query, fragment, "") after the netloc,
    # which put the query into the params slot and the fragment into the
    # query slot. Every component now occupies its own slot.
    return urlunparse((INVIDIOUS_INSTANCE.scheme, INVIDIOUS_INSTANCE.netloc, path, params, query, fragment))
class YouTubeUrlType(Enum): class YouTubeUrlType(Enum):
@@ -154,11 +153,31 @@ class YouTube(Page):
def label_search(self, label: Label) -> List[Label]:
    """Return the labels found for ``label``.

    Always returns an empty list; the ``label`` argument is not used.
    """
    return list()
def _json_to_artist(self, artist_json: dict) -> Artist:
    """Convert one JSON entry into an ``Artist``.

    Reads the ``"author"`` and ``"authorUrl"`` keys of ``artist_json``. Every
    occurrence of ``" - Topic"`` is removed from the author string to form
    the artist name, and the author URL is used as the path of an Invidious
    URL that becomes the artist's only source.
    """
    display_name = artist_json["author"].replace(" - Topic", "")
    channel_url = get_invidious_url(path=artist_json["authorUrl"])
    channel_source = Source(self.SOURCE_TYPE, channel_url)

    return Artist(name=display_name, source_list=[channel_source])
def artist_search(self, artist: Artist) -> List[Artist]:
    """Search the Invidious instance for "Topic" channels matching an artist.

    The search term is ``"<artist.name> - Topic"``, restricted to channel
    results. Only entries whose ``"type"`` is ``"channel"`` and whose
    ``"author"`` ends with ``" - Topic"`` are converted via
    ``_json_to_artist``.

    :param artist: artist whose ``name`` is used as the search term.
    :return: the matching artists; empty when nothing usable came back.
    """
    # Function-scope import so this method does not depend on the module header.
    from urllib.parse import urlencode

    # Example request:
    # https://yt.artemislena.eu/api/v1/search?q=Zombiez+-+Topic&page=1&date=none&type=channel&duration=none&sort=relevance
    # urlencode escapes characters such as "&", "#" and "+" in the artist
    # name, which a plain replace(' ', '+') would leave to corrupt the query.
    query = urlencode({
        "q": f"{artist.name} - Topic",
        "page": 1,
        "date": "none",
        "type": "channel",
        "duration": "none",
        "sort": "relevance",
    })
    endpoint = get_invidious_url(path="/api/v1/search", query=query)
    # NOTE(review): assumes YOUTUBE_LOGGER is a standard logging.Logger - confirm.
    YOUTUBE_LOGGER.debug("artist search endpoint: %s", endpoint)

    r = self.connection.get(endpoint)
    # NOTE(review): presumably the connection can hand back None for a failed
    # request - verify against its implementation.
    if r is None:
        return []

    search_results = r.json()
    # Anything other than a list (e.g. an error object) has no results to read.
    if not isinstance(search_results, list):
        return []

    artist_list = []
    for search_result in search_results:
        if search_result.get("type") != "channel":
            continue

        author: str = search_result.get("author", "")
        if not author.endswith(" - Topic"):
            continue

        artist_list.append(self._json_to_artist(search_result))

    return artist_list
def album_search(self, album: Album) -> List[Album]:
    """Return the albums found for ``album``.

    Always returns an empty list; the ``album`` argument is not used.
    """
    return list()
@@ -173,7 +192,37 @@ class YouTube(Page):
return Album() return Album()
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
    """Fetch an artist and its albums from a channel source.

    ``source.url`` is parsed with ``YouTubeUrl``. If it is not a channel URL,
    an ``Artist`` carrying only ``source`` is returned. Otherwise the
    channel's playlists are requested from the Invidious instance and every
    entry whose ``"type"`` is ``"playlist"`` becomes an ``Album``.

    :param source: source whose URL identifies the channel.
    :param stop_at_level: not used by this method.
    :return: the artist with ``main_album_list`` filled from the playlists;
        its name stays ``None`` when no playlist entry was found.
    """
    parsed = YouTubeUrl(source.url)
    if parsed.url_type != YouTubeUrlType.CHANNEL:
        return Artist(source_list=[source])

    # Example request:
    # https://yt.artemislena.eu/api/v1/channels/playlists/UCV0Ntl3lVR7xDXKoCU6uUXA
    r = self.connection.get(get_invidious_url(path=f"/api/v1/channels/playlists/{parsed.id}"))
    # NOTE(review): presumably the connection can hand back None for a failed
    # request - verify against its implementation.
    if r is None:
        return Artist(source_list=[source])

    artist_name = None
    album_list = []

    # A payload without "playlists" is treated as a channel without albums.
    for playlist_json in r.json().get("playlists", []):
        if playlist_json.get("type") != "playlist":
            continue

        artist_name = playlist_json["author"].replace(" - Topic", "")

        # Album URL shape: /playlist?list=OLAK5uy_nbvQeskr8nbIuzeLxoceNLuCL_KjAmzVw
        album_source = Source(
            self.SOURCE_TYPE,
            get_invidious_url(path="/playlist", query=f"list={playlist_json['playlistId']}"),
        )

        album_list.append(Album(
            title=playlist_json["title"],
            source_list=[album_source],
            # The entry carries the same "author"/"authorUrl" keys that
            # _json_to_artist reads, so the artist is built in one place.
            artist_list=[self._json_to_artist(playlist_json)],
        ))

    return Artist(name=artist_name, main_album_list=album_list, source_list=[source])
def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:
return Label() return Label()