21 Commits

Author SHA1 Message Date
49c3734526 feat: added hooks for collection on append
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-06-04 10:11:46 +02:00
bc19a94e7f feat: added parent artwork options 2024-06-04 10:09:17 +02:00
5d26fdbf94 Artwork gallery Musify
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-06-04 07:58:18 +02:00
465af49057 hotfix
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-06-03 10:19:32 +02:00
2aa0f02fa5 Merge branch 'adding_genius' into experimental
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-23 13:36:10 +02:00
7b0b830d64 feat: removed legacy key
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2024-05-23 13:24:25 +02:00
1ba6c97f5a feat: more extensive browse id 2024-05-23 13:20:34 +02:00
c8cbfc7cb9 feat: improved output of clearing the cache 2024-05-23 13:17:14 +02:00
344da0a0bf fix: converting pictures to rgb before saving
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-22 15:20:26 +02:00
49dc7093c8 fix: genius fallback
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-22 15:18:43 +02:00
90f70638b4 feat: better lyrics support
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-21 17:55:08 +02:00
7b4eee858a feat: parsed script json
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-21 17:14:58 +02:00
f61b34dd40 feat: improved feature artists by also adding writer and producer to it
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-21 16:52:01 +02:00
688b4fd357 feat: getting the album tracklist
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-21 16:47:38 +02:00
769d27dc5c feat: album details
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-21 16:43:52 +02:00
f5d953d9ce feat: theoretically fetching feature songs
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-21 16:34:04 +02:00
46b64b8f8d feat: fetched the flat artist details 2024-05-21 16:23:05 +02:00
adfce16d2a feat: fetched the flat artist details 2024-05-21 16:21:58 +02:00
e4fd9faf12 feat: detecting url type 2024-05-21 15:57:09 +02:00
f6caee41a8 feat: finished searching genious
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2024-05-21 15:52:41 +02:00
068c749c38 feat: implemented artist search 2024-05-21 15:27:10 +02:00
14 changed files with 441 additions and 46 deletions

View File

@@ -27,6 +27,7 @@
"Gitea", "Gitea",
"iframe", "iframe",
"isrc", "isrc",
"itemprop",
"levenshtein", "levenshtein",
"metallum", "metallum",
"MUSICBRAINZ", "MUSICBRAINZ",

View File

@@ -6,9 +6,10 @@ logging.getLogger().setLevel(logging.DEBUG)
if __name__ == "__main__": if __name__ == "__main__":
commands = [ commands = [
"s: #a I'm in a coffin", "s: #a Crystal F",
"0", "10",
"d: 0", "1",
"3",
] ]

View File

@@ -93,6 +93,10 @@ def write_metadata_to_target(metadata: Metadata, target: Target, song: Song):
# resize the image to the preferred resolution # resize the image to the preferred resolution
img.thumbnail((main_settings["preferred_artwork_resolution"], main_settings["preferred_artwork_resolution"])) img.thumbnail((main_settings["preferred_artwork_resolution"], main_settings["preferred_artwork_resolution"]))
# https://stackoverflow.com/a/59476938/16804841
if img.mode != 'RGB':
img = img.convert('RGB')
img.save(converted_target.file_path, "JPEG") img.save(converted_target.file_path, "JPEG")
# https://stackoverflow.com/questions/70228440/mutagen-how-can-i-correctly-embed-album-art-into-mp3-file-so-that-i-can-see-t # https://stackoverflow.com/questions/70228440/mutagen-how-can-i-correctly-embed-album-art-into-mp3-file-so-that-i-can-see-t

View File

@@ -6,6 +6,7 @@ from typing import List, Optional
from functools import lru_cache from functools import lru_cache
import logging import logging
from ..utils import output, BColors
from ..utils.config import main_settings from ..utils.config import main_settings
from ..utils.string_processing import fit_to_file_system from ..utils.string_processing import fit_to_file_system
@@ -136,13 +137,13 @@ class Cache:
) )
self._write_attribute(cache_attribute) self._write_attribute(cache_attribute)
cache_path = fit_to_file_system(Path(module_path, name), hidden_ok=True) cache_path = fit_to_file_system(Path(module_path, name.replace("/", "_")), hidden_ok=True)
with cache_path.open("wb") as content_file: with cache_path.open("wb") as content_file:
self.logger.debug(f"writing cache to {cache_path}") self.logger.debug(f"writing cache to {cache_path}")
content_file.write(content) content_file.write(content)
def get(self, name: str) -> Optional[CacheResult]: def get(self, name: str) -> Optional[CacheResult]:
path = fit_to_file_system(Path(self._dir, self.module, name), hidden_ok=True) path = fit_to_file_system(Path(self._dir, self.module, name.replace("/", "_")), hidden_ok=True)
if not path.is_file(): if not path.is_file():
return None return None
@@ -165,7 +166,7 @@ class Cache:
if ca.name == "": if ca.name == "":
continue continue
file = fit_to_file_system(Path(self._dir, ca.module, ca.name), hidden_ok=True) file = fit_to_file_system(Path(self._dir, ca.module, ca.name.replace("/", "_")), hidden_ok=True)
if not ca.is_valid: if not ca.is_valid:
self.logger.debug(f"deleting cache {ca.id}") self.logger.debug(f"deleting cache {ca.id}")
@@ -204,9 +205,12 @@ class Cache:
for path in self._dir.iterdir(): for path in self._dir.iterdir():
if path.is_dir(): if path.is_dir():
for file in path.iterdir(): for file in path.iterdir():
output(f"Deleting file {file}", color=BColors.GREY)
file.unlink() file.unlink()
output(f"Deleting folder {path}", color=BColors.HEADER)
path.rmdir() path.rmdir()
else: else:
output(f"Deleting folder {path}", color=BColors.HEADER)
path.unlink() path.unlink()
self.cached_attributes.clear() self.cached_attributes.clear()

View File

@@ -30,11 +30,12 @@ from ..utils.exception import MKMissingNameException
from ..utils.exception.download import UrlNotFoundException from ..utils.exception.download import UrlNotFoundException
from ..utils.shared import DEBUG_PAGES from ..utils.shared import DEBUG_PAGES
from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, INDEPENDENT_DB_OBJECTS from ..pages import Page, EncyclopaediaMetallum, Musify, YouTube, YoutubeMusic, Bandcamp, Genius, INDEPENDENT_DB_OBJECTS
ALL_PAGES: Set[Type[Page]] = { ALL_PAGES: Set[Type[Page]] = {
# EncyclopaediaMetallum, # EncyclopaediaMetallum,
Genius,
Musify, Musify,
YoutubeMusic, YoutubeMusic,
Bandcamp Bandcamp

View File

@@ -1,18 +1,14 @@
from __future__ import annotations from __future__ import annotations
from typing import List, Optional, Dict, Tuple, Type, Union, TypedDict from typing import Dict, List, Optional, Set, Tuple, Type, TypedDict, Union
from .collection import Collection
from .metadata import (
Mapping as id3Mapping,
ID3Timestamp,
Metadata
)
from ..utils.string_processing import unify, hash_url
from .parents import OuterProxy as Base
from ..utils.config import main_settings from ..utils.config import main_settings
from ..utils.string_processing import hash_url, unify
from .collection import Collection
from .metadata import ID3Timestamp
from .metadata import Mapping as id3Mapping
from .metadata import Metadata
from .parents import OuterProxy as Base
class ArtworkVariant(TypedDict): class ArtworkVariant(TypedDict):
@@ -23,7 +19,9 @@ class ArtworkVariant(TypedDict):
class Artwork: class Artwork:
def __init__(self, *variants: List[ArtworkVariant]) -> None: def __init__(self, *variants: List[ArtworkVariant], parent_artworks: Set[Artwork] = None) -> None:
self.parent_artworks: Set[Artwork] = parent_artworks or set()
self._variant_mapping: Dict[str, ArtworkVariant] = {} self._variant_mapping: Dict[str, ArtworkVariant] = {}
for variant in variants: for variant in variants:
@@ -36,7 +34,7 @@ class Artwork:
def append(self, url: str, width: int = main_settings["preferred_artwork_resolution"], height: int = main_settings["preferred_artwork_resolution"], **kwargs) -> None: def append(self, url: str, width: int = main_settings["preferred_artwork_resolution"], height: int = main_settings["preferred_artwork_resolution"], **kwargs) -> None:
if url is None: if url is None:
return return
self._variant_mapping[hash_url(url=url)] = { self._variant_mapping[hash_url(url=url)] = {
"url": url, "url": url,
"width": width, "width": width,
@@ -44,19 +42,36 @@ class Artwork:
"deviation": self._calculate_deviation(width, height), "deviation": self._calculate_deviation(width, height),
} }
@property
def flat_empty(self) -> bool:
return len(self._variant_mapping.keys()) <= 0
def _get_best_from_list(self, artwork_variants: List[ArtworkVariant]) -> Optional[ArtworkVariant]:
return min(artwork_variants, key=lambda x: x["deviation"])
@property @property
def best_variant(self) -> ArtworkVariant: def best_variant(self) -> ArtworkVariant:
if len(self._variant_mapping.keys()) <= 0: if self.flat_empty:
return None return self._get_best_from_list([parent.best_variant for parent in self.parent_artworks])
return min(self._variant_mapping.values(), key=lambda x: x["deviation"]) return self._get_best_from_list(self._variant_mapping.values())
def get_variant_name(self, variant: ArtworkVariant) -> str: def get_variant_name(self, variant: ArtworkVariant) -> str:
return f"artwork_{variant['width']}x{variant['height']}_{hash_url(variant['url']).replace('/', '_')}" return f"artwork_{variant['width']}x{variant['height']}_{hash_url(variant['url']).replace('/', '_')}"
def __merge__(self, other: Artwork, **kwargs) -> None: def __merge__(self, other: Artwork, **kwargs) -> None:
self.parent_artworks.update(other.parent_artworks)
for key, value in other._variant_mapping.items(): for key, value in other._variant_mapping.items():
if key not in self._variant_mapping: if key not in self._variant_mapping:
self._variant_mapping[key] = value self._variant_mapping[key] = value
def __hash__(self) -> int:
return id(self)
def __eq__(self, other: Artwork) -> bool: def __eq__(self, other: Artwork) -> bool:
if hash(self) == hash(other):
return True
if not isinstance(other, Artwork):
return False
return any(a == b for a, b in zip(self._variant_mapping.keys(), other._variant_mapping.keys())) return any(a == b for a, b in zip(self._variant_mapping.keys(), other._variant_mapping.keys()))

View File

@@ -1,16 +1,44 @@
from __future__ import annotations from __future__ import annotations
from collections import defaultdict
from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Generator, Union, Any, Set
import copy import copy
from collections import defaultdict
from dataclasses import dataclass
from typing import (Any, Callable, Dict, Generator, Generic, Iterable,
Iterator, List, Optional, Set, Tuple, TypeVar, Union)
from .parents import OuterProxy from ..utils import BColors, object_trace, output
from ..utils import object_trace from .parents import InnerData, OuterProxy
from ..utils import output, BColors
T = TypeVar('T', bound=OuterProxy) T = TypeVar('T', bound=OuterProxy)
@dataclass
class AppendHookArguments:
    """
    This class is used to store the arguments for the append hook.
    One instance is built per append and handed to every registered callback.

    The best explanation is with an example:
    ```
    # this is the action that triggers the append hook
    album = Album()
    song = Song()
    album.song_collection.append(song)
    ```

    In this case, the append hook is triggered with the following arguments:
    ```
    AppendHookArguments(
        collection=album.song_collection,
        new_object=song,
        collection_root_objects=[album]
    )
    ```
    """

    # the collection the object was appended to
    collection: Collection
    # the object that has just been appended
    new_object: T
    # the objects the collection belongs to; the caller passes the keys of the
    # collection's `_collection_for` mapping here
    # NOTE(review): that is a dict keys view, not a real `set` - confirm the annotation
    collection_root_objects: Set[InnerData]
class Collection(Generic[T]): class Collection(Generic[T]):
__is_collection__ = True __is_collection__ = True
@@ -27,6 +55,7 @@ class Collection(Generic[T]):
sync_on_append: Dict[str, Collection] = None, sync_on_append: Dict[str, Collection] = None,
append_object_to_attribute: Dict[str, T] = None, append_object_to_attribute: Dict[str, T] = None,
extend_object_to_attribute: Dict[str, Collection] = None, extend_object_to_attribute: Dict[str, Collection] = None,
append_callbacks: List[Callable[[AppendHookArguments], None]] = None,
) -> None: ) -> None:
self._collection_for: dict = dict() self._collection_for: dict = dict()
@@ -41,6 +70,7 @@ class Collection(Generic[T]):
self.sync_on_append: Dict[str, Collection] = sync_on_append or {} self.sync_on_append: Dict[str, Collection] = sync_on_append or {}
self.pull_from: List[Collection] = [] self.pull_from: List[Collection] = []
self.push_to: List[Collection] = [] self.push_to: List[Collection] = []
self.append_callbacks: List[Callable[[AppendHookArguments], None]] = append_callbacks or []
# This is to cleanly unmap previously mapped items by their id # This is to cleanly unmap previously mapped items by their id
self._indexed_from_id: Dict[int, Dict[str, Any]] = defaultdict(dict) self._indexed_from_id: Dict[int, Dict[str, Any]] = defaultdict(dict)
@@ -141,6 +171,14 @@ class Collection(Generic[T]):
for attribute, new_object in self.append_object_to_attribute.items(): for attribute, new_object in self.append_object_to_attribute.items():
other.__getattribute__(attribute).append(new_object, **kwargs) other.__getattribute__(attribute).append(new_object, **kwargs)
append_hook_args = AppendHookArguments(
collection=self,
new_object=other,
collection_root_objects=self._collection_for.keys(),
)
for callback in self.append_callbacks:
callback(append_hook_args)
def append(self, other: Optional[T], **kwargs): def append(self, other: Optional[T], **kwargs):
""" """
If an object, that represents the same entity exists in a relevant collection, If an object, that represents the same entity exists in a relevant collection,

View File

@@ -37,11 +37,19 @@ class FormattedText:
@property @property
def markdown(self) -> str: def markdown(self) -> str:
return md(self.html).strip() return md(self.html).strip()
@markdown.setter
def markdown(self, value: str) -> None:
self.html = mistune.markdown(value)
@property @property
def plain(self) -> str: def plain(self) -> str:
md = self.markdown md = self.markdown
return md.replace("\n\n", "\n") return md.replace("\n\n", "\n")
@plain.setter
def plain(self, value: str) -> None:
self.html = mistune.markdown(plain_to_markdown(value))
def __str__(self) -> str: def __str__(self) -> str:
return self.markdown return self.markdown

View File

@@ -477,6 +477,8 @@ class Artist(Base):
general_genre: str general_genre: str
unformatted_location: str unformatted_location: str
artwork: List[Artwork]
source_collection: SourceCollection source_collection: SourceCollection
contact_collection: Collection[Contact] contact_collection: Collection[Contact]
@@ -493,6 +495,8 @@ class Artist(Base):
"lyrical_themes": list, "lyrical_themes": list,
"general_genre": lambda: "", "general_genre": lambda: "",
"artwork": list,
"source_collection": SourceCollection, "source_collection": SourceCollection,
"album_collection": Collection, "album_collection": Collection,
"contact_collection": Collection, "contact_collection": Collection,
@@ -511,6 +515,7 @@ class Artist(Base):
notes: FormattedText = None, notes: FormattedText = None,
lyrical_themes: List[str] = None, lyrical_themes: List[str] = None,
general_genre: str = None, general_genre: str = None,
artwork: List[Artwork] = None,
unformatted_location: str = None, unformatted_location: str = None,
source_list: List[Source] = None, source_list: List[Source] = None,
contact_list: List[Contact] = None, contact_list: List[Contact] = None,

View File

@@ -3,5 +3,6 @@ from .musify import Musify
from .youtube import YouTube from .youtube import YouTube
from .youtube_music import YoutubeMusic from .youtube_music import YoutubeMusic
from .bandcamp import Bandcamp from .bandcamp import Bandcamp
from .genius import Genius
from .abstract import Page, INDEPENDENT_DB_OBJECTS from .abstract import Page, INDEPENDENT_DB_OBJECTS

View File

@@ -0,0 +1,297 @@
from typing import List, Optional, Type
from urllib.parse import urlparse, urlunparse, urlencode
import json
from enum import Enum
from bs4 import BeautifulSoup
import pycountry
from ..objects import Source, DatabaseObject
from .abstract import Page
from ..objects import (
Artist,
Source,
SourceType,
Song,
Album,
Label,
Target,
Contact,
ID3Timestamp,
Lyrics,
FormattedText,
Artwork,
)
from ..connection import Connection
from ..utils import dump_to_file, traverse_json_path
from ..utils.enums import SourceType, ALL_SOURCE_TYPES
from ..utils.support_classes.download_result import DownloadResult
from ..utils.string_processing import clean_song_title
from ..utils.config import main_settings, logging_settings
from ..utils.shared import DEBUG
if DEBUG:
from ..utils import dump_to_file
class Genius(Page):
    """
    Page implementation for genius.com.

    Searching goes through the `api/search/multi` endpoint, artists and albums
    are read from the `page_data` meta tag of their html page, and songs are
    read from the preloaded state script and the lyrics containers.
    """

    SOURCE_TYPE = ALL_SOURCE_TYPES.GENIUS
    HOST = "genius.com"

    def __init__(self, *args, **kwargs):
        # the connection has to exist before the parent constructor runs
        self.connection: Connection = Connection(
            host="https://genius.com/",
            logger=self.LOGGER,
            module="genius",
        )

        super().__init__(*args, **kwargs)

    def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
        """
        Derive the object type from the url path of the source.

        Paths starting with `artists` map to Artist, paths starting with
        `albums` map to Album and everything else is treated as a Song.
        """
        path = source.parsed_url.path.replace("/", "")

        if path.startswith("artists"):
            return Artist
        if path.startswith("albums"):
            return Album

        return Song

    def add_to_artwork(self, artwork: Artwork, url: str):
        """
        Append `url` to `artwork`, using the dimensions encoded in the url if possible.

        The second to last dot separated fragment of the url is split at `x`;
        the first two parts are used as width and height. If that fragment
        can't be interpreted as dimensions, the url is appended with the
        default dimensions of `Artwork.append`. A url of None is ignored.
        """
        if url is None:
            return

        url_frags = url.split(".")
        if len(url_frags) < 2:
            artwork.append(url=url)
            return

        dimensions = url_frags[-2].split("x")
        if len(dimensions) < 2:
            artwork.append(url=url)
            return

        # a third component is discarded, only width and height are used
        if len(dimensions) == 3:
            dimensions = dimensions[:-1]

        try:
            artwork.append(url=url, width=int(dimensions[0]), height=int(dimensions[1]))
        except ValueError:
            artwork.append(url=url)

    def parse_api_object(self, data: dict) -> Optional[DatabaseObject]:
        """
        Convert one json object of the genius api into an Artist, Album or Song.

        The type is decided by the `_type` key. None is returned if `data` is
        None, if the object has no url, or if the type is not supported.
        """
        if data is None:
            return None

        object_type = data.get("_type")

        artwork = Artwork()
        self.add_to_artwork(artwork, data.get("header_image_url"))
        self.add_to_artwork(artwork, data.get("image_url"))

        additional_sources: List[Source] = []
        source: Source = Source(self.SOURCE_TYPE, data.get("url"), additional_data={
            "id": data.get("id"),
            "slug": data.get("slug"),
            "api_path": data.get("api_path"),
        })

        notes = FormattedText()
        description = data.get("description") or {}
        if "html" in description:
            notes.html = description["html"]
        elif "markdown" in description:
            notes.markdown = description["markdown"]
        elif data.get("description_preview") is not None:
            # `plain` is the setter FormattedText offers for plain text
            notes.plain = data["description_preview"]

        if source.url is None:
            return None

        if object_type == "artist":
            if data.get("instagram_name") is not None:
                additional_sources.append(Source(ALL_SOURCE_TYPES.INSTAGRAM, f"https://www.instagram.com/{data['instagram_name']}/"))
            if data.get("facebook_name") is not None:
                additional_sources.append(Source(ALL_SOURCE_TYPES.FACEBOOK, f"https://www.facebook.com/{data['facebook_name']}/"))
            if data.get("twitter_name") is not None:
                additional_sources.append(Source(ALL_SOURCE_TYPES.TWITTER, f"https://x.com/{data['twitter_name']}/"))

            return Artist(
                name=data["name"].strip() if data.get("name") is not None else None,
                # the social media sources collected above belong to the artist as well
                source_list=[source, *additional_sources],
                artwork=artwork,
                notes=notes,
            )

        if object_type == "album":
            self.add_to_artwork(artwork, data.get("cover_art_thumbnail_url"))
            self.add_to_artwork(artwork, data.get("cover_art_url"))

            for cover_art in (data.get("cover_arts") or []):
                self.add_to_artwork(artwork, cover_art.get("image_url"))
                self.add_to_artwork(artwork, cover_art.get("thumbnail_image_url"))

            # the artist may be missing or unparsable, None must not end up in the list
            album_artist = self.parse_api_object(data.get("artist"))

            return Album(
                title=data["name"].strip() if data.get("name") is not None else None,
                source_list=[source],
                artist_list=[album_artist] if album_artist is not None else [],
                artwork=artwork,
                date=ID3Timestamp(**(data.get("release_date_components") or {})),
            )

        if object_type == "song":
            self.add_to_artwork(artwork, data.get("song_art_image_thumbnail_url"))
            self.add_to_artwork(artwork, data.get("song_art_image_url"))

            main_artist_list = []
            featured_artist_list = []

            _artist_name = None
            primary_artist = self.parse_api_object(data.get("primary_artist"))
            if primary_artist is not None:
                _artist_name = primary_artist.name
                main_artist_list.append(primary_artist)

            # producers and writers are added to the feature artists as well
            for feature_artist in (*(data.get("featured_artists") or []), *(data.get("producer_artists") or []), *(data.get("writer_artists") or [])):
                artist = self.parse_api_object(feature_artist)
                if artist is not None:
                    featured_artist_list.append(artist)

            return Song(
                title=clean_song_title(data.get("title"), artist_name=_artist_name),
                source_list=[source],
                artwork=artwork,
                feature_artist_list=featured_artist_list,
                artist_list=main_artist_list,
            )

        return None

    def general_search(self, search_query: str, **kwargs) -> List[DatabaseObject]:
        """
        Search genius for `search_query` and return every hit that could be parsed.

        An empty list is returned if the request fails.
        """
        results = []

        search_params = {
            "q": search_query,
        }

        r = self.connection.get("https://genius.com/api/search/multi?" + urlencode(search_params), name=f"search_{search_query}")
        if r is None:
            return results

        dump_to_file("search_genius.json", r.text, is_json=True, exit_after_dump=False)
        data = r.json()

        for elements in traverse_json_path(data, "response.sections", default=[]):
            hits = elements.get("hits", [])
            for hit in hits:
                parsed = self.parse_api_object(hit.get("result"))
                if parsed is not None:
                    results.append(parsed)

        return results

    def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
        """
        Fetch the artist page of `source` and parse the embedded `page_data` json.

        An empty Artist is returned if the request fails. If the page carries
        no parsable artist, the empty Artist is kept and only the source is
        attached. `stop_at_level` is not used by this method.
        """
        artist: Artist = Artist()
        # example of the albums api endpoint (not requested here):
        # https://genius.com/api/artists/24527/albums?page=1

        r = self.connection.get(source.url, name=source.url)
        if r is None:
            return artist
        soup = self.get_soup_from_response(r)

        # find the content attribute in the meta tag which is contained in the head
        data_container = soup.find("meta", {"itemprop": "page_data"})
        if data_container is not None:
            content = data_container["content"]
            dump_to_file("genius_itemprop_artist.json", content, is_json=True, exit_after_dump=False)
            data = json.loads(content)

            # keep the empty fallback artist if the page data can't be parsed
            parsed_artist = self.parse_api_object(data.get("artist"))
            if isinstance(parsed_artist, Artist):
                artist = parsed_artist

            for e in (data.get("artist_albums") or []):
                parsed_album = self.parse_api_object(e)
                if not isinstance(parsed_album, Album):
                    continue

                artist.album_collection.append(parsed_album)

            for e in (data.get("artist_songs") or []):
                parsed_song = self.parse_api_object(e)
                if not isinstance(parsed_song, Song):
                    continue

                # TODO
                # fetch the album for these songs, because the api doesn't
                # return them
                artist.album_collection.extend(parsed_song.album_collection)

        artist.source_collection.append(source)

        return artist

    def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
        """
        Fetch the album page of `source` and parse the embedded `page_data` json,
        including the tracklist found under `album_appearances`.

        An empty Album is returned if the request fails. If the page carries
        no parsable album, the empty Album is kept and only the source is
        attached. `stop_at_level` is not used by this method.
        """
        album: Album = Album()

        r = self.connection.get(source.url, name=source.url)
        if r is None:
            return album
        soup = self.get_soup_from_response(r)

        # find the content attribute in the meta tag which is contained in the head
        data_container = soup.find("meta", {"itemprop": "page_data"})
        if data_container is not None:
            content = data_container["content"]
            dump_to_file("genius_itemprop_album.json", content, is_json=True, exit_after_dump=False)
            data = json.loads(content)

            # keep the empty fallback album if the page data can't be parsed
            parsed_album = self.parse_api_object(data.get("album"))
            if isinstance(parsed_album, Album):
                album = parsed_album

            for e in (data.get("album_appearances") or []):
                parsed_song = self.parse_api_object(e.get("song"))
                if not isinstance(parsed_song, Song):
                    continue

                album.song_collection.append(parsed_song)

        album.source_collection.append(source)

        return album

    def get_json_content_from_response(self, response, start: str, end: str) -> Optional[str]:
        """
        Return the text of `response` between the first occurrence of `start`
        and the next occurrence of `end`, or None if either marker is missing.
        """
        content = response.text
        start_index = content.find(start)
        if start_index < 0:
            return None
        start_index += len(start)
        end_index = content.find(end, start_index)
        if end_index < 0:
            return None
        return content[start_index:end_index]

    def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
        """
        Fetch the song page of `source` and collect its lyrics.

        Lyrics are taken from the preloaded state json embedded in a script,
        and additionally from every lyrics container in the html. An empty
        Song is returned if the request fails. `stop_at_level` is not used by
        this method.
        """
        song: Song = Song()

        r = self.connection.get(source.url, name=source.url)
        if r is None:
            return song

        # get the contents that are between `JSON.parse('` and `');`
        content = self.get_json_content_from_response(r, start="window.__PRELOADED_STATE__ = JSON.parse('", end="');\n window.__APP_CONFIG__ = ")
        if content is not None:
            # undo the escaping of the javascript string literal before parsing the json
            content = content.replace("\\\\", "\\").replace('\\"', '"').replace("\\'", "'")
            data = json.loads(content)

            lyrics_html = traverse_json_path(data, "songPage.lyricsData.body.html", default=None)
            if lyrics_html is not None:
                song.lyrics_collection.append(Lyrics(FormattedText(html=lyrics_html)))

            dump_to_file("genius_song_script_json.json", content, is_json=True, exit_after_dump=False)

        soup = self.get_soup_from_response(r)
        for lyrics in soup.find_all("div", {"data-lyrics-container": "true"}):
            lyrics_object = Lyrics(FormattedText(html=lyrics.prettify()))
            song.lyrics_collection.append(lyrics_object)

        song.source_collection.append(source)
        return song

View File

@@ -457,17 +457,17 @@ class Musify(Page):
for album_info in soup.find_all("ul", {"class": "album-info"}): for album_info in soup.find_all("ul", {"class": "album-info"}):
list_element: BeautifulSoup = album_info.find("li") list_element: BeautifulSoup = album_info.find("li")
if list_element is not None: if list_element is not None:
artist_soup: BeautifulSoup artist_soup: BeautifulSoup
for artist_soup in list_element.find_all("a"): for artist_soup in list_element.find_all("a"):
artist_source_list = [] artist_source_list = []
href = artist_soup["href"] href = artist_soup["href"]
if href is not None: if href is not None:
artist_source_list = [Source(self.SOURCE_TYPE, self.HOST + href)] artist_source_list = [Source(self.SOURCE_TYPE, self.HOST + href)]
artist_list.append(Artist( artist_list.append(Artist(
name=artist_soup.text.strip(), name=artist_soup.text.strip(),
source_list=artist_source_list source_list=artist_source_list
)) ))
# breadcrums # breadcrums
breadcrumb_list_element_list: List[BeautifulSoup] = soup.find_all("ol", {"class": "breadcrumb"}) breadcrumb_list_element_list: List[BeautifulSoup] = soup.find_all("ol", {"class": "breadcrumb"})
@@ -485,7 +485,7 @@ class Musify(Page):
track_name = list_points[4].text.strip() track_name = list_points[4].text.strip()
# artwork # album artwork
artwork: Artwork = Artwork() artwork: Artwork = Artwork()
album_image_element_list: List[BeautifulSoup] = soup.find_all("img", {"class": "album-img"}) album_image_element_list: List[BeautifulSoup] = soup.find_all("img", {"class": "album-img"})
for album_image_element in album_image_element_list: for album_image_element in album_image_element_list:
@@ -918,7 +918,8 @@ class Musify(Page):
name=name, name=name,
country=country, country=country,
source_list=source_list, source_list=source_list,
notes=notes notes=notes,
artwork=self._fetch_artist_artwork(soup, **kwargs)
) )
def _parse_album_card(self, album_card: BeautifulSoup, artist_name: str = None, **kwargs) -> Album: def _parse_album_card(self, album_card: BeautifulSoup, artist_name: str = None, **kwargs) -> Album:
@@ -1056,6 +1057,20 @@ class Musify(Page):
artist.album_collection.append(album) artist.album_collection.append(album)
def _fetch_artist_artwork(self, soup: BeautifulSoup, **kwargs) -> List[Artwork]:
    """
    Collect the pictures of the artist gallery.

    Every anchor in `soup` that wraps an image with the class `artist-img`
    is followed, and each image with that class on the fetched page becomes
    its own Artwork.

    :param soup: the parsed artist page
    :return: one Artwork per gallery image, empty if nothing was found
    """
    # artist artwork
    artist_artwork: List[Artwork] = []

    for artist_a_element in soup.find_all("a"):
        # only anchors that contain an artist image lead to the gallery
        if artist_a_element.find("img", {"class": "artist-img"}) is None:
            continue

        gallery_url = artist_a_element.get("data-src", artist_a_element.get("href"))
        if gallery_url is None:
            continue

        gallery_response = self.connection.get(gallery_url)
        if gallery_response is None:
            continue

        # the response has to be parsed before it can be searched for images
        artwork_gallery = self.get_soup_from_response(gallery_response)
        for gallery_image_element in artwork_gallery.find_all("img", {"class": "artist-img"}):
            image_url = gallery_image_element.get("data-src", gallery_image_element.get("src"))
            if image_url is None:
                continue

            gallery_artwork = Artwork()
            gallery_artwork.append(url=image_url)
            artist_artwork.append(gallery_artwork)

    return artist_artwork
def fetch_artist(self, source: Source, **kwargs) -> Artist: def fetch_artist(self, source: Source, **kwargs) -> Artist:
""" """
TODO TODO
@@ -1068,7 +1083,7 @@ class Musify(Page):
artist = self._fetch_initial_artist(url, source=source, **kwargs) artist = self._fetch_initial_artist(url, source=source, **kwargs)
self._fetch_artist_discography(artist, url, artist.name, **kwargs) self._fetch_artist_discography(artist, url, artist.name, **kwargs)
self._fetch_artist_artwork(artist, **kwargs)
return artist return artist
def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label: def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:

View File

@@ -549,6 +549,11 @@ class YoutubeMusic(SuperYouTube):
return album return album
def fetch_lyrics(self, video_id: str, playlist_id: str = None) -> str: def fetch_lyrics(self, video_id: str, playlist_id: str = None) -> str:
"""
1. fetches the tabs of a song, to get the browse id
2. finds the browse id of the lyrics
3. fetches the lyrics with the browse id
"""
request_data = { request_data = {
"context": {**self.credentials.context, "adSignalsInfo": {"params": []}}, "context": {**self.credentials.context, "adSignalsInfo": {"params": []}},
"videoId": video_id, "videoId": video_id,
@@ -575,7 +580,8 @@ class YoutubeMusic(SuperYouTube):
pageType = traverse_json_path(tab, "tabRenderer.endpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig.pageType", default="") pageType = traverse_json_path(tab, "tabRenderer.endpoint.browseEndpoint.browseEndpointContextSupportedConfigs.browseEndpointContextMusicConfig.pageType", default="")
if pageType in ("MUSIC_TAB_TYPE_LYRICS", "MUSIC_PAGE_TYPE_TRACK_LYRICS") or "lyrics" in pageType.lower(): if pageType in ("MUSIC_TAB_TYPE_LYRICS", "MUSIC_PAGE_TYPE_TRACK_LYRICS") or "lyrics" in pageType.lower():
browse_id = traverse_json_path(tab, "tabRenderer.endpoint.browseEndpoint.browseId", default=None) browse_id = traverse_json_path(tab, "tabRenderer.endpoint.browseEndpoint.browseId", default=None)
break if browse_id is not None:
break
if browse_id is None: if browse_id is None:
return None return None
@@ -721,7 +727,6 @@ class YoutubeMusic(SuperYouTube):
self.download_values_by_url[source.url] = { self.download_values_by_url[source.url] = {
"url": _best_format.get("url"), "url": _best_format.get("url"),
"chunk_size": _best_format.get("downloader_options", {}).get("http_chunk_size", main_settings["chunk_size"]),
"headers": _best_format.get("http_headers", {}), "headers": _best_format.get("http_headers", {}),
} }

View File

@@ -15,11 +15,11 @@ __stage__ = os.getenv("STAGE", "prod")
DEBUG = (__stage__ == "dev") and True DEBUG = (__stage__ == "dev") and True
DEBUG_LOGGING = DEBUG and False DEBUG_LOGGING = DEBUG and False
DEBUG_TRACE = DEBUG and True DEBUG_TRACE = DEBUG and True
DEBUG_OBJECT_TRACE = DEBUG and True DEBUG_OBJECT_TRACE = DEBUG and False
DEBUG_OBJECT_TRACE_CALLSTACK = DEBUG_OBJECT_TRACE and False DEBUG_OBJECT_TRACE_CALLSTACK = DEBUG_OBJECT_TRACE and False
DEBUG_YOUTUBE_INITIALIZING = DEBUG and False DEBUG_YOUTUBE_INITIALIZING = DEBUG and False
DEBUG_PAGES = DEBUG and False DEBUG_PAGES = DEBUG and False
DEBUG_DUMP = DEBUG and False DEBUG_DUMP = DEBUG and True
DEBUG_PRINT_ID = DEBUG and True DEBUG_PRINT_ID = DEBUG and True
if DEBUG: if DEBUG: