draft: no metadata function for source
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful

This commit is contained in:
Hazel 2024-04-29 17:06:31 +02:00
parent 1971982d27
commit 8f9858da60
6 changed files with 81 additions and 84 deletions

View File

@ -169,6 +169,8 @@ class Song(Base):
id3Mapping.GENRE: [self.genre], id3Mapping.GENRE: [self.genre],
id3Mapping.TRACKNUMBER: [self.tracksort_str], id3Mapping.TRACKNUMBER: [self.tracksort_str],
id3Mapping.COMMENT: [self.note.markdown], id3Mapping.COMMENT: [self.note.markdown],
id3Mapping.FILE_WEBPAGE_URL: self.source_collection.url_list,
id3Mapping.SOURCE_WEBPAGE_URL: self.source_collection.homepage_list,
}) })
# metadata.merge_many([s.get_song_metadata() for s in self.source_collection]) album sources have no relevant metadata for id3 # metadata.merge_many([s.get_song_metadata() for s in self.source_collection]) album sources have no relevant metadata for id3
@ -555,7 +557,8 @@ class Artist(Base):
@property @property
def metadata(self) -> Metadata: def metadata(self) -> Metadata:
metadata = Metadata({ metadata = Metadata({
id3Mapping.ARTIST: [self.name] id3Mapping.ARTIST: [self.name],
id3Mapping.ARTIST_WEBPAGE_URL: self.source_collection.url_list,
}) })
metadata.merge_many([s.get_artist_metadata() for s in self.source_collection]) metadata.merge_many([s.get_artist_metadata() for s in self.source_collection])

View File

@ -7,10 +7,9 @@ from urllib.parse import urlparse, ParseResult
from dataclasses import dataclass, field from dataclasses import dataclass, field
from functools import cached_property from functools import cached_property
from ..utils import generate_id
from ..utils.enums.source import SourcePages, SourceTypes from ..utils.enums.source import SourcePages, SourceTypes
from ..utils.config import youtube_settings from ..utils.config import youtube_settings
from ..utils.string_processing import hash_url from ..utils.string_processing import hash_url, shorten_display_url
from .metadata import Mapping, Metadata from .metadata import Mapping, Metadata
from .parents import OuterProxy from .parents import OuterProxy
@ -20,12 +19,11 @@ from .collection import Collection
@dataclass @dataclass
class Source: class Source:
url: str
page_enum: SourcePages page_enum: SourcePages
referrer_page: SourcePages url: str
audio_url: Optional[str] referrer_page: SourcePages = None
audio_url: Optional[str] = None
id: int = field(default_factory=generate_id)
additional_data: dict = field(default_factory=dict) additional_data: dict = field(default_factory=dict)
def __post_init__(self): def __post_init__(self):
@ -36,7 +34,7 @@ class Source:
return urlparse(self.url) return urlparse(self.url)
@classmethod @classmethod
def match_url(cls, url: str, referrer_page: SourcePages) -> Optional["Source"]: def match_url(cls, url: str, referrer_page: SourcePages) -> Optional[Source]:
""" """
this shouldn't be used, unless you are not certain what the source is for this shouldn't be used, unless you are not certain what the source is for
the reason is that it is more inefficient the reason is that it is more inefficient
@ -78,73 +76,78 @@ class Source:
if url.startswith("https://myspace.com"): if url.startswith("https://myspace.com"):
return cls(SourcePages.MYSPACE, url, referrer_page=referrer_page) return cls(SourcePages.MYSPACE, url, referrer_page=referrer_page)
def get_song_metadata(self) -> Metadata:
return Metadata({
Mapping.FILE_WEBPAGE_URL: [self.url],
Mapping.SOURCE_WEBPAGE_URL: [self.homepage]
})
def get_artist_metadata(self) -> Metadata:
return Metadata({
Mapping.ARTIST_WEBPAGE_URL: [self.url]
})
@property @property
def hash_url(self) -> str: def hash_url(self) -> str:
return hash_url(self.url) return hash_url(self.url)
@property @property
def metadata(self) -> Metadata: def indexing_values(self) -> list:
return self.get_song_metadata() r = [hash_url(self.url)]
if self.audio_url:
@property r.append(hash_url(self.audio_url))
def indexing_values(self) -> List[Tuple[str, object]]: return r
return [
('id', self.id),
('url', self.url),
('audio_url', self.audio_url),
]
def __str__(self):
return self.__repr__()
def __repr__(self) -> str: def __repr__(self) -> str:
return f"Src({self.page_enum.value}: {self.url}, {self.audio_url})" return f"Src({self.page_enum.value}: {shorten_display_url(self.url)})"
@property def __merge__(self, other: Source, override: bool = False):
def title_string(self) -> str: if self.audio_url is None:
return self.url self.audio_url = other.audio_url
self.additional_data.update(other.additional_data)
page_str = property(fget=lambda self: self.page_enum.value) page_str = property(fget=lambda self: self.page_enum.value)
type_str = property(fget=lambda self: self.type_enum.value)
homepage = property(fget=lambda self: SourcePages.get_homepage(self.page_enum))
class SourceCollection: class SourceCollection:
_indexed_sources: Dict[str, Source]
_page_to_source_list: Dict[SourcePages, List[Source]] _page_to_source_list: Dict[SourcePages, List[Source]]
def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs): def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs):
self._page_to_source_list = defaultdict(list) self._page_to_source_list = defaultdict(list)
self._indexed_sources = {}
self.extend(data or [])
def get_sources(self, *source_pages: List[Source]) -> Generator[Source]: def get_sources(self, *source_pages: List[Source]) -> Generator[Source]:
if not len(source_pages):
source_pages = self.source_pages
for page in source_pages: for page in source_pages:
yield from self._page_to_source_list[page] yield from self._page_to_source_list[page]
def append(self, source: Source): def append(self, source: Source):
pass existing_source = None
for key in source.indexing_values:
if key in self._indexed_sources:
existing_source = self._indexed_sources[key]
break
if existing_source is not None:
existing_source.__merge__(source)
source = existing_source
for key in source.indexing_values:
self._indexed_sources[key] = source
self._page_to_source_list[source.page_enum].append(source)
def extend(self, sources: Iterable[Source]): def extend(self, sources: Iterable[Source]):
for source in sources: for source in sources:
self.append(source) self.append(source)
def __iter__(self):
yield from self.get_sources()
def __merge__(self, other: SourceCollection, override: bool = False):
self.extend(other)
@property @property
def source_pages(self) -> Set[SourcePages]: def source_pages(self) -> Iterable[SourcePages]:
return set(source.page_enum for source in self._data) return sorted(self._page_to_source_list.keys(), key=lambda page: page.value)
def get_sources_from_page(self, source_page: SourcePages) -> List[Source]: @property
""" def url_list(self) -> List[str]:
getting the sources for a specific page like return [source.url for source in self.get_sources(SourcePages.ARTIST)]
YouTube or musify
""" @property
return self._page_to_source_list[source_page].copy() def homepage_list(self) -> List[str]:
return [source.homepage for source in self.source_pages]

View File

@ -246,7 +246,7 @@ class Page:
# only certain database objects have a source list # only certain database objects have a source list
if isinstance(music_object, INDEPENDENT_DB_OBJECTS): if isinstance(music_object, INDEPENDENT_DB_OBJECTS):
source: Source source: Source
for source in music_object.source_collection.get_sources_from_page(self.SOURCE_TYPE): for source in music_object.source_collection.get_sources(self.SOURCE_TYPE):
if music_object.already_fetched_from(source.hash_url): if music_object.already_fetched_from(source.hash_url):
continue continue
@ -419,7 +419,7 @@ class Page:
if song.target_collection.empty: if song.target_collection.empty:
song.target_collection.append(new_target) song.target_collection.append(new_target)
sources = song.source_collection.get_sources_from_page(self.SOURCE_TYPE) sources = song.source_collection.get_sources(self.SOURCE_TYPE)
if len(sources) == 0: if len(sources) == 0:
return DownloadResult(error_message=f"No source found for {song.title} as {self.__class__.__name__}.") return DownloadResult(error_message=f"No source found for {song.title} as {self.__class__.__name__}.")

View File

@ -693,7 +693,7 @@ class Musify(Page):
if stop_at_level > 1: if stop_at_level > 1:
song: Song song: Song
for song in album.song_collection: for song in album.song_collection:
sources = song.source_collection.get_sources_from_page(self.SOURCE_TYPE) sources = song.source_collection.get_sources(self.SOURCE_TYPE)
for source in sources: for source in sources:
song.merge(self.fetch_song(source=source)) song.merge(self.fetch_song(source=source))

View File

@ -9,42 +9,32 @@ class SourceTypes(Enum):
class SourcePages(Enum): class SourcePages(Enum):
YOUTUBE = "youtube" YOUTUBE = "youtube", "https://www.youtube.com/"
MUSIFY = "musify" MUSIFY = "musify", "https://musify.club/"
YOUTUBE_MUSIC = "youtube music" YOUTUBE_MUSIC = "youtube music", "https://music.youtube.com/"
GENIUS = "genius" GENIUS = "genius", "https://genius.com/"
MUSICBRAINZ = "musicbrainz" MUSICBRAINZ = "musicbrainz", "https://musicbrainz.org/"
ENCYCLOPAEDIA_METALLUM = "encyclopaedia metallum" ENCYCLOPAEDIA_METALLUM = "encyclopaedia metallum"
BANDCAMP = "bandcamp" BANDCAMP = "bandcamp", "https://bandcamp.com/"
DEEZER = "deezer" DEEZER = "deezer", "https://www.deezer.com/"
SPOTIFY = "spotify" SPOTIFY = "spotify", "https://open.spotify.com/"
# This has nothing to do with audio, but bands can be here # This has nothing to do with audio, but bands can be here
WIKIPEDIA = "wikipedia" WIKIPEDIA = "wikipedia", "https://en.wikipedia.org/wiki/Main_Page"
INSTAGRAM = "instagram" INSTAGRAM = "instagram", "https://www.instagram.com/"
FACEBOOK = "facebook" FACEBOOK = "facebook", "https://www.facebook.com/"
TWITTER = "twitter" # I will use nitter though lol TWITTER = "twitter", "https://twitter.com/"
MYSPACE = "myspace" # Yes somehow this ancient site is linked EVERYWHERE MYSPACE = "myspace", "https://myspace.com/" # Yes somehow this ancient site is linked EVERYWHERE
MANUAL = "manual" MANUAL = "manual", ""
PRESET = "preset" PRESET = "preset", ""
@classmethod def __new__(cls, value, homepage = None):
def get_homepage(cls, attribute) -> str: member = object.__new__(cls)
homepage_map = {
cls.YOUTUBE: "https://www.youtube.com/", member._value_ = value
cls.MUSIFY: "https://musify.club/", member.homepage = homepage
cls.MUSICBRAINZ: "https://musicbrainz.org/",
cls.ENCYCLOPAEDIA_METALLUM: "https://www.metal-archives.com/", return member
cls.GENIUS: "https://genius.com/",
cls.BANDCAMP: "https://bandcamp.com/",
cls.DEEZER: "https://www.deezer.com/",
cls.INSTAGRAM: "https://www.instagram.com/",
cls.FACEBOOK: "https://www.facebook.com/",
cls.SPOTIFY: "https://open.spotify.com/",
cls.TWITTER: "https://twitter.com/",
cls.MYSPACE: "https://myspace.com/",
cls.WIKIPEDIA: "https://en.wikipedia.org/wiki/Main_Page"
}
return homepage_map[attribute]

View File

@ -134,6 +134,7 @@ def unify_punctuation(to_unify: str, unify_to: str = UNIFY_TO) -> str:
to_unify = to_unify.replace(char, unify_to) to_unify = to_unify.replace(char, unify_to)
return to_unify return to_unify
@lru_cache(maxsize=128)
def hash_url(url: Union[str, ParseResult]) -> str: def hash_url(url: Union[str, ParseResult]) -> str:
if isinstance(url, str): if isinstance(url, str):
url = urlparse(url) url = urlparse(url)