draft: no metadata function for source
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
This commit is contained in:
parent
1971982d27
commit
8f9858da60
@ -169,6 +169,8 @@ class Song(Base):
|
|||||||
id3Mapping.GENRE: [self.genre],
|
id3Mapping.GENRE: [self.genre],
|
||||||
id3Mapping.TRACKNUMBER: [self.tracksort_str],
|
id3Mapping.TRACKNUMBER: [self.tracksort_str],
|
||||||
id3Mapping.COMMENT: [self.note.markdown],
|
id3Mapping.COMMENT: [self.note.markdown],
|
||||||
|
id3Mapping.FILE_WEBPAGE_URL: self.source_collection.url_list,
|
||||||
|
id3Mapping.SOURCE_WEBPAGE_URL: self.source_collection.homepage_list,
|
||||||
})
|
})
|
||||||
|
|
||||||
# metadata.merge_many([s.get_song_metadata() for s in self.source_collection]) album sources have no relevant metadata for id3
|
# metadata.merge_many([s.get_song_metadata() for s in self.source_collection]) album sources have no relevant metadata for id3
|
||||||
@ -555,7 +557,8 @@ class Artist(Base):
|
|||||||
@property
|
@property
|
||||||
def metadata(self) -> Metadata:
|
def metadata(self) -> Metadata:
|
||||||
metadata = Metadata({
|
metadata = Metadata({
|
||||||
id3Mapping.ARTIST: [self.name]
|
id3Mapping.ARTIST: [self.name],
|
||||||
|
id3Mapping.ARTIST_WEBPAGE_URL: self.source_collection.url_list,
|
||||||
})
|
})
|
||||||
metadata.merge_many([s.get_artist_metadata() for s in self.source_collection])
|
metadata.merge_many([s.get_artist_metadata() for s in self.source_collection])
|
||||||
|
|
||||||
|
@ -7,10 +7,9 @@ from urllib.parse import urlparse, ParseResult
|
|||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from functools import cached_property
|
from functools import cached_property
|
||||||
|
|
||||||
from ..utils import generate_id
|
|
||||||
from ..utils.enums.source import SourcePages, SourceTypes
|
from ..utils.enums.source import SourcePages, SourceTypes
|
||||||
from ..utils.config import youtube_settings
|
from ..utils.config import youtube_settings
|
||||||
from ..utils.string_processing import hash_url
|
from ..utils.string_processing import hash_url, shorten_display_url
|
||||||
|
|
||||||
from .metadata import Mapping, Metadata
|
from .metadata import Mapping, Metadata
|
||||||
from .parents import OuterProxy
|
from .parents import OuterProxy
|
||||||
@ -20,12 +19,11 @@ from .collection import Collection
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Source:
|
class Source:
|
||||||
url: str
|
|
||||||
page_enum: SourcePages
|
page_enum: SourcePages
|
||||||
referrer_page: SourcePages
|
url: str
|
||||||
audio_url: Optional[str]
|
referrer_page: SourcePages = None
|
||||||
|
audio_url: Optional[str] = None
|
||||||
|
|
||||||
id: int = field(default_factory=generate_id)
|
|
||||||
additional_data: dict = field(default_factory=dict)
|
additional_data: dict = field(default_factory=dict)
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
@ -36,7 +34,7 @@ class Source:
|
|||||||
return urlparse(self.url)
|
return urlparse(self.url)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def match_url(cls, url: str, referrer_page: SourcePages) -> Optional["Source"]:
|
def match_url(cls, url: str, referrer_page: SourcePages) -> Optional[Source]:
|
||||||
"""
|
"""
|
||||||
this shouldn't be used, unless you are not certain what the source is for
|
this shouldn't be used, unless you are not certain what the source is for
|
||||||
the reason is that it is less efficient
|
the reason is that it is less efficient
|
||||||
@ -78,73 +76,78 @@ class Source:
|
|||||||
if url.startswith("https://myspace.com"):
|
if url.startswith("https://myspace.com"):
|
||||||
return cls(SourcePages.MYSPACE, url, referrer_page=referrer_page)
|
return cls(SourcePages.MYSPACE, url, referrer_page=referrer_page)
|
||||||
|
|
||||||
def get_song_metadata(self) -> Metadata:
|
|
||||||
return Metadata({
|
|
||||||
Mapping.FILE_WEBPAGE_URL: [self.url],
|
|
||||||
Mapping.SOURCE_WEBPAGE_URL: [self.homepage]
|
|
||||||
})
|
|
||||||
|
|
||||||
def get_artist_metadata(self) -> Metadata:
|
|
||||||
return Metadata({
|
|
||||||
Mapping.ARTIST_WEBPAGE_URL: [self.url]
|
|
||||||
})
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def hash_url(self) -> str:
|
def hash_url(self) -> str:
|
||||||
return hash_url(self.url)
|
return hash_url(self.url)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def metadata(self) -> Metadata:
|
def indexing_values(self) -> list:
|
||||||
return self.get_song_metadata()
|
r = [hash_url(self.url)]
|
||||||
|
if self.audio_url:
|
||||||
@property
|
r.append(hash_url(self.audio_url))
|
||||||
def indexing_values(self) -> List[Tuple[str, object]]:
|
return r
|
||||||
return [
|
|
||||||
('id', self.id),
|
|
||||||
('url', self.url),
|
|
||||||
('audio_url', self.audio_url),
|
|
||||||
]
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return self.__repr__()
|
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
return f"Src({self.page_enum.value}: {self.url}, {self.audio_url})"
|
return f"Src({self.page_enum.value}: {shorten_display_url(self.url)})"
|
||||||
|
|
||||||
@property
|
def __merge__(self, other: Source, override: bool = False):
|
||||||
def title_string(self) -> str:
|
if self.audio_url is None:
|
||||||
return self.url
|
self.audio_url = other.audio_url
|
||||||
|
self.additional_data.update(other.additional_data)
|
||||||
|
|
||||||
page_str = property(fget=lambda self: self.page_enum.value)
|
page_str = property(fget=lambda self: self.page_enum.value)
|
||||||
type_str = property(fget=lambda self: self.type_enum.value)
|
|
||||||
homepage = property(fget=lambda self: SourcePages.get_homepage(self.page_enum))
|
|
||||||
|
|
||||||
|
|
||||||
class SourceCollection:
|
class SourceCollection:
|
||||||
|
_indexed_sources: Dict[str, Source]
|
||||||
_page_to_source_list: Dict[SourcePages, List[Source]]
|
_page_to_source_list: Dict[SourcePages, List[Source]]
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs):
|
def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs):
|
||||||
self._page_to_source_list = defaultdict(list)
|
self._page_to_source_list = defaultdict(list)
|
||||||
|
self._indexed_sources = {}
|
||||||
|
|
||||||
|
self.extend(data or [])
|
||||||
|
|
||||||
def get_sources(self, *source_pages: List[Source]) -> Generator[Source]:
|
def get_sources(self, *source_pages: List[Source]) -> Generator[Source]:
|
||||||
|
if not len(source_pages):
|
||||||
|
source_pages = self.source_pages
|
||||||
|
|
||||||
for page in source_pages:
|
for page in source_pages:
|
||||||
yield from self._page_to_source_list[page]
|
yield from self._page_to_source_list[page]
|
||||||
|
|
||||||
def append(self, source: Source):
|
def append(self, source: Source):
|
||||||
pass
|
existing_source = None
|
||||||
|
for key in source.indexing_values:
|
||||||
|
if key in self._indexed_sources:
|
||||||
|
existing_source = self._indexed_sources[key]
|
||||||
|
break
|
||||||
|
|
||||||
|
if existing_source is not None:
|
||||||
|
existing_source.__merge__(source)
|
||||||
|
source = existing_source
|
||||||
|
|
||||||
|
for key in source.indexing_values:
|
||||||
|
self._indexed_sources[key] = source
|
||||||
|
self._page_to_source_list[source.page_enum].append(source)
|
||||||
|
|
||||||
def extend(self, sources: Iterable[Source]):
|
def extend(self, sources: Iterable[Source]):
|
||||||
for source in sources:
|
for source in sources:
|
||||||
self.append(source)
|
self.append(source)
|
||||||
|
|
||||||
@property
|
def __iter__(self):
|
||||||
def source_pages(self) -> Set[SourcePages]:
|
yield from self.get_sources()
|
||||||
return set(source.page_enum for source in self._data)
|
|
||||||
|
|
||||||
def get_sources_from_page(self, source_page: SourcePages) -> List[Source]:
|
def __merge__(self, other: SourceCollection, override: bool = False):
|
||||||
"""
|
self.extend(other)
|
||||||
getting the sources for a specific page like
|
|
||||||
YouTube or musify
|
@property
|
||||||
"""
|
def source_pages(self) -> Iterable[SourcePages]:
|
||||||
return self._page_to_source_list[source_page].copy()
|
return sorted(self._page_to_source_list.keys(), key=lambda page: page.value)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def url_list(self) -> List[str]:
|
||||||
|
return [source.url for source in self.get_sources(SourcePages.ARTIST)]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def homepage_list(self) -> List[str]:
|
||||||
|
return [source.homepage for source in self.source_pages]
|
@ -246,7 +246,7 @@ class Page:
|
|||||||
# only certain database objects have a source list
|
# only certain database objects have a source list
|
||||||
if isinstance(music_object, INDEPENDENT_DB_OBJECTS):
|
if isinstance(music_object, INDEPENDENT_DB_OBJECTS):
|
||||||
source: Source
|
source: Source
|
||||||
for source in music_object.source_collection.get_sources_from_page(self.SOURCE_TYPE):
|
for source in music_object.source_collection.get_sources(self.SOURCE_TYPE):
|
||||||
if music_object.already_fetched_from(source.hash_url):
|
if music_object.already_fetched_from(source.hash_url):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@ -419,7 +419,7 @@ class Page:
|
|||||||
if song.target_collection.empty:
|
if song.target_collection.empty:
|
||||||
song.target_collection.append(new_target)
|
song.target_collection.append(new_target)
|
||||||
|
|
||||||
sources = song.source_collection.get_sources_from_page(self.SOURCE_TYPE)
|
sources = song.source_collection.get_sources(self.SOURCE_TYPE)
|
||||||
if len(sources) == 0:
|
if len(sources) == 0:
|
||||||
return DownloadResult(error_message=f"No source found for {song.title} as {self.__class__.__name__}.")
|
return DownloadResult(error_message=f"No source found for {song.title} as {self.__class__.__name__}.")
|
||||||
|
|
||||||
|
@ -693,7 +693,7 @@ class Musify(Page):
|
|||||||
if stop_at_level > 1:
|
if stop_at_level > 1:
|
||||||
song: Song
|
song: Song
|
||||||
for song in album.song_collection:
|
for song in album.song_collection:
|
||||||
sources = song.source_collection.get_sources_from_page(self.SOURCE_TYPE)
|
sources = song.source_collection.get_sources(self.SOURCE_TYPE)
|
||||||
for source in sources:
|
for source in sources:
|
||||||
song.merge(self.fetch_song(source=source))
|
song.merge(self.fetch_song(source=source))
|
||||||
|
|
||||||
|
@ -9,42 +9,32 @@ class SourceTypes(Enum):
|
|||||||
|
|
||||||
|
|
||||||
class SourcePages(Enum):
|
class SourcePages(Enum):
|
||||||
YOUTUBE = "youtube"
|
YOUTUBE = "youtube", "https://www.youtube.com/"
|
||||||
MUSIFY = "musify"
|
MUSIFY = "musify", "https://musify.club/"
|
||||||
YOUTUBE_MUSIC = "youtube music"
|
YOUTUBE_MUSIC = "youtube music", "https://music.youtube.com/"
|
||||||
GENIUS = "genius"
|
GENIUS = "genius", "https://genius.com/"
|
||||||
MUSICBRAINZ = "musicbrainz"
|
MUSICBRAINZ = "musicbrainz", "https://musicbrainz.org/"
|
||||||
ENCYCLOPAEDIA_METALLUM = "encyclopaedia metallum"
|
ENCYCLOPAEDIA_METALLUM = "encyclopaedia metallum"
|
||||||
BANDCAMP = "bandcamp"
|
BANDCAMP = "bandcamp", "https://bandcamp.com/"
|
||||||
DEEZER = "deezer"
|
DEEZER = "deezer", "https://www.deezer.com/"
|
||||||
SPOTIFY = "spotify"
|
SPOTIFY = "spotify", "https://open.spotify.com/"
|
||||||
|
|
||||||
# This has nothing to do with audio, but bands can be here
|
# This has nothing to do with audio, but bands can be here
|
||||||
WIKIPEDIA = "wikipedia"
|
WIKIPEDIA = "wikipedia", "https://en.wikipedia.org/wiki/Main_Page"
|
||||||
INSTAGRAM = "instagram"
|
INSTAGRAM = "instagram", "https://www.instagram.com/"
|
||||||
FACEBOOK = "facebook"
|
FACEBOOK = "facebook", "https://www.facebook.com/"
|
||||||
TWITTER = "twitter" # I will use nitter though lol
|
TWITTER = "twitter", "https://twitter.com/"
|
||||||
MYSPACE = "myspace" # Yes somehow this ancient site is linked EVERYWHERE
|
MYSPACE = "myspace", "https://myspace.com/" # Yes somehow this ancient site is linked EVERYWHERE
|
||||||
|
|
||||||
MANUAL = "manual"
|
MANUAL = "manual", ""
|
||||||
|
|
||||||
PRESET = "preset"
|
PRESET = "preset", ""
|
||||||
|
|
||||||
|
def __new__(cls, value, homepage = None):
|
||||||
|
member = object.__new__(cls)
|
||||||
|
|
||||||
|
member._value_ = value
|
||||||
|
member.homepage = homepage
|
||||||
|
|
||||||
|
return member
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_homepage(cls, attribute) -> str:
|
|
||||||
homepage_map = {
|
|
||||||
cls.YOUTUBE: "https://www.youtube.com/",
|
|
||||||
cls.MUSIFY: "https://musify.club/",
|
|
||||||
cls.MUSICBRAINZ: "https://musicbrainz.org/",
|
|
||||||
cls.ENCYCLOPAEDIA_METALLUM: "https://www.metal-archives.com/",
|
|
||||||
cls.GENIUS: "https://genius.com/",
|
|
||||||
cls.BANDCAMP: "https://bandcamp.com/",
|
|
||||||
cls.DEEZER: "https://www.deezer.com/",
|
|
||||||
cls.INSTAGRAM: "https://www.instagram.com/",
|
|
||||||
cls.FACEBOOK: "https://www.facebook.com/",
|
|
||||||
cls.SPOTIFY: "https://open.spotify.com/",
|
|
||||||
cls.TWITTER: "https://twitter.com/",
|
|
||||||
cls.MYSPACE: "https://myspace.com/",
|
|
||||||
cls.WIKIPEDIA: "https://en.wikipedia.org/wiki/Main_Page"
|
|
||||||
}
|
|
||||||
return homepage_map[attribute]
|
|
@ -134,6 +134,7 @@ def unify_punctuation(to_unify: str, unify_to: str = UNIFY_TO) -> str:
|
|||||||
to_unify = to_unify.replace(char, unify_to)
|
to_unify = to_unify.replace(char, unify_to)
|
||||||
return to_unify
|
return to_unify
|
||||||
|
|
||||||
|
@lru_cache(maxsize=128)
|
||||||
def hash_url(url: Union[str, ParseResult]) -> str:
|
def hash_url(url: Union[str, ParseResult]) -> str:
|
||||||
if isinstance(url, str):
|
if isinstance(url, str):
|
||||||
url = urlparse(url)
|
url = urlparse(url)
|
||||||
|
Loading…
Reference in New Issue
Block a user