music-kraken-core/music_kraken/objects/source.py

151 lines
4.8 KiB
Python
Raw Normal View History

from __future__ import annotations
2023-03-09 21:14:39 +00:00
from collections import defaultdict
2023-01-12 15:25:50 +00:00
from enum import Enum
2024-04-29 12:40:49 +00:00
from typing import List, Dict, Set, Tuple, Optional, Iterable, Generator
from urllib.parse import urlparse, ParseResult
from dataclasses import dataclass, field
from functools import cached_property
2023-01-12 15:25:50 +00:00
2024-04-29 12:40:49 +00:00
from ..utils import generate_id
2023-04-18 09:18:17 +00:00
from ..utils.enums.source import SourcePages, SourceTypes
2023-09-10 14:27:09 +00:00
from ..utils.config import youtube_settings
from ..utils.string_processing import hash_url
2023-09-10 14:27:09 +00:00
2023-03-10 08:09:35 +00:00
from .metadata import Mapping, Metadata
2023-12-19 21:11:46 +00:00
from .parents import OuterProxy
2023-03-09 21:14:39 +00:00
from .collection import Collection
2023-01-12 15:25:50 +00:00
2023-01-20 22:05:15 +00:00
@dataclass
class Source:
    """A URL together with the page (site) it belongs to.

    Instances can be built directly or, when the page is not known
    beforehand, through :meth:`match_url`.
    """

    # The URL this source points to.
    url: str
    # The page (site) the URL belongs to.
    page_enum: SourcePages
    # Falls back to ``page_enum`` when falsy (see ``__post_init__``).
    # NOTE(review): presumably the page on which this URL was discovered - confirm.
    referrer_page: Optional[SourcePages] = None
    # NOTE(review): presumably a direct link to the audio itself - confirm.
    audio_url: Optional[str] = None

    id: int = field(default_factory=generate_id)
    additional_data: dict = field(default_factory=dict)

    def __post_init__(self):
        """Default ``referrer_page`` to ``page_enum`` when it was not given."""
        self.referrer_page = self.referrer_page or self.page_enum

    @cached_property
    def parsed_url(self) -> ParseResult:
        """Return ``url`` parsed with ``urlparse``; computed once per instance."""
        return urlparse(self.url)

    @classmethod
    def match_url(cls, url: str, referrer_page: SourcePages) -> Optional["Source"]:
        """Guess the page from ``url`` and build a matching ``Source``.

        This shouldn't be used unless you are not certain which page the
        url belongs to, because it is less efficient than constructing the
        ``Source`` with a known page.

        :param url: the url to classify; it is normalised through
            ``urlparse(url).geturl()`` before being stored.
        :param referrer_page: passed through to the created ``Source``.
        :return: the new ``Source``, or ``None`` if no known page matches.
        """
        parsed = urlparse(url)
        url = parsed.geturl()

        def build(page: SourcePages) -> "Source":
            # Keyword arguments on purpose: the dataclass field order is
            # (url, page_enum, ...), so passing (page, url) positionally
            # would store them in the wrong fields.
            return cls(url=url, page_enum=page, referrer_page=referrer_page)

        if "musify" in parsed.netloc:
            return build(SourcePages.MUSIFY)

        if parsed.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]:
            return build(SourcePages.YOUTUBE)

        if url.startswith("https://www.deezer"):
            return build(SourcePages.DEEZER)

        if url.startswith("https://open.spotify.com"):
            return build(SourcePages.SPOTIFY)

        if "bandcamp" in url:
            return build(SourcePages.BANDCAMP)

        if "wikipedia" in parsed.netloc:
            return build(SourcePages.WIKIPEDIA)

        if url.startswith("https://www.metal-archives.com/"):
            return build(SourcePages.ENCYCLOPAEDIA_METALLUM)

        # the less important ones
        if url.startswith("https://www.facebook"):
            return build(SourcePages.FACEBOOK)

        if url.startswith("https://www.instagram"):
            return build(SourcePages.INSTAGRAM)

        if url.startswith("https://twitter"):
            return build(SourcePages.TWITTER)

        if url.startswith("https://myspace.com"):
            return build(SourcePages.MYSPACE)

        return None

    def get_song_metadata(self) -> Metadata:
        """Return metadata holding this url and the homepage of its page."""
        return Metadata({
            Mapping.FILE_WEBPAGE_URL: [self.url],
            Mapping.SOURCE_WEBPAGE_URL: [self.homepage]
        })

    def get_artist_metadata(self) -> Metadata:
        """Return metadata holding this url as the artist webpage url."""
        return Metadata({
            Mapping.ARTIST_WEBPAGE_URL: [self.url]
        })

    @property
    def hash_url(self) -> str:
        """Return the result of the ``hash_url`` helper applied to ``url``."""
        return hash_url(self.url)

    @property
    def metadata(self) -> Metadata:
        """Alias for :meth:`get_song_metadata`."""
        return self.get_song_metadata()

    @property
    def indexing_values(self) -> List[Tuple[str, object]]:
        """Return the ``(name, value)`` pairs for ``id``, ``url`` and ``audio_url``."""
        return [
            ('id', self.id),
            ('url', self.url),
            ('audio_url', self.audio_url),
        ]

    def __str__(self):
        return self.__repr__()

    def __repr__(self) -> str:
        return f"Src({self.page_enum.value}: {self.url}, {self.audio_url})"

    @property
    def title_string(self) -> str:
        """Return the url, which serves as the title of a source."""
        return self.url

    page_str = property(fget=lambda self: self.page_enum.value)
    # NOTE(review): no ``type_enum`` attribute is defined in this class, so
    # reading ``type_str`` raises AttributeError unless the attribute is set
    # from outside - confirm whether this property is still needed.
    type_str = property(fget=lambda self: self.type_enum.value)
    homepage = property(fget=lambda self: SourcePages.get_homepage(self.page_enum))
2023-01-25 13:14:15 +00:00
2024-04-29 12:40:49 +00:00
class SourceCollection:
    """A collection of ``Source`` objects grouped by the page they belong to."""

    # Maps each page to the sources registered for it, in insertion order.
    _page_to_source_list: Dict[SourcePages, List[Source]]

    def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs):
        """Create the collection, optionally pre-filled with ``data``.

        :param data: sources to add right away; ``None`` means empty.
        :param kwargs: accepted and ignored.
        """
        self._page_to_source_list = defaultdict(list)

        if data is not None:
            self.extend(data)

    def get_sources(self, *source_pages: SourcePages) -> Generator[Source, None, None]:
        """Yield the sources of every given page, page by page.

        Pages without any source contribute nothing; calling this without
        any page yields nothing.
        """
        for page in source_pages:
            # .get() instead of indexing, so that merely asking for a page
            # does not create an empty entry in the defaultdict.
            yield from self._page_to_source_list.get(page, ())

    def append(self, source: Optional[Source]):
        """Register ``source`` under its ``page_enum``; ``None`` is ignored."""
        if source is None:
            return

        self._page_to_source_list[source.page_enum].append(source)

    def extend(self, sources: Iterable[Source]):
        """Append every source of ``sources`` in order."""
        for source in sources:
            self.append(source)

    @property
    def source_pages(self) -> Set[SourcePages]:
        """Return the set of pages that have at least one source."""
        return {
            page
            for page, sources in self._page_to_source_list.items()
            if sources
        }

    def get_sources_from_page(self, source_page: SourcePages) -> List[Source]:
        """
        getting the sources for a specific page like
        YouTube or musify

        The returned list is a copy, so modifying it does not change the
        collection.
        """
        return list(self._page_to_source_list.get(source_page, ()))