from __future__ import annotations from collections import defaultdict from enum import Enum from typing import List, Dict, Set, Tuple, Optional, Iterable, Generator from urllib.parse import urlparse, ParseResult from dataclasses import dataclass, field from functools import cached_property from ..utils import generate_id from ..utils.enums.source import SourcePages, SourceTypes from ..utils.config import youtube_settings from ..utils.string_processing import hash_url from .metadata import Mapping, Metadata from .parents import OuterProxy from .collection import Collection @dataclass class Source: url: str page_enum: SourcePages referrer_page: SourcePages audio_url: Optional[str] id: int = field(default_factory=generate_id) additional_data: dict = field(default_factory=dict) def __post_init__(self): self.referrer_page = self.referrer_page or self.page_enum @cached_property def parsed_url(self) -> ParseResult: return urlparse(self.url) @classmethod def match_url(cls, url: str, referrer_page: SourcePages) -> Optional["Source"]: """ this shouldn't be used, unlesse you are not certain what the source is for the reason is that it is more inefficient """ parsed = urlparse(url) url = parsed.geturl() if "musify" in parsed.netloc: return cls(SourcePages.MUSIFY, url, referrer_page=referrer_page) if parsed.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]: return cls(SourcePages.YOUTUBE, url, referrer_page=referrer_page) if url.startswith("https://www.deezer"): return cls(SourcePages.DEEZER, url, referrer_page=referrer_page) if url.startswith("https://open.spotify.com"): return cls(SourcePages.SPOTIFY, url, referrer_page=referrer_page) if "bandcamp" in url: return cls(SourcePages.BANDCAMP, url, referrer_page=referrer_page) if "wikipedia" in parsed.netloc: return cls(SourcePages.WIKIPEDIA, url, referrer_page=referrer_page) if url.startswith("https://www.metal-archives.com/"): return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page) # the less important once if url.startswith("https://www.facebook"): return cls(SourcePages.FACEBOOK, url, referrer_page=referrer_page) if url.startswith("https://www.instagram"): return cls(SourcePages.INSTAGRAM, url, referrer_page=referrer_page) if url.startswith("https://twitter"): return cls(SourcePages.TWITTER, url, referrer_page=referrer_page) if url.startswith("https://myspace.com"): return cls(SourcePages.MYSPACE, url, referrer_page=referrer_page) def get_song_metadata(self) -> Metadata: return Metadata({ Mapping.FILE_WEBPAGE_URL: [self.url], Mapping.SOURCE_WEBPAGE_URL: [self.homepage] }) def get_artist_metadata(self) -> Metadata: return Metadata({ Mapping.ARTIST_WEBPAGE_URL: [self.url] }) @property def hash_url(self) -> str: return hash_url(self.url) @property def metadata(self) -> Metadata: return self.get_song_metadata() @property def indexing_values(self) -> List[Tuple[str, object]]: return [ ('id', self.id), ('url', self.url), ('audio_url', self.audio_url), ] def __str__(self): return self.__repr__() def __repr__(self) -> str: return f"Src({self.page_enum.value}: {self.url}, {self.audio_url})" @property def title_string(self) -> str: return self.url page_str = property(fget=lambda self: self.page_enum.value) type_str = property(fget=lambda self: self.type_enum.value) homepage = property(fget=lambda self: SourcePages.get_homepage(self.page_enum)) class SourceCollection: _page_to_source_list: Dict[SourcePages, List[Source]] def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs): self._page_to_source_list = defaultdict(list) def get_sources(self, *source_pages: List[Source]) -> Generator[Source]: for page in source_pages: yield from self._page_to_source_list[page] def append(self, source: Source): pass def extend(self, sources: Iterable[Source]): for source in sources: self.append(source) @property def source_pages(self) -> Set[SourcePages]: return set(source.page_enum for source in self._data) def get_sources_from_page(self, source_page: SourcePages) -> List[Source]: """ getting the sources for a specific page like YouTube or musify """ return self._page_to_source_list[source_page].copy()