from collections import defaultdict
from enum import Enum
from typing import List, Dict, Set, Tuple, Optional, Iterable
from urllib.parse import urlparse

from ..utils.enums.source import SourcePages, SourceTypes
from ..utils.config import youtube_settings
from ..utils.string_processing import hash_url

from .metadata import Mapping, Metadata
from .parents import OuterProxy
from .collection import Collection


class Source(OuterProxy):
    url: str

    page_enum: SourcePages
    referer_page: SourcePages

    audio_url: str

    _default_factories = {
        "audio_url": lambda: None,
    }

    # This is automatically generated
    def __init__(self, page_enum: SourcePages, url: str, referer_page: SourcePages = None, audio_url: str = None,
                 **kwargs) -> None:

        if referer_page is None:
            referer_page = page_enum

        super().__init__(url=url, page_enum=page_enum, referer_page=referer_page, audio_url=audio_url, **kwargs)
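
    # Usage sketch (illustrative comment only, not executed; the URL is a made-up
    # example value): when the page a URL belongs to is already known, the Source
    # is constructed directly, and referer_page falls back to page_enum.
    #
    #   src = Source(SourcePages.BANDCAMP, "https://artist.bandcamp.com/album/demo")
    #   assert src.referer_page is SourcePages.BANDCAMP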

    @classmethod
    def match_url(cls, url: str, referer_page: SourcePages) -> Optional["Source"]:
        """
        This shouldn't be used unless you are not certain which page the source is for,
        because matching the URL against every known page is less efficient.
        """
        parsed = urlparse(url)
        url = parsed.geturl()

        if "musify" in parsed.netloc:
            return cls(SourcePages.MUSIFY, url, referer_page=referer_page)

        if parsed.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]:
            return cls(SourcePages.YOUTUBE, url, referer_page=referer_page)

        if url.startswith("https://www.deezer"):
            return cls(SourcePages.DEEZER, url, referer_page=referer_page)

        if url.startswith("https://open.spotify.com"):
            return cls(SourcePages.SPOTIFY, url, referer_page=referer_page)

        if "bandcamp" in url:
            return cls(SourcePages.BANDCAMP, url, referer_page=referer_page)

        if "wikipedia" in parsed.netloc:
            return cls(SourcePages.WIKIPEDIA, url, referer_page=referer_page)

        if url.startswith("https://www.metal-archives.com/"):
            return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url, referer_page=referer_page)

        # the less important ones
        if url.startswith("https://www.facebook"):
            return cls(SourcePages.FACEBOOK, url, referer_page=referer_page)

        if url.startswith("https://www.instagram"):
            return cls(SourcePages.INSTAGRAM, url, referer_page=referer_page)

        if url.startswith("https://twitter"):
            return cls(SourcePages.TWITTER, url, referer_page=referer_page)

        if url.startswith("https://myspace.com"):
            return cls(SourcePages.MYSPACE, url, referer_page=referer_page)

        # no known page matched
        return None
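
    # Usage sketch for match_url (illustrative comment only; the URLs are made-up
    # example values): the page is inferred from the URL, and None is returned for
    # anything that matches no known page.
    #
    #   Source.match_url("https://open.spotify.com/track/abc", referer_page=SourcePages.SPOTIFY)
    #   -> Source with page_enum == SourcePages.SPOTIFY
    #   Source.match_url("https://example.com/unknown", referer_page=SourcePages.YOUTUBE)
    #   -> None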

    def get_song_metadata(self) -> Metadata:
        return Metadata({
            Mapping.FILE_WEBPAGE_URL: [self.url],
            Mapping.SOURCE_WEBPAGE_URL: [self.homepage]
        })

    def get_artist_metadata(self) -> Metadata:
        return Metadata({
            Mapping.ARTIST_WEBPAGE_URL: [self.url]
        })

    @property
    def hash_url(self) -> str:
        return hash_url(self.url)

    @property
    def metadata(self) -> Metadata:
        return self.get_song_metadata()
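
    # Metadata sketch (illustrative comment only; the URL is a made-up example value):
    # the metadata property delegates to get_song_metadata, which maps the source URL
    # and the page's homepage into Mapping keys.
    #
    #   Source(SourcePages.YOUTUBE, "https://www.youtube.com/watch?v=example").metadata
    #   -> Metadata({Mapping.FILE_WEBPAGE_URL: [url], Mapping.SOURCE_WEBPAGE_URL: [homepage]})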

    @property
    def indexing_values(self) -> List[Tuple[str, object]]:
        return [
            ('id', self.id),
            ('url', self.url),
            ('audio_url', self.audio_url),
        ]

    def __str__(self):
        return self.__repr__()

    def __repr__(self) -> str:
        return f"Src({self.page_enum.value}: {self.url}, {self.audio_url})"

    page_str = property(fget=lambda self: self.page_enum.value)
    # type_str relies on a type_enum attribute that is not declared in this class
    type_str = property(fget=lambda self: self.type_enum.value)
    homepage = property(fget=lambda self: SourcePages.get_homepage(self.page_enum))


class SourceCollection(Collection):
    def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs):
        self._page_to_source_list: Dict[SourcePages, List[Source]] = defaultdict(list)

        super().__init__(data=data, **kwargs)

    def _map_element(self, __object: Source, **kwargs):
        super()._map_element(__object, **kwargs)

        # keep a per-page index of sources for fast lookup in get_sources_from_page
        self._page_to_source_list[__object.page_enum].append(__object)

    @property
    def source_pages(self) -> Set[SourcePages]:
        return set(source.page_enum for source in self._data)

    def get_sources_from_page(self, source_page: SourcePages) -> List[Source]:
        """
        Get all sources belonging to a specific page, e.g. YouTube or Musify.
        """
        # return a copy so callers cannot mutate the internal per-page list
        return self._page_to_source_list[source_page].copy()
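
# Usage sketch for SourceCollection (illustrative comment only; URLs are made-up
# example values). This assumes the base Collection passes the initial data through
# _map_element, so the per-page index is filled on construction.
#
#   sources = SourceCollection([
#       Source(SourcePages.YOUTUBE, "https://www.youtube.com/watch?v=example"),
#       Source(SourcePages.BANDCAMP, "https://artist.bandcamp.com/album/demo"),
#   ])
#   sources.source_pages                                 # {SourcePages.YOUTUBE, SourcePages.BANDCAMP}
#   sources.get_sources_from_page(SourcePages.BANDCAMP)  # [the bandcamp Source]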