from collections import defaultdict
from enum import Enum
from typing import List, Dict, Set, Tuple, Optional, Iterable
from urllib.parse import urlparse

from ..utils.enums.source import SourcePages, SourceTypes
from ..utils.config import youtube_settings
from ..utils.string_processing import hash_url

from .metadata import Mapping, Metadata
from .parents import OuterProxy
from .collection import Collection


class Source(OuterProxy):
    url: str

    page_enum: SourcePages
    referer_page: SourcePages

    audio_url: str

    _default_factories = {
        "audio_url": lambda: None,
    }

    # This is automatically generated
    def __init__(self, page_enum: SourcePages, url: str, referer_page: SourcePages = None, audio_url: str = None,
                 **kwargs) -> None:

        if referer_page is None:
            referer_page = page_enum

        super().__init__(url=url, page_enum=page_enum, referer_page=referer_page, audio_url=audio_url, **kwargs)
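
    # Usage sketch (illustrative comment only, not executed; the URL is a made-up
    # example value): when the page a URL belongs to is already known, the Source
    # is constructed directly, and referer_page falls back to page_enum.
    #
    #   src = Source(SourcePages.BANDCAMP, "https://artist.bandcamp.com/album/demo")
    #   assert src.referer_page is SourcePages.BANDCAMP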

    @classmethod
    def match_url(cls, url: str, referer_page: SourcePages) -> Optional["Source"]:
        """
        This shouldn't be used unless you are not certain which page the source is for,
        because matching the URL against every known page is less efficient.
        """
        parsed = urlparse(url)
        url = parsed.geturl()

        if "musify" in parsed.netloc:
            return cls(SourcePages.MUSIFY, url, referer_page=referer_page)

        if parsed.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]:
            return cls(SourcePages.YOUTUBE, url, referer_page=referer_page)

        if url.startswith("https://www.deezer"):
            return cls(SourcePages.DEEZER, url, referer_page=referer_page)

        if url.startswith("https://open.spotify.com"):
            return cls(SourcePages.SPOTIFY, url, referer_page=referer_page)

        if "bandcamp" in url:
            return cls(SourcePages.BANDCAMP, url, referer_page=referer_page)

        if "wikipedia" in parsed.netloc:
            return cls(SourcePages.WIKIPEDIA, url, referer_page=referer_page)

        if url.startswith("https://www.metal-archives.com/"):
            return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url, referer_page=referer_page)

        # the less important ones
        if url.startswith("https://www.facebook"):
            return cls(SourcePages.FACEBOOK, url, referer_page=referer_page)

        if url.startswith("https://www.instagram"):
            return cls(SourcePages.INSTAGRAM, url, referer_page=referer_page)

        if url.startswith("https://twitter"):
            return cls(SourcePages.TWITTER, url, referer_page=referer_page)

        if url.startswith("https://myspace.com"):
            return cls(SourcePages.MYSPACE, url, referer_page=referer_page)

        # no known page matched
        return None
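
    # Usage sketch for match_url (illustrative comment only; the URLs are made-up
    # example values): the page is inferred from the URL, and None is returned for
    # anything that matches no known page.
    #
    #   Source.match_url("https://open.spotify.com/track/abc", referer_page=SourcePages.SPOTIFY)
    #   -> Source with page_enum == SourcePages.SPOTIFY
    #   Source.match_url("https://example.com/unknown", referer_page=SourcePages.YOUTUBE)
    #   -> None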

    def get_song_metadata(self) -> Metadata:
        return Metadata({
            Mapping.FILE_WEBPAGE_URL: [self.url],
            Mapping.SOURCE_WEBPAGE_URL: [self.homepage]
        })

    def get_artist_metadata(self) -> Metadata:
        return Metadata({
            Mapping.ARTIST_WEBPAGE_URL: [self.url]
        })

    @property
    def hash_url(self) -> str:
        return hash_url(self.url)

    @property
    def metadata(self) -> Metadata:
        return self.get_song_metadata()
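
    # Metadata sketch (illustrative comment only; the URL is a made-up example value):
    # the metadata property delegates to get_song_metadata, which maps the source URL
    # and the page's homepage into Mapping keys.
    #
    #   Source(SourcePages.YOUTUBE, "https://www.youtube.com/watch?v=example").metadata
    #   -> Metadata({Mapping.FILE_WEBPAGE_URL: [url], Mapping.SOURCE_WEBPAGE_URL: [homepage]})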

    @property
    def indexing_values(self) -> List[Tuple[str, object]]:
        return [
            ('id', self.id),
            ('url', self.url),
            ('audio_url', self.audio_url),
        ]

    def __str__(self):
        return self.__repr__()

    def __repr__(self) -> str:
        return f"Src({self.page_enum.value}: {self.url}, {self.audio_url})"

    page_str = property(fget=lambda self: self.page_enum.value)
    # type_str relies on a type_enum attribute that is not declared in this class
    type_str = property(fget=lambda self: self.type_enum.value)
    homepage = property(fget=lambda self: SourcePages.get_homepage(self.page_enum))


class SourceCollection(Collection):
    def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs):
        self._page_to_source_list: Dict[SourcePages, List[Source]] = defaultdict(list)

        super().__init__(data=data, **kwargs)

    def _map_element(self, __object: Source, **kwargs):
        super()._map_element(__object, **kwargs)

        # keep a per-page index of sources for fast lookup in get_sources_from_page
        self._page_to_source_list[__object.page_enum].append(__object)

    @property
    def source_pages(self) -> Set[SourcePages]:
        return set(source.page_enum for source in self._data)

    def get_sources_from_page(self, source_page: SourcePages) -> List[Source]:
        """
        Get all sources belonging to a specific page, e.g. YouTube or Musify.
        """
        # return a copy so callers cannot mutate the internal per-page list
        return self._page_to_source_list[source_page].copy()
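
# Usage sketch for SourceCollection (illustrative comment only; URLs are made-up
# example values). This assumes the base Collection passes the initial data through
# _map_element, so the per-page index is filled on construction.
#
#   sources = SourceCollection([
#       Source(SourcePages.YOUTUBE, "https://www.youtube.com/watch?v=example"),
#       Source(SourcePages.BANDCAMP, "https://artist.bandcamp.com/album/demo"),
#   ])
#   sources.source_pages                                 # {SourcePages.YOUTUBE, SourcePages.BANDCAMP}
#   sources.get_sources_from_page(SourcePages.BANDCAMP)  # [the bandcamp Source]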