music-kraken-core/music_kraken/objects/source.py

from __future__ import annotations

from collections import defaultdict
from enum import Enum
from typing import List, Dict, Set, Tuple, Optional, Iterable
from urllib.parse import urlparse

from ..utils.enums.source import SourcePages, SourceTypes
from ..utils.config import youtube_settings
from ..utils.string_processing import hash_url

from .metadata import Mapping, Metadata
from .parents import OuterProxy
from .collection import Collection


class Source(OuterProxy):
    """
    A url that points to one of the pages listed in ``SourcePages``.

    ``referer_page`` falls back to ``page_enum`` if it is not passed to the
    constructor, and ``audio_url`` defaults to ``None``.
    """

    url: str

    page_enum: SourcePages
    referer_page: SourcePages

    audio_url: str

    _default_factories = {
        "audio_url": lambda: None,
    }

    # This is automatically generated
    def __init__(self, page_enum: SourcePages, url: str, referer_page: SourcePages = None, audio_url: str = None,
                 **kwargs) -> None:
        # without an explicit referer, the source refers to its own page
        super().__init__(
            url=url,
            page_enum=page_enum,
            referer_page=page_enum if referer_page is None else referer_page,
            audio_url=audio_url,
            **kwargs,
        )

    @classmethod
    def match_url(cls, url: str, referer_page: SourcePages) -> Optional["Source"]:
        """
        Guess which page a url belongs to and build the matching source.

        This should only be used if it is not certain what page the url
        is for, because probing the known pages one after another is less
        efficient than passing the page directly.

        The checks run in a fixed order and the first one that matches wins.
        ``None`` is returned if the url matches none of them.
        """
        parts = urlparse(url)
        host = parts.netloc
        # continue with the url as urlparse re-assembles it
        url = parts.geturl()

        def build(page: SourcePages) -> "Source":
            return cls(page, url, referer_page=referer_page)

        if "musify" in host:
            return build(SourcePages.MUSIFY)

        youtube_hosts = [entry.netloc for entry in youtube_settings['youtube_url']]
        if host in youtube_hosts:
            return build(SourcePages.YOUTUBE)

        if url.startswith("https://www.deezer"):
            return build(SourcePages.DEEZER)

        if url.startswith("https://open.spotify.com"):
            return build(SourcePages.SPOTIFY)

        # matched against the whole url, not only the host
        if "bandcamp" in url:
            return build(SourcePages.BANDCAMP)

        if "wikipedia" in host:
            return build(SourcePages.WIKIPEDIA)

        if url.startswith("https://www.metal-archives.com/"):
            return build(SourcePages.ENCYCLOPAEDIA_METALLUM)

        # the less important ones
        if url.startswith("https://www.facebook"):
            return build(SourcePages.FACEBOOK)

        if url.startswith("https://www.instagram"):
            return build(SourcePages.INSTAGRAM)

        if url.startswith("https://twitter"):
            return build(SourcePages.TWITTER)

        if url.startswith("https://myspace.com"):
            return build(SourcePages.MYSPACE)

        return None

    def get_song_metadata(self) -> Metadata:
        """
        Metadata for a song: the url of this source as the file webpage
        and the homepage of its page as the source webpage.
        """
        song_mapping = {
            Mapping.FILE_WEBPAGE_URL: [self.url],
            Mapping.SOURCE_WEBPAGE_URL: [self.homepage],
        }
        return Metadata(song_mapping)

    def get_artist_metadata(self) -> Metadata:
        """
        Metadata for an artist: the url of this source as the artist webpage.
        """
        artist_mapping = {Mapping.ARTIST_WEBPAGE_URL: [self.url]}
        return Metadata(artist_mapping)

    @property
    def hash_url(self) -> str:
        """The url of this source passed through the ``hash_url`` string helper."""
        # inside the method body the name resolves to the imported function, not to this property
        return hash_url(self.url)

    @property
    def metadata(self) -> Metadata:
        """The song metadata of this source, see ``get_song_metadata``."""
        return self.get_song_metadata()

    @property
    def indexing_values(self) -> List[Tuple[str, object]]:
        """``(name, value)`` pairs for the id, the url and the audio url of this source."""
        pairs = [
            ('id', self.id),
            ('url', self.url),
            ('audio_url', self.audio_url),
        ]
        return pairs

    def __str__(self):
        # the string form is the same as the repr
        return self.__repr__()

    def __repr__(self) -> str:
        return f"Src({self.page_enum.value}: {self.url}, {self.audio_url})"

    @property
    def title_string(self) -> str:
        """The url doubles as the title of a source."""
        return self.url

    @property
    def page_str(self):
        """The value of ``page_enum``."""
        return self.page_enum.value

    @property
    def type_str(self):
        """The value of ``type_enum``."""
        # NOTE(review): ``type_enum`` is not declared on this class in the visible
        # source - confirm that the parent class provides it
        return self.type_enum.value

    @property
    def homepage(self):
        """The homepage ``SourcePages.get_homepage`` returns for ``page_enum``."""
        return SourcePages.get_homepage(self.page_enum)


class SourceCollection(Collection):
    """
    A collection of sources that additionally keeps, for every page,
    a list of the sources that were mapped for it.
    """

    def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs):
        # created before the parent constructor runs
        # NOTE(review): presumably because the parent constructor already maps ``data`` - confirm
        self._page_to_source_list: Dict[SourcePages, List[Source]] = defaultdict(list)

        super().__init__(data=data, **kwargs)

    def _map_element(self, __object: Source, **kwargs):
        """
        Lets the parent class map the source and then files it under its page.
        """
        super()._map_element(__object, **kwargs)

        sources_of_page = self._page_to_source_list[__object.page_enum]
        sources_of_page.append(__object)

    @property
    def source_pages(self) -> Set[SourcePages]:
        """The pages of all sources in ``self._data``."""
        return {src.page_enum for src in self._data}

    def get_sources_from_page(self, source_page: SourcePages) -> List[Source]:
        """
        getting the sources for a specific page like
        YouTube or musify

        A new list is returned, so changing it doesn't change the internal mapping.
        """
        return list(self._page_to_source_list[source_page])
fix: raised the recursion limit in debug to 500 2024-04-16 11:23:20 +00:00			`from __future__ import annotations`

refactorings 2023-03-09 21:14:39 +00:00			`from collections import defaultdict`
dasfh 2023-01-12 15:25:50 +00:00			`from enum import Enum`
fixed the mapping in source maps 2023-12-29 14:43:33 +00:00			`from typing import List, Dict, Set, Tuple, Optional, Iterable`
added fetching of linked sources to musify 2023-03-18 12:01:27 +00:00			`from urllib.parse import urlparse`
dasfh 2023-01-12 15:25:50 +00:00
refactored source 2023-04-18 09:18:17 +00:00			`from ..utils.enums.source import SourcePages, SourceTypes`
started to migrate to new config 2023-09-10 14:27:09 +00:00			`from ..utils.config import youtube_settings`
feat: added base functionality of artwork class 2024-04-10 14:39:46 +00:00			`from ..utils.string_processing import hash_url`
started to migrate to new config 2023-09-10 14:27:09 +00:00
refactor 2023-03-10 08:09:35 +00:00			`from .metadata import Mapping, Metadata`
feat: fixed bugs 2023-12-19 21:11:46 +00:00			`from .parents import OuterProxy`
refactorings 2023-03-09 21:14:39 +00:00			`from .collection import Collection`
dasfh 2023-01-12 15:25:50 +00:00
added source to artist 2023-01-20 22:05:15 +00:00
feat: fixed bugs 2023-12-19 21:11:46 +00:00			`class Source(OuterProxy):`
feat: added annotations for init 2023-12-29 20:16:09 +00:00			`url: str`

feat: fixed bugs 2023-12-19 21:11:46 +00:00			`page_enum: SourcePages`
			`referer_page: SourcePages`

			`audio_url: str`

feat: completed the default factories 2023-12-20 08:55:09 +00:00			`_default_factories = {`
feat: added annotations for init 2023-12-29 20:16:09 +00:00			`"audio_url": lambda: None,`
refactored merging function to take default values other than None into account parents.py 2023-03-18 11:36:53 +00:00			`}`
dasfh 2023-01-12 15:25:50 +00:00
feat: added annotations for init 2023-12-29 20:16:09 +00:00			`# This is automatically generated`
fix: fixed previous introduced bugs 2023-12-29 20:50:40 +00:00			`def __init__(self, page_enum: SourcePages, url: str, referer_page: SourcePages = None, audio_url: str = None,`
feat: added annotations for init 2023-12-29 20:16:09 +00:00			`**kwargs) -> None:`

feat: fixed bugs 2023-12-19 21:11:46 +00:00			`if referer_page is None:`
			`referer_page = page_enum`

feat: added annotations for init 2023-12-29 20:16:09 +00:00			`super().__init__(url=url, page_enum=page_enum, referer_page=referer_page, audio_url=audio_url, **kwargs)`
dasfh 2023-01-12 15:25:50 +00:00
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00			`@classmethod`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`def match_url(cls, url: str, referer_page: SourcePages) -> Optional["Source"]:`
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00			`"""`
			`this shouldn't be used, unlesse you are not certain what the source is for`
			`the reason is that it is more inefficient`
			`"""`
added fetching of linked sources to musify 2023-03-18 12:01:27 +00:00			`parsed = urlparse(url)`
			`url = parsed.geturl()`
made the direct downloads work 2023-03-30 10:31:37 +00:00
			`if "musify" in parsed.netloc:`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.MUSIFY, url, referer_page=referer_page)`
added fetching of linked sources to musify 2023-03-18 12:01:27 +00:00
started to migrate to new config 2023-09-10 14:27:09 +00:00			`if parsed.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]:`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.YOUTUBE, url, referer_page=referer_page)`
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00
			`if url.startswith("https://www.deezer"):`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.DEEZER, url, referer_page=referer_page)`
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00
			`if url.startswith("https://open.spotify.com"):`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.SPOTIFY, url, referer_page=referer_page)`
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00
			`if "bandcamp" in url:`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.BANDCAMP, url, referer_page=referer_page)`
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00
added wikipedia 2023-03-18 16:06:12 +00:00			`if "wikipedia" in parsed.netloc:`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.WIKIPEDIA, url, referer_page=referer_page)`
added wikipedia 2023-03-18 16:06:12 +00:00
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00			`if url.startswith("https://www.metal-archives.com/"):`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url, referer_page=referer_page)`
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00
			`# the less important once`
			`if url.startswith("https://www.facebook"):`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.FACEBOOK, url, referer_page=referer_page)`
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00
			`if url.startswith("https://www.instagram"):`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.INSTAGRAM, url, referer_page=referer_page)`
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00
			`if url.startswith("https://twitter"):`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.TWITTER, url, referer_page=referer_page)`
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00
added fetching of linked sources to musify 2023-03-18 12:01:27 +00:00			`if url.startswith("https://myspace.com"):`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.MYSPACE, url, referer_page=referer_page)`
added fetching of linked sources to musify 2023-03-18 12:01:27 +00:00
fixed exceptions 2023-03-10 09:13:35 +00:00			`def get_song_metadata(self) -> Metadata:`
			`return Metadata({`
finished refactoring metadata 2023-01-30 13:41:02 +00:00			`Mapping.FILE_WEBPAGE_URL: [self.url],`
			`Mapping.SOURCE_WEBPAGE_URL: [self.homepage]`
			`})`

fixed exceptions 2023-03-10 09:13:35 +00:00			`def get_artist_metadata(self) -> Metadata:`
			`return Metadata({`
finished refactoring metadata 2023-01-30 13:41:02 +00:00			`Mapping.ARTIST_WEBPAGE_URL: [self.url]`
			`})`

feat: massive improvements to the fetch and download order 2024-04-10 09:20:49 +00:00			`@property`
			`def hash_url(self) -> str:`
feat: added base functionality of artwork class 2024-04-10 14:39:46 +00:00			`return hash_url(self.url)`
feat: massive improvements to the fetch and download order 2024-04-10 09:20:49 +00:00
refactor 2023-03-10 08:09:35 +00:00			`@property`
			`def metadata(self) -> Metadata:`
fixed crash while tagging that arouse from refactor 2023-04-18 13:31:41 +00:00			`return self.get_song_metadata()`
dfsa 2023-01-12 16:14:21 +00:00
implemented DatabaseObject.indexing_values for each data objects 2023-03-09 18:53:28 +00:00			`@property`
			`def indexing_values(self) -> List[Tuple[str, object]]:`
			`return [`
			`('id', self.id),`
Fixed bug with inconsistent dynamic creation of direct download links 2023-04-18 13:24:39 +00:00			`('url', self.url),`
			`('audio_url', self.audio_url),`
implemented DatabaseObject.indexing_values for each data objects 2023-03-09 18:53:28 +00:00			`]`
fix: recursion depth error 2024-04-18 13:30:04 +00:00
dasfh 2023-01-12 15:25:50 +00:00			`def __str__(self):`
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00			`return self.__repr__()`
dasfh 2023-01-12 15:25:50 +00:00
finished refactoring metadata 2023-01-30 13:41:02 +00:00			`def __repr__(self) -> str:`
Fixed bug with inconsistent dynamic creation of direct download links 2023-04-18 13:24:39 +00:00			`return f"Src({self.page_enum.value}: {self.url}, {self.audio_url})"`
finished refactoring metadata 2023-01-30 13:41:02 +00:00
fix: raised the recursion limit in debug to 500 2024-04-16 11:23:20 +00:00			`@property`
			`def title_string(self) -> str:`
			`return self.url`

added source to artist 2023-01-20 22:05:15 +00:00			`page_str = property(fget=lambda self: self.page_enum.value)`
added type to src 2023-01-20 09:56:40 +00:00			`type_str = property(fget=lambda self: self.type_enum.value)`
added source to artist 2023-01-20 22:05:15 +00:00			`homepage = property(fget=lambda self: SourcePages.get_homepage(self.page_enum))`
source 2023-01-25 13:14:15 +00:00

refactorings 2023-03-09 21:14:39 +00:00			`class SourceCollection(Collection):`
fixed the mapping in source maps 2023-12-29 14:43:33 +00:00			`def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs):`
refactorings 2023-03-09 21:14:39 +00:00			`self._page_to_source_list: Dict[SourcePages, List[Source]] = defaultdict(list)`

fixed the mapping in source maps 2023-12-29 14:43:33 +00:00			`super().__init__(data=data, **kwargs)`
fixed exceptions 2023-03-10 09:13:35 +00:00
fixed the mapping in source maps 2023-12-29 14:43:33 +00:00			`def _map_element(self, __object: Source, **kwargs):`
			`super()._map_element(__object, **kwargs)`
refactorings 2023-03-09 21:14:39 +00:00
fixed the mapping in source maps 2023-12-29 14:43:33 +00:00			`self._page_to_source_list[__object.page_enum].append(__object)`
added wrapper methods, for the basic fuctionalities of the webscraper 2023-05-26 09:41:20 +00:00
			`@property`
			`def source_pages(self) -> Set[SourcePages]:`
			`return set(source.page_enum for source in self._data)`
refactorings 2023-03-09 21:14:39 +00:00
			`def get_sources_from_page(self, source_page: SourcePages) -> List[Source]:`
			`"""`
			`getting the sources for a specific page like`
			`YouTube or musify`
			`"""`
added fetching of linked sources to musify 2023-03-18 12:01:27 +00:00			`return self._page_to_source_list[source_page].copy()`