music-kraken-core/src/music_kraken/objects/source.py

from collections import defaultdict
from enum import Enum
from typing import List, Dict, Set, Tuple, Optional
from urllib.parse import urlparse

from ..utils.enums.source import SourcePages, SourceTypes
from .metadata import Mapping, Metadata
from .parents import DatabaseObject
from .collection import Collection


class Source(DatabaseObject):
    """
    A link to a page on one of the supported `SourcePages`,
    optionally paired with a direct audio url.

    Create it roughly like this:
    ```python
    # the url won't be a valid one, because it is just an example
    Source(SourcePages.YOUTUBE, url="https://youtu.be/dfnsdajlhkjhsd")
    ```
    """
    COLLECTION_ATTRIBUTES = tuple()
    SIMPLE_ATTRIBUTES = {
        "page_enum": None,
        "url": None,
        "referer_page": None,
        "audio_url": None
    }

    def __init__(
        self,
        page_enum: SourcePages,
        url: str = None,
        id_: str = None,
        referer_page: SourcePages = None,
        adio_url: str = None
    ) -> None:
        """
        :param page_enum: the page this source points to
        :param url: the url of the source on that page
        :param id_: forwarded to DatabaseObject.__init__
        :param referer_page: the page the source was found on; falls back to page_enum
        :param adio_url: stored as the attribute audio_url
        """
        # NOTE(review): `adio_url` looks like a typo of `audio_url`; the name is
        # kept because callers may already pass it by keyword.
        DatabaseObject.__init__(self, id_=id_)

        self.page_enum = page_enum
        # without an explicit referer the source counts as its own referer
        self.referer_page = page_enum if referer_page is None else referer_page

        self.url = url
        self.audio_url = adio_url

    @classmethod
    def match_url(cls, url: str, referer_page: SourcePages) -> Optional["Source"]:
        """
        Guess which page a url belongs to and build the matching Source.

        This shouldn't be used unless you are not certain which page the url
        belongs to, because it is less efficient than creating the Source directly.

        :param url: the url to classify
        :param referer_page: passed on as referer_page of the created Source
        :return: a Source for the first page that matches, or None if none does
        """
        parsed = urlparse(url)
        url = parsed.geturl()

        if "musify" in parsed.netloc:
            return cls(SourcePages.MUSIFY, url, referer_page=referer_page)

        if url.startswith("https://www.youtube"):
            return cls(SourcePages.YOUTUBE, url, referer_page=referer_page)

        if url.startswith("https://www.deezer"):
            return cls(SourcePages.DEEZER, url, referer_page=referer_page)

        if url.startswith("https://open.spotify.com"):
            return cls(SourcePages.SPOTIFY, url, referer_page=referer_page)

        # Only the host is inspected: "bandcamp" appearing in the path or query
        # of another site's url must not turn that url into a bandcamp source.
        if "bandcamp" in parsed.netloc:
            return cls(SourcePages.BANDCAMP, url, referer_page=referer_page)

        if "wikipedia" in parsed.netloc:
            return cls(SourcePages.WIKIPEDIA, url, referer_page=referer_page)

        if url.startswith("https://www.metal-archives.com/"):
            return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url, referer_page=referer_page)

        # the less important ones
        if url.startswith("https://www.facebook"):
            return cls(SourcePages.FACEBOOK, url, referer_page=referer_page)

        if url.startswith("https://www.instagram"):
            return cls(SourcePages.INSTAGRAM, url, referer_page=referer_page)

        if url.startswith("https://twitter"):
            return cls(SourcePages.TWITTER, url, referer_page=referer_page)

        if url.startswith("https://myspace.com"):
            return cls(SourcePages.MYSPACE, url, referer_page=referer_page)

        # no known page matched
        return None

    def get_song_metadata(self) -> Metadata:
        """Build the Metadata for a song: the url of the source and the homepage of its page."""
        return Metadata({
            Mapping.FILE_WEBPAGE_URL: [self.url],
            Mapping.SOURCE_WEBPAGE_URL: [self.homepage]
        })

    def get_artist_metadata(self) -> Metadata:
        """Build the Metadata for an artist: only the url of the source."""
        return Metadata({
            Mapping.ARTIST_WEBPAGE_URL: [self.url]
        })

    @property
    def metadata(self) -> Metadata:
        """The song metadata, see get_song_metadata."""
        return self.get_song_metadata()

    @property
    def indexing_values(self) -> List[Tuple[str, object]]:
        """The (attribute name, value) pairs this source is indexed by."""
        return [
            ('id', self.id),
            ('url', self.url),
            ('audio_url', self.audio_url),
        ]

    def __str__(self):
        return self.__repr__()

    def __repr__(self) -> str:
        return f"Src({self.page_enum.value}: {self.url}, {self.audio_url})"

    page_str = property(fget=lambda self: self.page_enum.value)
    # NOTE(review): `type_enum` is never assigned in this class, so unless
    # DatabaseObject provides it this property raises AttributeError - confirm.
    type_str = property(fget=lambda self: self.type_enum.value)
    homepage = property(fget=lambda self: SourcePages.get_homepage(self.page_enum))


class SourceCollection(Collection):
    """
    A Collection of Source objects that additionally keeps
    an index from each page to the sources of that page.
    """

    def __init__(self, source_list: List[Source]):
        """
        :param source_list: the sources handed to Collection.__init__ as data
        """
        # The index has to exist before the parent constructor runs;
        # presumably Collection.__init__ passes source_list through
        # map_element - TODO confirm against Collection.
        self._page_to_source_list: Dict[SourcePages, List[Source]] = defaultdict(list)

        super().__init__(data=source_list, element_type=Source)

    def map_element(self, source: Source):
        """Let the parent class map the source, then add it to the per-page index."""
        super().map_element(source)

        self._page_to_source_list[source.page_enum].append(source)

    @property
    def source_pages(self) -> Set[SourcePages]:
        """The set of pages the sources in self._data belong to."""
        return {source.page_enum for source in self._data}

    def get_sources_from_page(self, source_page: SourcePages) -> List[Source]:
        """
        Get the sources of one specific page, like YouTube or musify.

        :param source_page: the page to look up
        :return: a new list of the matching sources; empty if the page has none
        """
        # .get() instead of indexing: indexing the defaultdict would insert an
        # empty list for every page that is merely asked about.
        return list(self._page_to_source_list.get(source_page, ()))
refactorings 2023-03-09 21:14:39 +00:00			`from collections import defaultdict`
dasfh 2023-01-12 15:25:50 +00:00			`from enum import Enum`
added wrapper methods, for the basic fuctionalities of the webscraper 2023-05-26 09:41:20 +00:00			`from typing import List, Dict, Set, Tuple, Optional`
added fetching of linked sources to musify 2023-03-18 12:01:27 +00:00			`from urllib.parse import urlparse`
dasfh 2023-01-12 15:25:50 +00:00
refactored source 2023-04-18 09:18:17 +00:00			`from ..utils.enums.source import SourcePages, SourceTypes`
refactor 2023-03-10 08:09:35 +00:00			`from .metadata import Mapping, Metadata`
refactorings 2023-03-09 21:14:39 +00:00			`from .parents import DatabaseObject`
			`from .collection import Collection`
dasfh 2023-01-12 15:25:50 +00:00
added source to artist 2023-01-20 22:05:15 +00:00
fixed exceptions 2023-03-10 09:13:35 +00:00			`class Source(DatabaseObject):`
dasfh 2023-01-12 15:25:50 +00:00			`"""`
			`create somehow like that`
			```python
			`# url won't be a valid one due to it being just an example`
			`Source(src="youtube", url="https://youtu.be/dfnsdajlhkjhsd")`
			```
			`"""`
attribute 2023-03-13 13:33:17 +00:00			`COLLECTION_ATTRIBUTES = tuple()`
refactored merging function to take default values other than None into account parents.py 2023-03-18 11:36:53 +00:00			`SIMPLE_ATTRIBUTES = {`
			`"page_enum": None,`
refactored source 2023-04-18 09:18:17 +00:00			`"url": None,`
			`"referer_page": None,`
			`"audio_url": None`
refactored merging function to take default values other than None into account parents.py 2023-03-18 11:36:53 +00:00			`}`
dasfh 2023-01-12 15:25:50 +00:00
refactored source 2023-04-18 09:18:17 +00:00			`def __init__(`
			`self,`
			`page_enum: SourcePages,`
added parsing of the real download link 2023-04-18 13:13:18 +00:00			`url: str = None,`
refactored source 2023-04-18 09:18:17 +00:00			`id_: str = None,`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`referer_page: SourcePages = None,`
refactored source 2023-04-18 09:18:17 +00:00			`adio_url: str = None`
			`) -> None:`
dasfh 2023-01-12 15:25:50 +00:00			`DatabaseObject.__init__(self, id_=id_)`

added source to artist 2023-01-20 22:05:15 +00:00			`self.page_enum = page_enum`
refactored source 2023-04-18 09:18:17 +00:00			`self.referer_page = page_enum if referer_page is None else referer_page`
added source to artist 2023-01-20 22:05:15 +00:00
dasfh 2023-01-12 15:25:50 +00:00			`self.url = url`
refactored source 2023-04-18 09:18:17 +00:00			`self.audio_url = adio_url`
dasfh 2023-01-12 15:25:50 +00:00
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00			`@classmethod`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`def match_url(cls, url: str, referer_page: SourcePages) -> Optional["Source"]:`
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00			`"""`
			`this shouldn't be used, unlesse you are not certain what the source is for`
			`the reason is that it is more inefficient`
			`"""`
added fetching of linked sources to musify 2023-03-18 12:01:27 +00:00			`parsed = urlparse(url)`
			`url = parsed.geturl()`
made the direct downloads work 2023-03-30 10:31:37 +00:00
			`if "musify" in parsed.netloc:`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.MUSIFY, url, referer_page=referer_page)`
added fetching of linked sources to musify 2023-03-18 12:01:27 +00:00
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00			`if url.startswith("https://www.youtube"):`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.YOUTUBE, url, referer_page=referer_page)`
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00
			`if url.startswith("https://www.deezer"):`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.DEEZER, url, referer_page=referer_page)`
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00
			`if url.startswith("https://open.spotify.com"):`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.SPOTIFY, url, referer_page=referer_page)`
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00
			`if "bandcamp" in url:`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.BANDCAMP, url, referer_page=referer_page)`
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00
added wikipedia 2023-03-18 16:06:12 +00:00			`if "wikipedia" in parsed.netloc:`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.WIKIPEDIA, url, referer_page=referer_page)`
added wikipedia 2023-03-18 16:06:12 +00:00
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00			`if url.startswith("https://www.metal-archives.com/"):`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.ENCYCLOPAEDIA_METALLUM, url, referer_page=referer_page)`
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00
			`# the less important once`
			`if url.startswith("https://www.facebook"):`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.FACEBOOK, url, referer_page=referer_page)`
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00
			`if url.startswith("https://www.instagram"):`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.INSTAGRAM, url, referer_page=referer_page)`
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00
			`if url.startswith("https://twitter"):`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.TWITTER, url, referer_page=referer_page)`
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00
added fetching of linked sources to musify 2023-03-18 12:01:27 +00:00			`if url.startswith("https://myspace.com"):`
made the referer in sources more concistent 2023-04-18 11:35:00 +00:00			`return cls(SourcePages.MYSPACE, url, referer_page=referer_page)`
added fetching of linked sources to musify 2023-03-18 12:01:27 +00:00
fixed exceptions 2023-03-10 09:13:35 +00:00			`def get_song_metadata(self) -> Metadata:`
			`return Metadata({`
finished refactoring metadata 2023-01-30 13:41:02 +00:00			`Mapping.FILE_WEBPAGE_URL: [self.url],`
			`Mapping.SOURCE_WEBPAGE_URL: [self.homepage]`
			`})`

fixed exceptions 2023-03-10 09:13:35 +00:00			`def get_artist_metadata(self) -> Metadata:`
			`return Metadata({`
finished refactoring metadata 2023-01-30 13:41:02 +00:00			`Mapping.ARTIST_WEBPAGE_URL: [self.url]`
			`})`

refactor 2023-03-10 08:09:35 +00:00			`@property`
			`def metadata(self) -> Metadata:`
fixed crash while tagging that arouse from refactor 2023-04-18 13:31:41 +00:00			`return self.get_song_metadata()`
dfsa 2023-01-12 16:14:21 +00:00
implemented DatabaseObject.indexing_values for each data objects 2023-03-09 18:53:28 +00:00			`@property`
			`def indexing_values(self) -> List[Tuple[str, object]]:`
			`return [`
			`('id', self.id),`
Fixed bug with inconsistent dynamic creation of direct download links 2023-04-18 13:24:39 +00:00			`('url', self.url),`
			`('audio_url', self.audio_url),`
implemented DatabaseObject.indexing_values for each data objects 2023-03-09 18:53:28 +00:00			`]`

dasfh 2023-01-12 15:25:50 +00:00			`def __str__(self):`
added the fetching of the sources from ma regarding artists 2023-02-01 08:10:05 +00:00			`return self.__repr__()`
dasfh 2023-01-12 15:25:50 +00:00
finished refactoring metadata 2023-01-30 13:41:02 +00:00			`def __repr__(self) -> str:`
Fixed bug with inconsistent dynamic creation of direct download links 2023-04-18 13:24:39 +00:00			`return f"Src({self.page_enum.value}: {self.url}, {self.audio_url})"`
finished refactoring metadata 2023-01-30 13:41:02 +00:00
added source to artist 2023-01-20 22:05:15 +00:00			`page_str = property(fget=lambda self: self.page_enum.value)`
added type to src 2023-01-20 09:56:40 +00:00			`type_str = property(fget=lambda self: self.type_enum.value)`
added source to artist 2023-01-20 22:05:15 +00:00			`homepage = property(fget=lambda self: SourcePages.get_homepage(self.page_enum))`
source 2023-01-25 13:14:15 +00:00

refactorings 2023-03-09 21:14:39 +00:00			`class SourceCollection(Collection):`
			`def __init__(self, source_list: List[Source]):`
			`self._page_to_source_list: Dict[SourcePages, List[Source]] = defaultdict(list)`

fixed exceptions 2023-03-10 09:13:35 +00:00			`super().__init__(data=source_list, element_type=Source)`

refactorings 2023-03-09 21:14:39 +00:00			`def map_element(self, source: Source):`
			`super().map_element(source)`

			`self._page_to_source_list[source.page_enum].append(source)`
added wrapper methods, for the basic fuctionalities of the webscraper 2023-05-26 09:41:20 +00:00
			`@property`
			`def source_pages(self) -> Set[SourcePages]:`
			`return set(source.page_enum for source in self._data)`
refactorings 2023-03-09 21:14:39 +00:00
			`def get_sources_from_page(self, source_page: SourcePages) -> List[Source]:`
			`"""`
			`getting the sources for a specific page like`
			`YouTube or musify`
			`"""`
added fetching of linked sources to musify 2023-03-18 12:01:27 +00:00			`return self._page_to_source_list[source_page].copy()`