added fetching of linked sources to musify
This commit is contained in:
		| @@ -377,7 +377,7 @@ All objects dependent on Artist | |||||||
|  |  | ||||||
|  |  | ||||||
| class Artist(MainObject): | class Artist(MainObject): | ||||||
|     COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection", "label_collection") |     COLLECTION_ATTRIBUTES = ("feature_song_collection", "main_album_collection", "label_collection", "source_collection") | ||||||
|     SIMPLE_ATTRIBUTES = { |     SIMPLE_ATTRIBUTES = { | ||||||
|         "name": None, |         "name": None, | ||||||
|         "unified_name": None, |         "unified_name": None, | ||||||
|   | |||||||
| @@ -1,6 +1,7 @@ | |||||||
| from collections import defaultdict | from collections import defaultdict | ||||||
| from enum import Enum | from enum import Enum | ||||||
| from typing import List, Dict, Tuple | from typing import List, Dict, Tuple, Optional | ||||||
|  | from urllib.parse import urlparse | ||||||
|  |  | ||||||
| from .metadata import Mapping, Metadata | from .metadata import Mapping, Metadata | ||||||
| from .parents import DatabaseObject | from .parents import DatabaseObject | ||||||
| @@ -27,7 +28,8 @@ class SourcePages(Enum): | |||||||
|     # This has nothing to do with audio, but bands can be here |     # This has nothing to do with audio, but bands can be here | ||||||
|     INSTAGRAM = "instagram" |     INSTAGRAM = "instagram" | ||||||
|     FACEBOOK = "facebook" |     FACEBOOK = "facebook" | ||||||
|     TWITTER = "twitter" # I will use nitter though lol |     TWITTER = "twitter"     # I will use nitter though lol | ||||||
|  |     MYSPACE = "myspace"     # Yes somehow this ancient site is linked EVERYWHERE | ||||||
|  |  | ||||||
|     @classmethod |     @classmethod | ||||||
|     def get_homepage(cls, attribute) -> str: |     def get_homepage(cls, attribute) -> str: | ||||||
| @@ -42,7 +44,8 @@ class SourcePages(Enum): | |||||||
|             cls.INSTAGRAM: "https://www.instagram.com/", |             cls.INSTAGRAM: "https://www.instagram.com/", | ||||||
|             cls.FACEBOOK: "https://www.facebook.com/", |             cls.FACEBOOK: "https://www.facebook.com/", | ||||||
|             cls.SPOTIFY: "https://open.spotify.com/", |             cls.SPOTIFY: "https://open.spotify.com/", | ||||||
|             cls.TWITTER: "https://twitter.com/" |             cls.TWITTER: "https://twitter.com/", | ||||||
|  |             cls.MYSPACE: "https://myspace.com/" | ||||||
|         } |         } | ||||||
|         return homepage_map[attribute] |         return homepage_map[attribute] | ||||||
|  |  | ||||||
| @@ -71,11 +74,14 @@ class Source(DatabaseObject): | |||||||
|         self.url = url |         self.url = url | ||||||
|  |  | ||||||
|     @classmethod |     @classmethod | ||||||
|     def match_url(cls, url: str): |     def match_url(cls, url: str) -> Optional["Source"]: | ||||||
|         """ |         """ | ||||||
|         this shouldn't be used, unless you are not certain what the source is for |         this shouldn't be used, unless you are not certain what the source is for | ||||||
|         the reason is that it is more inefficient |         the reason is that it is more inefficient | ||||||
|         """ |         """ | ||||||
|  |         parsed = urlparse(url) | ||||||
|  |         url = parsed.geturl() | ||||||
|  |  | ||||||
|         if url.startswith("https://www.youtube"): |         if url.startswith("https://www.youtube"): | ||||||
|             return cls(SourcePages.YOUTUBE, url) |             return cls(SourcePages.YOUTUBE, url) | ||||||
|  |  | ||||||
| @@ -101,6 +107,9 @@ class Source(DatabaseObject): | |||||||
|         if url.startswith("https://twitter"): |         if url.startswith("https://twitter"): | ||||||
|             return cls(SourcePages.TWITTER, url) |             return cls(SourcePages.TWITTER, url) | ||||||
|  |  | ||||||
|  |         if url.startswith("https://myspace.com"): | ||||||
|  |             return cls(SourcePages.MYSPACE, url) | ||||||
|  |  | ||||||
|     def get_song_metadata(self) -> Metadata: |     def get_song_metadata(self) -> Metadata: | ||||||
|         return Metadata({ |         return Metadata({ | ||||||
|             Mapping.FILE_WEBPAGE_URL: [self.url], |             Mapping.FILE_WEBPAGE_URL: [self.url], | ||||||
| @@ -157,4 +166,4 @@ class SourceCollection(Collection): | |||||||
|         getting the sources for a specific page like |         getting the sources for a specific page like | ||||||
|         YouTube or musify |         YouTube or musify | ||||||
|         """ |         """ | ||||||
|         return self._page_to_source_list[source_page] |         return self._page_to_source_list[source_page].copy() | ||||||
|   | |||||||
| @@ -102,7 +102,7 @@ class Musify(Page): | |||||||
|     @classmethod |     @classmethod | ||||||
|     def parse_artist_contact(cls, contact: BeautifulSoup) -> Artist: |     def parse_artist_contact(cls, contact: BeautifulSoup) -> Artist: | ||||||
|         source_list: List[Source] = [] |         source_list: List[Source] = [] | ||||||
|         name = "" |         name = None | ||||||
|         _id = None |         _id = None | ||||||
|  |  | ||||||
|         # source |         # source | ||||||
| @@ -153,12 +153,15 @@ class Musify(Page): | |||||||
|         """ |         """ | ||||||
|  |  | ||||||
|         source_list: List[Source] = [] |         source_list: List[Source] = [] | ||||||
|         title = "" |         title = None | ||||||
|         _id = None |         _id = None | ||||||
|         year = None |         year = None | ||||||
|         artist_list: List[Artist] = [] |         artist_list: List[Artist] = [] | ||||||
|  |  | ||||||
|         def parse_title_date(title_date: Optional[str], delimiter: str = " - "): |         def parse_title_date(title_date: Optional[str], delimiter: str = " - "): | ||||||
|  |             nonlocal year | ||||||
|  |             nonlocal title | ||||||
|  |  | ||||||
|             if title_date is None: |             if title_date is None: | ||||||
|                 return |                 return | ||||||
|  |  | ||||||
| @@ -262,7 +265,7 @@ class Musify(Page): | |||||||
|     @classmethod |     @classmethod | ||||||
|     def parse_playlist_item(cls, playlist_item_soup: BeautifulSoup) -> Song: |     def parse_playlist_item(cls, playlist_item_soup: BeautifulSoup) -> Song: | ||||||
|         _id = None |         _id = None | ||||||
|         song_title = playlist_item_soup.get("data-name") or "" |         song_title = playlist_item_soup.get("data-name") | ||||||
|         artist_list: List[Artist] = [] |         artist_list: List[Artist] = [] | ||||||
|         source_list: List[Source] = [] |         source_list: List[Source] = [] | ||||||
|  |  | ||||||
| @@ -415,7 +418,7 @@ class Musify(Page): | |||||||
|         }) |         }) | ||||||
|  |  | ||||||
|         _id: Optional[str] = None |         _id: Optional[str] = None | ||||||
|         name: str = "" |         name: str = None | ||||||
|         source_list: List[Source] = [] |         source_list: List[Source] = [] | ||||||
|         timestamp: Optional[ID3Timestamp] = None |         timestamp: Optional[ID3Timestamp] = None | ||||||
|         album_status = None |         album_status = None | ||||||
| @@ -586,7 +589,7 @@ class Musify(Page): | |||||||
|             </li> |             </li> | ||||||
|         </ul> |         </ul> | ||||||
|         """ |         """ | ||||||
|         name = "" |         name = None | ||||||
|         source_list: List[Source] = [] |         source_list: List[Source] = [] | ||||||
|         country = None |         country = None | ||||||
|         notes: FormattedText = None |         notes: FormattedText = None | ||||||
| @@ -625,6 +628,7 @@ class Musify(Page): | |||||||
|             if h1_name is not None: |             if h1_name is not None: | ||||||
|                 name = h1_name.get_text(strip=True) |                 name = h1_name.get_text(strip=True) | ||||||
|  |  | ||||||
|  |         # country and sources | ||||||
|         icon_list: BeautifulSoup = soup.find("ul", {"class": "icon-list"}) |         icon_list: BeautifulSoup = soup.find("ul", {"class": "icon-list"}) | ||||||
|         if icon_list is not None: |         if icon_list is not None: | ||||||
|             country_italic: BeautifulSoup = icon_list.find("i", {"class", "flag-icon"}) |             country_italic: BeautifulSoup = icon_list.find("i", {"class", "flag-icon"}) | ||||||
| @@ -646,10 +650,20 @@ class Musify(Page): | |||||||
|  |  | ||||||
|                     country = pycountry.countries.get(alpha_2=list(country_set)[0]) |                     country = pycountry.countries.get(alpha_2=list(country_set)[0]) | ||||||
|  |  | ||||||
|  |             # get all additional sources | ||||||
|  |             additional_source: BeautifulSoup | ||||||
|  |             for additional_source in icon_list.find_all("a", {"class", "link"}): | ||||||
|  |                 href = additional_source.get("href") | ||||||
|  |                 if href is None: | ||||||
|  |                     continue | ||||||
|  |                 new_src = Source.match_url(href) | ||||||
|  |                 if new_src is None: | ||||||
|  |                     continue | ||||||
|  |                 source_list.append(new_src) | ||||||
|  |  | ||||||
|         note_soup: BeautifulSoup = soup.find(id="text-main") |         note_soup: BeautifulSoup = soup.find(id="text-main") | ||||||
|         if note_soup is not None: |         if note_soup is not None: | ||||||
|             notes = FormattedText(html=note_soup.decode_contents()) |             notes = FormattedText(html=note_soup.decode_contents()) | ||||||
|             print(notes.plaintext) |  | ||||||
|  |  | ||||||
|         return Artist( |         return Artist( | ||||||
|             _id=url.musify_id, |             _id=url.musify_id, | ||||||
|   | |||||||
| @@ -9,7 +9,6 @@ def search(): | |||||||
|  |  | ||||||
| def fetch_artist(): | def fetch_artist(): | ||||||
|     artist = objects.Artist( |     artist = objects.Artist( | ||||||
|         name="Ghost Bath", |  | ||||||
|         source_list=[objects.Source(objects.SourcePages.MUSIFY, "https://musify.club/artist/psychonaut-4-83193")] |         source_list=[objects.Source(objects.SourcePages.MUSIFY, "https://musify.club/artist/psychonaut-4-83193")] | ||||||
|     ) |     ) | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user