diff --git a/.idea/misc.xml b/.idea/misc.xml index 6468d4f..dc9ea49 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,4 @@ - + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml index 14e8adf..7b56bcd 100644 --- a/.idea/modules.xml +++ b/.idea/modules.xml @@ -3,6 +3,7 @@ + \ No newline at end of file diff --git a/.idea/music-downloader.iml b/.idea/music-downloader.iml index f40abef..7a7b74c 100644 --- a/.idea/music-downloader.iml +++ b/.idea/music-downloader.iml @@ -6,5 +6,6 @@ + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml index 94a25f7..35eb1dd 100644 --- a/.idea/vcs.xml +++ b/.idea/vcs.xml @@ -1,6 +1,6 @@ - + \ No newline at end of file diff --git a/src/lyrics.py b/src/lyrics.py index 52bece2..2beb1d5 100644 --- a/src/lyrics.py +++ b/src/lyrics.py @@ -14,8 +14,8 @@ would be to write a Rhythmbox plugin that fetches lyrics from ID3 USLT # https://www.programcreek.com/python/example/63462/mutagen.mp3.EasyMP3 # https://code.activestate.com/recipes/577138-embed-lyrics-into-mp3-files-using-mutagen-uslt-tag/ -MP3_PATH = "/home/lars/Music/metalcore/I Prevail/TRAUMA/Breaking Down.mp3" -LYRICS = """ +MP3_PATH = "/home/lars/Music/deathcore/Brand of Sacrifice/The Interstice/Eclipse.mp3" +LYRICS_BREAKING_DOWN = """ [Chorus: Brian Burkheiser] I think, I think too much I'm a little bit paranoid, I think I’m breaking @@ -78,6 +78,32 @@ I don't really like myself I don't really like myself I think I'm breaking down """ +LYRICS_ECLIPSE = """ +Your offerings have consecrated +They are marked by the brand +The sun has seen it's fifth death +For the red lake to flow again + +He will +Feel their pain in order to +Complete the final transformation +A name new and old + +Your offerings have been consecrated by the laws of Causality +Falcon of Darkness +Send us into an age of abyss +Blinded by beauty +With stacks of bodies as high as the eye can see +Feast, apostles, feast + +The one chosen by the hand of God +The master of 
class Song:
    """One Genius search hit, compared against the song that was searched for.

    The interesting fields are pulled out of the raw API hit with
    ``get_elem_from_obj``; fields missing from the response end up as ``None``.
    ``valid`` records whether the hit's title and artist are both close enough
    to the requested ones.
    """

    def __init__(self, raw_data: dict, desirered_data: dict):
        """Extract the song fields from ``raw_data`` and validate the hit.

        :param raw_data: a single search hit; the song itself is expected
            under the ``'result'`` key.
        :param desirered_data: dict with the keys ``'artist'`` and ``'track'``
            describing the song that was searched for. (The misspelled
            parameter name is kept so existing keyword callers keep working.)
        """
        self.raw_data = raw_data
        self.desired_data = desirered_data

        song_data = get_elem_from_obj(self.raw_data, ['result'], return_if_none={})
        self.id = get_elem_from_obj(song_data, ['id'])
        self.artist = get_elem_from_obj(song_data, ['primary_artist', 'name'])
        self.title = get_elem_from_obj(song_data, ['title'])

        self.language = get_elem_from_obj(song_data, ['language'])
        self.url = get_elem_from_obj(song_data, ['url'])

        # maybe could be implemented
        # Assigned (instead of only annotated) so the attributes really exist.
        self.lyricist = None
        self.lyrics = ""

        lyrics_state = get_elem_from_obj(song_data, ['lyrics_state'])
        if lyrics_state != "complete":
            logger.warning(f"lyrics state of {self.title} by {self.artist} is not complete but {lyrics_state}")

        self.valid = self.is_valid()
        if self.valid:
            logger.info(f"found lyrics for \"{self.__repr__()}\"")

    def is_valid(self) -> bool:
        """Return True if both title and artist are close to the desired ones.

        ``phonetic_compares.match_titles`` / ``match_artists`` return, as their
        first element, whether the edit distance EXCEEDS the threshold, so a
        true flag means "does not match". The hit is valid only when neither
        flag is set.
        """
        if self.title is None or self.artist is None:
            # The response lacked a title or artist; nothing to compare.
            return False

        title_mismatch, _ = phonetic_compares.match_titles(self.title, self.desired_data['track'])
        artist_mismatch, _ = phonetic_compares.match_artists(self.artist, self.desired_data['artist'])

        return not title_mismatch and not artist_mismatch

    def __repr__(self) -> str:
        return f"{self.title} by {self.artist}"

    def fetch_lyrics(self) -> str:
        """Store and return the lyrics of this song.

        Fetching is not implemented yet, so the result is always an empty
        string; a warning is logged when called on an invalid hit.
        """
        if not self.valid:
            logger.warning(f"{self.__repr__()} is invalid but the lyrics still get fetched. Something could be wrong.")
        lyrics = ""

        self.lyrics = lyrics
        return lyrics


def build_search_query(artist: str, track: str) -> str:
    """Build the free-text query ``"<artist> - <track>"`` for the search endpoint."""
    return f"{artist} - {track}"


def process_multiple_songs(song_datas: list, desired_data: dict) -> List[Song]:
    """Wrap every raw search hit in a ``Song`` and keep only the valid ones.

    :param song_datas: raw hits of the search response.
    :param desired_data: dict with the keys ``'artist'`` and ``'track'``.
    :return: the hits whose title and artist both match the desired song.
    """
    all_songs = [Song(song_data, desired_data) for song_data in song_datas]
    return [song for song in all_songs if song.valid]
def get_elem_from_obj(current_object, keys: list, after_process=lambda x: x, return_if_none=None):
    """Follow ``keys`` through nested containers and return the element found.

    Each key is applied in turn with ``obj[key]``, so dict keys and list
    indices (including negative ones) can be mixed freely.

    :param current_object: the outermost container, e.g. a decoded JSON body.
    :param keys: the path to follow; an empty path yields the object itself.
    :param after_process: callable applied to the element that was found.
    :param return_if_none: returned unprocessed when the path cannot be
        followed. A value that exists but is ``None`` is still passed to
        ``after_process``.
    :return: ``after_process(element)`` or ``return_if_none``.
    """
    for key in keys:
        # Index directly rather than testing ``key in obj`` first: membership
        # on a list checks values, not indices, and fails outright on None.
        try:
            current_object = current_object[key]
        except (KeyError, IndexError, TypeError):
            return return_if_none
    return after_process(current_object)


def parse_music_brainz_date(mb_date: str) -> date:
    """Convert a date string of the form ``YYYY``, ``YYYY-MM`` or ``YYYY-MM-DD``.

    Missing month/day default to 1. Anything that is not one to three
    dash-separated digit groups yields the fallback ``date(1, 1, 1)``.

    :param mb_date: the date string to parse.
    :return: the parsed date, or ``date(1, 1, 1)`` as fallback.
    :raises ValueError: if the numbers do not form a valid calendar date.
    """
    parts = mb_date.split("-")
    if len(parts) > 3 or not all(part.isdigit() for part in parts):
        return date(1, 1, 1)

    # Pad a partial date with 1 for the missing month and/or day.
    year, month, day = [int(part) for part in parts] + [1] * (3 - len(parts))
    return date(year, month, day)
distances.append(distance) + return True, min(distances) + return match_titles(artist_1, artist_2)