started genius lyrics fetcher

2022-11-09 19:01:13 +01:00 · 2022-11-09 19:01:13 +01:00 · 3bc133ef62
commit 3bc133ef62
parent 527af38098
8 changed files with 203 additions and 6 deletions
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (music-downloader)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
 </project>
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@ -3,6 +3,7 @@
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/music-downloader.iml" filepath="$PROJECT_DIR$/.idea/music-downloader.iml" />
+      <module fileurl="file://$PROJECT_DIR$/../rythmbox-id3-lyrics-support/.idea/rythmbox-id3-lyrics-support.iml" filepath="$PROJECT_DIR$/../rythmbox-id3-lyrics-support/.idea/rythmbox-id3-lyrics-support.iml" />
    </modules>
  </component>
 </project>
--- a/.idea/music-downloader.iml
+++ b/.idea/music-downloader.iml
@ -6,5 +6,6 @@
    </content>
    <orderEntry type="jdk" jdkName="Python 3.10 (music-downloader)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
+    <orderEntry type="module" module-name="rythmbox-id3-lyrics-support" />
  </component>
 </module>
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="VcsDirectoryMappings">
-    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+    <mapping directory="" vcs="Git" />
  </component>
 </project>
--- a/src/lyrics.py
+++ b/src/lyrics.py
@ -14,8 +14,8 @@ would be to write a Rhythmbox plugin that fetches lyrics from ID3 USLT
 # https://www.programcreek.com/python/example/63462/mutagen.mp3.EasyMP3
 # https://code.activestate.com/recipes/577138-embed-lyrics-into-mp3-files-using-mutagen-uslt-tag/

-MP3_PATH = "/home/lars/Music/metalcore/I Prevail/TRAUMA/Breaking Down.mp3"
-LYRICS = """
+MP3_PATH = "/home/lars/Music/deathcore/Brand of Sacrifice/The Interstice/Eclipse.mp3"
+LYRICS_BREAKING_DOWN = """
 [Chorus: Brian Burkheiser]
 I think, I think too much
 I'm a little bit paranoid, I think I’m breaking
@ -78,6 +78,32 @@ I don't really like myself
 I don't really like myself
 I think I'm breaking down
 """
+LYRICS_ECLIPSE = """
+Your offerings have consecrated
+They are marked by the brand
+The sun has seen it's fifth death
+For the red lake to flow again
+
+He will
+Feel their pain in order to
+Complete the final transformation
+A name new and old
+
+Your offerings have been consecrated by the laws of Causality
+Falcon of Darkness
+Send us into an age of abyss
+Blinded by beauty
+With stacks of bodies as high as the eye can see
+Feast, apostles, feast
+
+The one chosen by the hand of God
+The master of the sinful black sheep
+And the king of the faithful blind
+
+Welcome to the new age
+Welcome to the new age
+We are the branded ones"""
+

 def add_lyrics(file_name, lyrics=""):
    tags = ID3(file_name)
@ -91,5 +117,5 @@ def get_lyrics(file_name):
    return tags.getall("USLT")

 if __name__ == "__main__":
-    add_lyrics(MP3_PATH, lyrics=LYRICS)
+    add_lyrics(MP3_PATH, lyrics=LYRICS_ECLIPSE)
    print(get_lyrics(MP3_PATH))
--- a/src/lyrics/genius.py
+++ b/src/lyrics/genius.py
@ -1,4 +1,127 @@
+import requests
+import sys
+import os
+import logging
+from typing import List
+
+current = os.path.dirname(os.path.realpath(__file__))
+parent = os.path.dirname(current)
+sys.path.append(parent)
+from tools import phonetic_compares
+from tools.object_handeling import get_elem_from_obj
+
 # search doesn't support isrc
 # https://genius.com/api/search/multi?q=I Prevail - Breaking Down
 # https://genius.com/api/songs/6192944
 # https://docs.genius.com/
+
+# Shared HTTP session: search_song_list() sends its request through it and
+# set_proxy() assigns its proxies.
+session = requests.Session()
+# NOTE(review): this assignment replaces the session's whole header mapping
+# instead of updating it - confirm that this is intended.
+session.headers = {
+    "Connection": "keep-alive",
+    "Referer": "https://genius.com/search/embed"
+}
+# Module-level logger; set_logger() can swap it for a caller-provided one.
+logger = logging.getLogger("genius")
+
+
+def set_proxy(proxies: dict):
+    """Assign `proxies` to the `proxies` attribute of the module-level session."""
+    session.proxies = proxies
+
+
+def set_logger(logger_: logging.Logger):
+    """Replace the module-level `logger` with `logger_`."""
+    # `global` is required because the module-level name itself is rebound.
+    global logger
+    logger = logger_
+
+
+class Song:
+    """One song hit of a Genius search, compared against the wanted track.
+
+    Attributes:
+        raw_data: the search hit as it was passed in.
+        desired_data: dict with the keys 'artist' and 'track' describing the
+            song that was searched for.
+        id, artist, title, language, url: values read from the hit's 'result'
+            object; None when the hit does not contain them.
+        lyricist: not implemented yet, always None.
+        valid: True if both title and artist are close to desired_data.
+        lyrics: the lyrics set by fetch_lyrics(); an empty string before that.
+    """
+
+    def __init__(self, raw_data: dict, desirered_data: dict):
+        self.raw_data = raw_data
+        self.desired_data = desirered_data
+
+        song_data = get_elem_from_obj(self.raw_data, ['result'], return_if_none={})
+        self.id = get_elem_from_obj(song_data, ['id'])
+        self.artist = get_elem_from_obj(song_data, ['primary_artist', 'name'])
+        self.title = get_elem_from_obj(song_data, ['title'])
+
+        self.language = get_elem_from_obj(song_data, ['language'])
+        self.url = get_elem_from_obj(song_data, ['url'])
+
+        # maybe could be implemented
+        # (assigned so that reading the attribute does not raise AttributeError)
+        self.lyricist = None
+        self.lyrics: str = ""
+
+        lyrics_state = get_elem_from_obj(song_data, ['lyrics_state'])
+        if lyrics_state != "complete":
+            logger.warning(f"lyrics state of {self.title} by {self.artist} is not complete but {lyrics_state}")
+
+        self.valid = self.is_valid()
+        if self.valid:
+            logger.info(f"found lyrics for \"{self!r}\"")
+
+    def is_valid(self) -> bool:
+        """Return True if title and artist of the hit both fit the desired song.
+
+        A hit without a title or an artist is never valid.
+        """
+        if self.title is None or self.artist is None:
+            return False
+
+        # phonetic_compares.match_titles()/match_artists() return True as their
+        # first element when the Levenshtein distance EXCEEDS the threshold,
+        # so the flags have to be negated to mean "close enough".
+        title_too_far, _ = phonetic_compares.match_titles(self.title, self.desired_data['track'])
+        artist_too_far, _ = phonetic_compares.match_artists(self.artist, self.desired_data['artist'])
+
+        return not title_too_far and not artist_too_far
+
+    def __repr__(self) -> str:
+        return f"{self.title} by {self.artist}"
+
+    def fetch_lyrics(self) -> str:
+        """Store and return the lyrics of this song.
+
+        TODO: the actual fetching is not implemented yet, so the result is
+        always an empty string.
+        """
+        if not self.valid:
+            logger.warning(f"{self!r} is invalid but the lyrics still get fetched. Something could be wrong.")
+        lyrics = ""
+
+        self.lyrics = lyrics
+        return lyrics
+
+
+def build_search_query(artist: str, track: str) -> str:
+    """Return the search string "<artist> - <track>"."""
+    return f"{artist} - {track}"
+
+
+def process_multiple_songs(song_datas: list, desired_data: dict) -> List[Song]:
+    """Wrap every search hit in a Song and keep the ones that are valid.
+
+    :param song_datas: the search hits, one dict per song
+    :param desired_data: dict with the keys 'artist' and 'track'
+    :return: the songs whose title and artist both fit desired_data
+    """
+    all_songs = [Song(song_data, desired_data) for song_data in song_datas]
+    return [song for song in all_songs if song.valid]
+
+
+def search_song_list(artist: str, track: str) -> List[Song]:
+    """Search Genius for `track` by `artist` and return the processed hits.
+
+    The query built by build_search_query() is sent to the Genius multi search
+    endpoint through the module-level session. The hits of the first section
+    of type "song" are handed to process_multiple_songs().
+
+    :param artist: artist name to search for
+    :param track: track title to search for
+    :return: the list produced by process_multiple_songs(); an empty list if
+        the HTTP status is not 200, the body is not valid JSON, the API
+        reports a status other than 200 or there is no "song" section
+    """
+    endpoint = "https://genius.com/api/search/multi"
+    query = build_search_query(artist, track)
+    logger.info(f"requesting {endpoint}?q={query}")
+
+    desired_data = {
+        'artist': artist,
+        'track': track
+    }
+
+    # Passing the query via `params` lets requests percent-encode it, so
+    # characters like "&" or "#" in a name can not cut the query short.
+    r = session.get(endpoint, params={"q": query})
+    if r.status_code != 200:
+        logger.warning(f"{r.url} returned {r.status_code}:\n{r.content}")
+        return []
+
+    try:
+        content = r.json()
+    except ValueError:
+        # the decode errors raised by Response.json() derive from ValueError
+        logger.warning(f"{r.url} did not return valid json:\n{r.content}")
+        return []
+
+    if get_elem_from_obj(content, ['meta', 'status']) != 200:
+        logger.warning(f"{r.url} returned {get_elem_from_obj(content, ['meta', 'status'])}:\n{content}")
+        return []
+
+    # `or []` also covers a response in which "sections" is present but null
+    sections = get_elem_from_obj(content, ['response', 'sections']) or []
+    for section in sections:
+        section_type = get_elem_from_obj(section, ['type'])
+        logger.debug(f"found section of type {section_type}")
+        if section_type == "song":
+            return process_multiple_songs(get_elem_from_obj(section, ['hits'], return_if_none=[]), desired_data)
+
+    return []
+
+def search(artist: str, track: str):
+    """Look up `track` by `artist`; forwards to search_song_list()."""
+    found_songs = search_song_list(artist, track)
+    return found_songs
+
+
+if __name__ == "__main__":
+    # Manual smoke test: run one search with debug logging and print the result.
+    logging.basicConfig(level=logging.DEBUG)
+
+    print(search_song_list("Psychonaut 4", "Sana Sana Sana, Cura Cura Cura"))
--- a/src/tools/object_handeling.py
+++ b/src/tools/object_handeling.py
@ -0,0 +1,24 @@
+from datetime import date
+
+
+def get_elem_from_obj(current_object, keys: list, after_process=lambda x: x, return_if_none=None):
+    """Follow `keys` through nested containers and return the element found.
+
+    :param current_object: the container to start from (e.g. parsed JSON)
+    :param keys: the keys/indices to apply one after another
+    :param after_process: callable applied to the element that was found
+    :param return_if_none: returned unprocessed if any step of the path can
+        not be resolved
+    :return: after_process(element), or return_if_none if the path is missing
+    """
+    for key in keys:
+        # Index directly instead of testing `key in current_object` first:
+        # for lists `in` checks the values rather than the indices, and it
+        # fails on None or other non-containers in the middle of the path.
+        try:
+            current_object = current_object[key]
+        except (KeyError, IndexError, TypeError):
+            return return_if_none
+    return after_process(current_object)
+
+
+def parse_music_brainz_date(mb_date: str) -> date:
+    """Parse a date string of the form "YYYY", "YYYY-MM" or "YYYY-MM-DD".
+
+    Components that are missing or not numeric default to 1; parsing stops at
+    the first component that is not numeric.
+
+    :param mb_date: the date string
+    :return: the parsed date, date(1, 1, 1) if nothing could be parsed
+    :raises ValueError: if the numbers do not form a valid date
+    """
+    # year, month, day
+    components = [1, 1, 1]
+
+    for position, part in enumerate(mb_date.split("-")[:3]):
+        part = part.strip()
+        if not part.isdecimal():
+            break
+        components[position] = int(part)
+
+    return date(*components)
--- a/src/tools/phonetic_compares.py
+++ b/src/tools/phonetic_compares.py
@ -0,0 +1,22 @@
+import jellyfish
+
+TITLE_THRESHOLD_LEVENSHTEIN = 2
+
+
+def match_titles(title_1: str, title_2: str) -> (bool, int):
+    """Compare two titles by their Levenshtein distance.
+
+    :return: a tuple (flag, distance); the flag is True when the distance is
+        GREATER than TITLE_THRESHOLD_LEVENSHTEIN
+    """
+    # NOTE(review): despite the function name the flag is True for titles that
+    # are too far apart - callers have to negate it to get "matches".
+    edit_distance = jellyfish.levenshtein_distance(title_1, title_2)
+    exceeds_threshold = edit_distance > TITLE_THRESHOLD_LEVENSHTEIN
+    return exceeds_threshold, edit_distance
+
+
+def match_artists(artist_1, artist_2: str) -> (bool, int):
+    """Compare one or several artist names with `artist_2` via match_titles().
+
+    :param artist_1: a single name or a list/tuple of names
+    :param artist_2: the name to compare against
+    :return: a tuple (flag, distance) with the flag semantics of
+        match_titles(). For several names, the result of the first name whose
+        flag is False is returned; if there is none, the result is True
+        together with the smallest distance. An empty sequence is compared
+        like an empty name.
+    """
+    if not isinstance(artist_1, (list, tuple)):
+        return match_titles(artist_1, artist_2)
+
+    if not artist_1:
+        # min() of no distances would raise; fall back to an empty name
+        return match_titles("", artist_2)
+
+    distances = []
+    for candidate in artist_1:
+        flag, distance = match_titles(candidate, artist_2)
+        if not flag:
+            return flag, distance
+
+        distances.append(distance)
+    return True, min(distances)