started genius lyrics fetcher

2022-11-09 19:01:13 +01:00
parent 527af38098
commit 3bc133ef62
8 changed files with 203 additions and 6 deletions
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (music-downloader)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
 </project>
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@@ -3,6 +3,7 @@
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/music-downloader.iml" filepath="$PROJECT_DIR$/.idea/music-downloader.iml" />
      <module fileurl="file://$PROJECT_DIR$/../rythmbox-id3-lyrics-support/.idea/rythmbox-id3-lyrics-support.iml" filepath="$PROJECT_DIR$/../rythmbox-id3-lyrics-support/.idea/rythmbox-id3-lyrics-support.iml" />
    </modules>
  </component>
 </project>
--- a/.idea/music-downloader.iml
+++ b/.idea/music-downloader.iml
@@ -6,5 +6,6 @@
    </content>
    <orderEntry type="jdk" jdkName="Python 3.10 (music-downloader)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
    <orderEntry type="module" module-name="rythmbox-id3-lyrics-support" />
  </component>
 </module>
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="VcsDirectoryMappings">
-    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+    <mapping directory="" vcs="Git" />
  </component>
 </project>
--- a/src/lyrics.py
+++ b/src/lyrics.py
@@ -14,8 +14,8 @@ would be to write a Rhythmbox plugin that fetches lyrics from ID3 USLT
 # https://www.programcreek.com/python/example/63462/mutagen.mp3.EasyMP3
 # https://code.activestate.com/recipes/577138-embed-lyrics-into-mp3-files-using-mutagen-uslt-tag/
-MP3_PATH = "/home/lars/Music/metalcore/I Prevail/TRAUMA/Breaking Down.mp3"
+MP3_PATH = "/home/lars/Music/deathcore/Brand of Sacrifice/The Interstice/Eclipse.mp3"
-LYRICS = """
+LYRICS_BREAKING_DOWN = """
 [Chorus: Brian Burkheiser]
 I think, I think too much
 I'm a little bit paranoid, I think I’m breaking
@@ -78,6 +78,32 @@ I don't really like myself
 I don't really like myself
 I think I'm breaking down
 """
 LYRICS_ECLIPSE = """
 Your offerings have consecrated
 They are marked by the brand
 The sun has seen it's fifth death
 For the red lake to flow again
 He will
 Feel their pain in order to
 Complete the final transformation
 A name new and old
 Your offerings have been consecrated by the laws of Causality
 Falcon of Darkness
 Send us into an age of abyss
 Blinded by beauty
 With stacks of bodies as high as the eye can see
 Feast, apostles, feast
 The one chosen by the hand of God
 The master of the sinful black sheep
 And the king of the faithful blind
 Welcome to the new age
 Welcome to the new age
 We are the branded ones"""
 def add_lyrics(file_name, lyrics=""):
    tags = ID3(file_name)
@@ -91,5 +117,5 @@ def get_lyrics(file_name):
    return tags.getall("USLT")
 if __name__ == "__main__":
-    add_lyrics(MP3_PATH, lyrics=LYRICS)
+    add_lyrics(MP3_PATH, lyrics=LYRICS_ECLIPSE)
    print(get_lyrics(MP3_PATH))
--- a/src/lyrics/genius.py
+++ b/src/lyrics/genius.py
@@ -1,4 +1,127 @@
 import requests
 import sys
 import os
 import logging
 from typing import List
 # Append the parent directory of this file's directory to sys.path,
 # presumably so that the `tools` imports below can be resolved.
 current = os.path.dirname(os.path.realpath(__file__))
 parent = os.path.dirname(current)
 sys.path.append(parent)
 from tools import phonetic_compares
 from tools.object_handeling import get_elem_from_obj
 # The search doesn't support ISRC lookups.
 # Example search request: https://genius.com/api/search/multi?q=I Prevail - Breaking Down
 # Example song request: https://genius.com/api/songs/6192944
 # API documentation: https://docs.genius.com/
 # Shared HTTP session used for the requests made by this module.
 session = requests.Session()
 # NOTE: this assignment replaces the session's header mapping instead of updating it.
 session.headers = {
    "Connection": "keep-alive",
    "Referer": "https://genius.com/search/embed"
 }
 # Module-level logger; can be swapped out via set_logger().
 logger = logging.getLogger("genius")
 def set_proxy(proxies: dict) -> None:
    """Assign the given proxy mapping to the module-level session.
    Args:
        proxies: Mapping that is stored as ``session.proxies``.
    """
    session.proxies = proxies
 def set_logger(logger_: logging.Logger) -> None:
    """Replace the module-level logger with the one supplied by the caller.
    Args:
        logger_: Logger that this module should use from now on.
    """
    # rebind the module global, not a local name
    global logger
    logger = logger_
 class Song:
    """A single song hit of the Genius search, compared against the wanted song.
    Attributes:
        raw_data: The unmodified search hit.
        desired_data: Dict with the keys 'artist' and 'track' of the wanted song.
        id, artist, title, language, url: Values read from the hit's 'result'
            entry (``None`` if the entry is missing).
        lyricist: Placeholder, not populated yet.
        valid: ``True`` if title and artist are both within the distance
            threshold of the wanted ones.
        lyrics: Lyrics text; empty until fetch_lyrics() has been called.
    """
    def __init__(self, raw_data: dict, desirered_data: dict):
        self.raw_data = raw_data
        self.desired_data = desirered_data
        song_data = get_elem_from_obj(self.raw_data, ['result'], return_if_none={})
        self.id = get_elem_from_obj(song_data, ['id'])
        self.artist = get_elem_from_obj(song_data, ['primary_artist', 'name'])
        self.title = get_elem_from_obj(song_data, ['title'])
        self.language = get_elem_from_obj(song_data, ['language'])
        self.url = get_elem_from_obj(song_data, ['url'])
        # maybe could be implemented; initialised so the attribute always exists
        self.lyricist: str = ""
        lyrics_state = get_elem_from_obj(song_data, ['lyrics_state'])
        if lyrics_state != "complete":
            logger.warning(f"lyrics state of {self.title} by {self.artist} is not complete but {lyrics_state}")
        self.valid = self.is_valid()
        if self.valid:
            logger.info(f"found lyrics for \"{self.__repr__()}\"")
        # a bare annotation would not create the attribute, so give it a value
        self.lyrics: str = ""
    def is_valid(self) -> bool:
        """Return whether this hit is close enough to the wanted title and artist."""
        # A hit without title or artist can't be compared at all.
        if self.title is None or self.artist is None:
            return False
        # The phonetic_compares helpers return True when the distance EXCEEDS
        # the threshold, so the flags mean "differs too much" and are negated here.
        title_differs, _ = phonetic_compares.match_titles(self.title, self.desired_data['track'])
        artist_differs, _ = phonetic_compares.match_artists(self.artist, self.desired_data['artist'])
        return not (title_differs or artist_differs)
    def __repr__(self) -> str:
        return f"{self.title} by {self.artist}"
    def fetch_lyrics(self) -> str:
        """Store and return the lyrics of this song.
        The actual fetching is not implemented yet, so the result is always an
        empty string. A warning is logged when the song was not marked as valid.
        """
        if not self.valid:
            logger.warning(f"{self.__repr__()} is invalid but the lyrics still get fetched. Something could be wrong.")
        lyrics = ""
        self.lyrics = lyrics
        return lyrics
 def build_search_query(artist: str, track: str) -> str:
    """Build the search term in the form '<artist> - <track>'."""
    return f"{artist} - {track}"
 def process_multiple_songs(song_datas: list, desired_data: dict) -> List[Song]:
    """Wrap every search hit in a Song and keep only the valid ones.
    Args:
        song_datas: Raw search hits.
        desired_data: Dict with the keys 'artist' and 'track' of the wanted song.
    Returns:
        The songs whose title and artist both match the wanted ones.
    """
    all_songs = [Song(song_data, desired_data) for song_data in song_datas]
    return [song for song in all_songs if song.valid]
 def search_song_list(artist: str, track: str) -> List[Song]:
    """Search Genius for a track and return the matching songs.
    Args:
        artist: Name of the wanted artist.
        track: Title of the wanted track.
    Returns:
        The result of process_multiple_songs() for the hits of the first
        section of type "song"; an empty list if the request fails, the API
        reports a non-200 status or no such section exists.
    """
    endpoint = "https://genius.com/api/search/multi"
    query = build_search_query(artist, track)
    logger.info(f"requesting {endpoint}?q={query}")
    desired_data = {
        'artist': artist,
        'track': track
    }
    # Passing the query via params lets requests encode characters like '&' or '#'.
    r = session.get(endpoint, params={"q": query})
    if r.status_code != 200:
        logger.warning(f"{r.url} returned {r.status_code}:\n{r.content}")
        return []
    content = r.json()
    api_status = get_elem_from_obj(content, ['meta', 'status'])
    if api_status != 200:
        logger.warning(f"{r.url} returned {api_status}:\n{content}")
        return []
    # Fall back to an empty list so a missing or null 'sections' entry doesn't crash the loop.
    sections = get_elem_from_obj(content, ['response', 'sections'], return_if_none=[]) or []
    for section in sections:
        section_type = get_elem_from_obj(section, ['type'])
        logger.debug(f"found section of type {section_type}")
        if section_type == "song":
            hits = get_elem_from_obj(section, ['hits'], return_if_none=[])
            return process_multiple_songs(hits, desired_data)
    return []
 def search(artist: str, track: str):
    """Search for a track of an artist; thin wrapper around search_song_list().
    Args:
        artist: Name of the wanted artist.
        track: Title of the wanted track.
    Returns:
        Whatever search_song_list() returns for the same arguments.
    """
    found_songs = search_song_list(artist, track)
    return found_songs
 if __name__ == "__main__":
    # Manual smoke test: run one search with debug logging and print the result.
    logging.basicConfig(level=logging.DEBUG)
    print(search_song_list("Psychonaut 4", "Sana Sana Sana, Cura Cura Cura"))
--- a/src/tools/object_handeling.py
+++ b/src/tools/object_handeling.py
@@ -0,0 +1,24 @@
 from datetime import date
 def get_elem_from_obj(current_object, keys: list, after_process=lambda x: x, return_if_none=None):
    """Walk into a nested structure of dicts/lists along the given keys.
    Args:
        current_object: The structure to read from.
        keys: Keys and/or indices that are applied one after another.
        after_process: Callable applied to the value that was found.
        return_if_none: Value returned when the path can't be followed.
    Returns:
        ``after_process(value)`` for the value at the end of the path, or
        ``return_if_none`` as soon as one step of the path doesn't exist.
    """
    for key in keys:
        try:
            current_object = current_object[key]
        except (KeyError, IndexError, TypeError):
            # missing key, index out of range, or an object that can't be
            # subscripted with this key (e.g. a None in the middle of the path)
            return return_if_none
    return after_process(current_object)
 def parse_music_brainz_date(mb_date: str) -> date:
    """Parse a date string in the form 'YYYY', 'YYYY-MM' or 'YYYY-MM-DD'.
    Missing month or day default to 1. Input that doesn't consist of one to
    three numeric parts yields ``date(1, 1, 1)``.
    Args:
        mb_date: The date string to parse.
    Returns:
        The parsed date.
    Raises:
        ValueError: If the numbers don't form a valid date (e.g. month 13).
    """
    fallback = date(1, 1, 1)
    parts = mb_date.split("-")
    if len(parts) > 3:
        return fallback
    try:
        numbers = [int(part) for part in parts]
    except ValueError:
        return fallback
    # pad a partial date with 1 for the missing month and/or day
    year, month, day = numbers + [1] * (3 - len(numbers))
    return date(year, month, day)
--- a/src/tools/phonetic_compares.py
+++ b/src/tools/phonetic_compares.py
@@ -0,0 +1,22 @@
 import jellyfish
 # Levenshtein distance that match_titles() compares the computed distance against.
 TITLE_THRESHOLD_LEVENSHTEIN = 2
 def match_titles(title_1: str, title_2: str) -> (bool, int):
    """Compare two titles by their Levenshtein distance.
    Args:
        title_1: First title.
        title_2: Second title.
    Returns:
        A tuple ``(flag, distance)``. The flag is ``True`` when the distance is
        GREATER than TITLE_THRESHOLD_LEVENSHTEIN, i.e. when the titles differ
        by more than the threshold.
    """
    # NOTE(review): the function name suggests True means "matches", but the
    # comparison below yields True for a too large distance - confirm with callers.
    edit_distance = jellyfish.levenshtein_distance(title_1, title_2)
    exceeds_threshold = edit_distance > TITLE_THRESHOLD_LEVENSHTEIN
    return exceeds_threshold, edit_distance
 def match_artists(artist_1, artist_2: str) -> (bool, int):
    """Compare one artist name, or a list of names, against another name.
    Args:
        artist_1: A single name or a list of candidate names.
        artist_2: The name to compare against.
    Returns:
        A tuple ``(flag, distance)`` with the same meaning as in match_titles():
        the flag is ``True`` when the distance exceeds the threshold. For a
        list, the first candidate within the threshold is returned right away;
        if none is, the flag is ``True`` and the distance is the smallest one.
        For an empty list the flag is ``True`` and the distance is
        ``len(artist_2)``.
    """
    if not isinstance(artist_1, list):
        return match_titles(artist_1, artist_2)
    if not artist_1:
        # No candidates: nothing can be within the threshold. Report the
        # distance to the empty string instead of crashing in min([]).
        return True, len(artist_2)
    distances = []
    for candidate in artist_1:
        exceeds_threshold, distance = match_titles(candidate, artist_2)
        if not exceeds_threshold:
            return exceeds_threshold, distance
        distances.append(distance)
    return True, min(distances)