started genius lyrics fetcher

This commit is contained in:
lars 2022-11-09 19:01:13 +01:00
parent 527af38098
commit 3bc133ef62
8 changed files with 203 additions and 6 deletions

View File

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (music-downloader)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
</project>

View File

@ -3,6 +3,7 @@
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/music-downloader.iml" filepath="$PROJECT_DIR$/.idea/music-downloader.iml" />
<module fileurl="file://$PROJECT_DIR$/../rythmbox-id3-lyrics-support/.idea/rythmbox-id3-lyrics-support.iml" filepath="$PROJECT_DIR$/../rythmbox-id3-lyrics-support/.idea/rythmbox-id3-lyrics-support.iml" />
</modules>
</component>
</project>

View File

@ -6,5 +6,6 @@
</content>
<orderEntry type="jdk" jdkName="Python 3.10 (music-downloader)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="module" module-name="rythmbox-id3-lyrics-support" />
</component>
</module>

View File

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
<mapping directory="" vcs="Git" />
</component>
</project>

View File

@ -14,8 +14,8 @@ would be to write a Rhythmbox plugin that fetches lyrics from ID3 USLT
# https://www.programcreek.com/python/example/63462/mutagen.mp3.EasyMP3
# https://code.activestate.com/recipes/577138-embed-lyrics-into-mp3-files-using-mutagen-uslt-tag/
MP3_PATH = "/home/lars/Music/metalcore/I Prevail/TRAUMA/Breaking Down.mp3"
LYRICS = """
MP3_PATH = "/home/lars/Music/deathcore/Brand of Sacrifice/The Interstice/Eclipse.mp3"
LYRICS_BREAKING_DOWN = """
[Chorus: Brian Burkheiser]
I think, I think too much
I'm a little bit paranoid, I think Im breaking
@ -78,6 +78,32 @@ I don't really like myself
I don't really like myself
I think I'm breaking down
"""
LYRICS_ECLIPSE = """
Your offerings have consecrated
They are marked by the brand
The sun has seen it's fifth death
For the red lake to flow again
He will
Feel their pain in order to
Complete the final transformation
A name new and old
Your offerings have been consecrated by the laws of Causality
Falcon of Darkness
Send us into an age of abyss
Blinded by beauty
With stacks of bodies as high as the eye can see
Feast, apostles, feast
The one chosen by the hand of God
The master of the sinful black sheep
And the king of the faithful blind
Welcome to the new age
Welcome to the new age
We are the branded ones"""
def add_lyrics(file_name, lyrics=""):
tags = ID3(file_name)
@ -91,5 +117,5 @@ def get_lyrics(file_name):
return tags.getall("USLT")
if __name__ == "__main__":
add_lyrics(MP3_PATH, lyrics=LYRICS)
add_lyrics(MP3_PATH, lyrics=LYRICS_ECLIPSE)
print(get_lyrics(MP3_PATH))

View File

@ -1,4 +1,127 @@
import requests
import sys
import os
import logging
from typing import List
current = os.path.dirname(os.path.realpath(__file__))
parent = os.path.dirname(current)
sys.path.append(parent)
from tools import phonetic_compares
from tools.object_handeling import get_elem_from_obj
# search doesn't support isrc
# https://genius.com/api/search/multi?q=I Prevail - Breaking Down
# https://genius.com/api/songs/6192944
# https://docs.genius.com/
# One shared HTTP session, used by every request this module sends.
session = requests.Session()
# NOTE: this assignment replaces the session's header mapping as a whole.
session.headers = {
    "Connection": "keep-alive",
    "Referer": "https://genius.com/search/embed",
}

# Module-level logger; set_logger() can rebind it.
logger = logging.getLogger("genius")
def set_proxy(proxies: dict):
    """Use the given proxies for the module's shared session.

    :param proxies: mapping assigned to ``session.proxies``
        (presumably scheme -> proxy URL, as requests expects; verify
        against the caller).
    """
    session.proxies = proxies
def set_logger(logger_: logging.Logger):
    """Replace the module-level ``logger`` with ``logger_``.

    :param logger_: logger that this module should log to from now on.
    """
    global logger
    logger = logger_
class Song:
    """One song hit of a Genius search, checked against the wanted song.

    ``raw_data`` is a single search hit; the song fields are read from its
    ``'result'`` entry.  ``desirered_data`` carries the ``'artist'`` and
    ``'track'`` that were searched for and is used to validate the hit.
    """

    def __init__(self, raw_data: dict, desirered_data: dict):
        self.raw_data = raw_data
        self.desired_data = desirered_data

        song_data = get_elem_from_obj(self.raw_data, ['result'], return_if_none={})
        self.id = get_elem_from_obj(song_data, ['id'])
        self.artist = get_elem_from_obj(song_data, ['primary_artist', 'name'])
        self.title = get_elem_from_obj(song_data, ['title'])
        self.language = get_elem_from_obj(song_data, ['language'])
        self.url = get_elem_from_obj(song_data, ['url'])

        # maybe could be implemented
        self.lyricist: str

        # Look the state up once and reuse it for the check and the message.
        lyrics_state = get_elem_from_obj(song_data, ['lyrics_state'])
        if lyrics_state != "complete":
            logger.warning(f"lyrics state of {self.title} by {self.artist} is not complete but {lyrics_state}")

        self.valid = self.is_valid()
        if self.valid:
            logger.info(f"found lyrics for \"{self.__repr__()}\"")

        # Only assigned once fetch_lyrics() has been called.
        self.lyrics: str

    def is_valid(self) -> bool:
        """Return True if both title and artist are close to the wanted ones.

        The helpers in ``phonetic_compares`` return True as their first
        element when the Levenshtein distance EXCEEDS the threshold, i.e.
        when the strings differ.  A hit is therefore valid only if neither
        comparison reports a difference.
        """
        # A hit without title or artist cannot be compared at all.
        if self.title is None or self.artist is None:
            return False

        title_differs, _ = phonetic_compares.match_titles(self.title, self.desired_data['track'])
        artist_differs, _ = phonetic_compares.match_artists(self.artist, self.desired_data['artist'])
        return not title_differs and not artist_differs

    def __repr__(self) -> str:
        return f"{self.title} by {self.artist}"

    def fetch_lyrics(self) -> str:
        """Store and return the lyrics of this song.

        Fetching is not implemented yet: the result is always an empty
        string.  A warning is logged when the song did not pass validation.
        """
        if not self.valid:
            logger.warning(f"{self.__repr__()} is invalid but the lyrics still get fetched. Something could be wrong.")

        lyrics = ""
        self.lyrics = lyrics
        return lyrics


def build_search_query(artist: str, track: str) -> str:
    """Return the search string for a song: ``"<artist> - <track>"``."""
    return f"{artist} - {track}"


def process_multiple_songs(song_datas: list, desired_data: dict) -> List[Song]:
    """Wrap every search hit in a ``Song`` and keep only the valid ones.

    :param song_datas: the hits of a search section.
    :param desired_data: dict with the wanted ``'artist'`` and ``'track'``.
    :return: the songs whose title and artist both match the wanted song.
    """
    all_songs = [Song(song_data, desired_data) for song_data in song_datas]
    return [song for song in all_songs if song.valid]
def search_song_list(artist: str, track: str) -> List[Song]:
    """Search Genius for a song and return the hits that pass validation.

    :param artist: name of the wanted artist.
    :param track: title of the wanted track.
    :return: result of ``process_multiple_songs`` for the first section of
        type ``"song"``; an empty list if the request fails, the API reports
        a non-200 status, or no such section exists.
    """
    endpoint = "https://genius.com/api/search/multi"
    query = build_search_query(artist, track)
    # Use the module logger so that set_logger() also affects these messages.
    logger.info(f"requesting {endpoint}?q={query}")

    desired_data = {
        'artist': artist,
        'track': track
    }

    # Pass the query via params so characters like '&' or '#' in a name get
    # URL-encoded instead of corrupting the query string.
    r = session.get(endpoint, params={'q': query})
    if r.status_code != 200:
        logger.warning(f"{r.url} returned {r.status_code}:\n{r.content}")
        return []

    content = r.json()
    api_status = get_elem_from_obj(content, ['meta', 'status'])
    if api_status != 200:
        logger.warning(f"{r.url} returned {api_status}:\n{content}")
        return []

    # A missing or null section list is treated as "no results".
    sections = get_elem_from_obj(content, ['response', 'sections'], return_if_none=[]) or []
    for section in sections:
        section_type = get_elem_from_obj(section, ['type'])
        logger.debug(f"found section of type {section_type}")
        if section_type == "song":
            return process_multiple_songs(get_elem_from_obj(section, ['hits'], return_if_none=[]), desired_data)

    return []
def search(artist: str, track: str):
    """Search for a song; thin wrapper that delegates to ``search_song_list``."""
    found_songs = search_song_list(artist, track)
    return found_songs
if __name__ == "__main__":
    # Manual smoke test: run one example search with debug logging enabled
    # and print whatever comes back.
    logging.basicConfig(level=logging.DEBUG)

    songs = search_song_list(
        "Psychonaut 4",
        "Sana Sana Sana, Cura Cura Cura",
    )
    print(songs)

View File

@ -0,0 +1,24 @@
from datetime import date
def get_elem_from_obj(current_object, keys: list, after_process=lambda x: x, return_if_none=None):
    """Walk a nested structure along ``keys`` and return the element found.

    Each key is applied in turn by subscripting (``obj[key]``), so dict keys
    and list indices can be mixed freely.

    :param current_object: the nested dict/list structure to read from.
    :param keys: the path to follow, outermost key first.
    :param after_process: callable applied to the found element before it is
        returned; defaults to the identity.
    :param return_if_none: value returned when the path cannot be followed.
    :return: ``after_process(element)`` or ``return_if_none``.
    """
    for key in keys:
        try:
            current_object = current_object[key]
        except (KeyError, IndexError, TypeError):
            # KeyError: missing dict key; IndexError: list index out of
            # range; TypeError: value is not subscriptable with this key
            # (e.g. None in the middle of the path).
            return return_if_none
    return after_process(current_object)
def parse_music_brainz_date(mb_date: str) -> date:
    """Parse a (possibly partial) date string into a ``date``.

    Accepts ``YYYY``, ``YYYY-MM`` and ``YYYY-MM-DD``.  Missing month or day
    default to 1.  Empty or unparsable input yields ``date(1, 1, 1)``.

    :param mb_date: the date string to parse.
    :return: the parsed date, or ``date(1, 1, 1)`` as a fallback.
    :raises ValueError: if the components are numeric but do not form a
        valid calendar date (e.g. month 13).
    """
    fallback = date(1, 1, 1)
    if not mb_date:
        return fallback

    parts = mb_date.split("-")
    if len(parts) > 3:
        return fallback

    try:
        numbers = [int(part) for part in parts]
    except ValueError:
        # at least one component is not a number
        return fallback

    # Fill the components that a partial date leaves out with 1.
    year, month, day = numbers + [1] * (3 - len(numbers))
    return date(year, month, day)

View File

@ -0,0 +1,22 @@
import jellyfish
TITLE_THRESHOLD_LEVENSHTEIN = 2
def match_titles(title_1: str, title_2: str) -> (bool, int):
    """Compare two titles by their Levenshtein distance.

    NOTE: despite the name, the returned flag is True when the distance
    EXCEEDS ``TITLE_THRESHOLD_LEVENSHTEIN``, i.e. when the titles differ by
    more than the threshold.  Callers have to negate it to get "is a match".

    :return: ``(distance > threshold, distance)``
    """
    edit_distance = jellyfish.levenshtein_distance(title_1, title_2)
    exceeds_threshold = edit_distance > TITLE_THRESHOLD_LEVENSHTEIN
    return exceeds_threshold, edit_distance
def match_artists(artist_1, artist_2: str) -> (bool, int):
    """Compare one artist name, or a list of names, against ``artist_2``.

    Uses ``match_titles`` for each comparison and shares its convention:
    the returned flag is True when the names DIFFER by more than the
    threshold.

    :param artist_1: a single artist name or a list of artist names.
    :param artist_2: the artist name to compare against.
    :return: for a list, ``(False, distance)`` of the first name that is
        within the threshold, otherwise ``(True, smallest distance)``.
        For a single name, the result of ``match_titles``.
    """
    if isinstance(artist_1, list):
        distances = []
        for candidate in artist_1:
            differs, distance = match_titles(candidate, artist_2)
            if not differs:
                # first candidate within the threshold decides the result
                return differs, distance
            distances.append(distance)
        # No candidate was close.  For an empty list there is nothing to
        # take the minimum of, so report the distance to the empty string.
        return True, min(distances, default=len(artist_2))
    return match_titles(artist_1, artist_2)