started genius lyrics fetcher
This commit is contained in:
parent
527af38098
commit
3bc133ef62
@ -1,4 +1,4 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<project version="4">
|
<project version="4">
|
||||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (music-downloader)" project-jdk-type="Python SDK" />
|
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
|
||||||
</project>
|
</project>
|
@ -3,6 +3,7 @@
|
|||||||
<component name="ProjectModuleManager">
|
<component name="ProjectModuleManager">
|
||||||
<modules>
|
<modules>
|
||||||
<module fileurl="file://$PROJECT_DIR$/.idea/music-downloader.iml" filepath="$PROJECT_DIR$/.idea/music-downloader.iml" />
|
<module fileurl="file://$PROJECT_DIR$/.idea/music-downloader.iml" filepath="$PROJECT_DIR$/.idea/music-downloader.iml" />
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/../rythmbox-id3-lyrics-support/.idea/rythmbox-id3-lyrics-support.iml" filepath="$PROJECT_DIR$/../rythmbox-id3-lyrics-support/.idea/rythmbox-id3-lyrics-support.iml" />
|
||||||
</modules>
|
</modules>
|
||||||
</component>
|
</component>
|
||||||
</project>
|
</project>
|
@ -6,5 +6,6 @@
|
|||||||
</content>
|
</content>
|
||||||
<orderEntry type="jdk" jdkName="Python 3.10 (music-downloader)" jdkType="Python SDK" />
|
<orderEntry type="jdk" jdkName="Python 3.10 (music-downloader)" jdkType="Python SDK" />
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
<orderEntry type="module" module-name="rythmbox-id3-lyrics-support" />
|
||||||
</component>
|
</component>
|
||||||
</module>
|
</module>
|
@ -1,6 +1,6 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<project version="4">
|
<project version="4">
|
||||||
<component name="VcsDirectoryMappings">
|
<component name="VcsDirectoryMappings">
|
||||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
<mapping directory="" vcs="Git" />
|
||||||
</component>
|
</component>
|
||||||
</project>
|
</project>
|
@ -14,8 +14,8 @@ would be to write a Rhythmbox plugin that fetches lyrics from ID3 USLT
|
|||||||
# https://www.programcreek.com/python/example/63462/mutagen.mp3.EasyMP3
|
# https://www.programcreek.com/python/example/63462/mutagen.mp3.EasyMP3
|
||||||
# https://code.activestate.com/recipes/577138-embed-lyrics-into-mp3-files-using-mutagen-uslt-tag/
|
# https://code.activestate.com/recipes/577138-embed-lyrics-into-mp3-files-using-mutagen-uslt-tag/
|
||||||
|
|
||||||
MP3_PATH = "/home/lars/Music/metalcore/I Prevail/TRAUMA/Breaking Down.mp3"
|
MP3_PATH = "/home/lars/Music/deathcore/Brand of Sacrifice/The Interstice/Eclipse.mp3"
|
||||||
LYRICS = """
|
LYRICS_BREAKING_DOWN = """
|
||||||
[Chorus: Brian Burkheiser]
|
[Chorus: Brian Burkheiser]
|
||||||
I think, I think too much
|
I think, I think too much
|
||||||
I'm a little bit paranoid, I think I’m breaking
|
I'm a little bit paranoid, I think I’m breaking
|
||||||
@ -78,6 +78,32 @@ I don't really like myself
|
|||||||
I don't really like myself
|
I don't really like myself
|
||||||
I think I'm breaking down
|
I think I'm breaking down
|
||||||
"""
|
"""
|
||||||
|
LYRICS_ECLIPSE = """
|
||||||
|
Your offerings have consecrated
|
||||||
|
They are marked by the brand
|
||||||
|
The sun has seen it's fifth death
|
||||||
|
For the red lake to flow again
|
||||||
|
|
||||||
|
He will
|
||||||
|
Feel their pain in order to
|
||||||
|
Complete the final transformation
|
||||||
|
A name new and old
|
||||||
|
|
||||||
|
Your offerings have been consecrated by the laws of Causality
|
||||||
|
Falcon of Darkness
|
||||||
|
Send us into an age of abyss
|
||||||
|
Blinded by beauty
|
||||||
|
With stacks of bodies as high as the eye can see
|
||||||
|
Feast, apostles, feast
|
||||||
|
|
||||||
|
The one chosen by the hand of God
|
||||||
|
The master of the sinful black sheep
|
||||||
|
And the king of the faithful blind
|
||||||
|
|
||||||
|
Welcome to the new age
|
||||||
|
Welcome to the new age
|
||||||
|
We are the branded ones"""
|
||||||
|
|
||||||
|
|
||||||
def add_lyrics(file_name, lyrics=""):
|
def add_lyrics(file_name, lyrics=""):
|
||||||
tags = ID3(file_name)
|
tags = ID3(file_name)
|
||||||
@ -91,5 +117,5 @@ def get_lyrics(file_name):
|
|||||||
return tags.getall("USLT")
|
return tags.getall("USLT")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
add_lyrics(MP3_PATH, lyrics=LYRICS)
|
add_lyrics(MP3_PATH, lyrics=LYRICS_ECLIPSE)
|
||||||
print(get_lyrics(MP3_PATH))
|
print(get_lyrics(MP3_PATH))
|
||||||
|
@ -1,4 +1,127 @@
|
|||||||
|
import requests
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
current = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
parent = os.path.dirname(current)
|
||||||
|
sys.path.append(parent)
|
||||||
|
from tools import phonetic_compares
|
||||||
|
from tools.object_handeling import get_elem_from_obj
|
||||||
|
|
||||||
# search doesn't support isrc
|
# search doesn't support isrc
|
||||||
# https://genius.com/api/search/multi?q=I Prevail - Breaking Down
|
# https://genius.com/api/search/multi?q=I Prevail - Breaking Down
|
||||||
# https://genius.com/api/songs/6192944
|
# https://genius.com/api/songs/6192944
|
||||||
# https://docs.genius.com/
|
# https://docs.genius.com/
|
||||||
|
|
||||||
|
# Logger used by everything in this module; it can be swapped out through set_logger().
logger = logging.getLogger("genius")

# One shared HTTP session for all requests made by this module.
session = requests.Session()
# The assignment replaces whatever header mapping the freshly created session carried.
session.headers = dict(
    [
        ("Connection", "keep-alive"),
        ("Referer", "https://genius.com/search/embed"),
    ]
)
|
||||||
|
|
||||||
|
|
||||||
|
def set_proxy(proxies: dict) -> None:
    """Make the shared module session use *proxies* for its requests.

    :param proxies: mapping stored unchanged on ``session.proxies``
        (presumably scheme -> proxy URL, as requests expects; confirm with callers).
    """
    session.proxies = proxies
|
||||||
|
|
||||||
|
|
||||||
|
def set_logger(logger_: logging.Logger) -> None:
    """Swap the module-level ``logger`` for *logger_*.

    :param logger_: logger that all later log calls of this module go through.
    """
    # Rebinds the module global, so code that looks up ``logger`` afterwards sees the new one.
    global logger
    logger = logger_
|
||||||
|
|
||||||
|
|
||||||
|
class Song:
    """A single song hit of the Genius search, paired with the song that was searched for.

    Attributes read from the hit's ``result`` entry (``None`` when the field is absent):
    ``id``, ``artist`` (name of the primary artist), ``title``, ``language`` and ``url``.
    ``valid`` tells whether title and artist of the hit match the desired song and
    ``lyrics`` holds the fetched lyrics (empty string until ``fetch_lyrics`` fills it).
    """

    def __init__(self, raw_data: dict, desirered_data: dict):
        """
        :param raw_data: one search hit; the song fields are read from its ``result`` entry.
        :param desirered_data: the wanted song; its ``artist`` and ``track`` entries are compared
            against the hit.
        """
        self.raw_data = raw_data
        self.desired_data = desirered_data

        song_data = get_elem_from_obj(self.raw_data, ['result'], return_if_none={})
        self.id = get_elem_from_obj(song_data, ['id'])
        self.artist = get_elem_from_obj(song_data, ['primary_artist', 'name'])
        self.title = get_elem_from_obj(song_data, ['title'])

        self.language = get_elem_from_obj(song_data, ['language'])
        self.url = get_elem_from_obj(song_data, ['url'])

        # maybe could be implemented; only annotated for now, the attribute is never assigned
        self.lyricist: str

        # Look the state up once and reuse it for both the check and the message.
        lyrics_state = get_elem_from_obj(song_data, ['lyrics_state'])
        if lyrics_state != "complete":
            logger.warning(f"lyrics state of {self.title} by {self.artist} is not complete but {lyrics_state}")

        self.valid = self.is_valid()
        if self.valid:
            logger.info(f'found lyrics for "{self!r}"')

        # Start out empty so the attribute can be read before fetch_lyrics() was called.
        self.lyrics: str = ""

    def is_valid(self) -> bool:
        """Return True when both title and artist of the hit match the desired song.

        ``phonetic_compares.match_titles`` / ``match_artists`` report True as their first
        element when the Levenshtein distance EXCEEDS the threshold, so a hit is valid
        only when neither comparison reports such a mismatch.
        """
        # A hit without title or artist cannot be compared, so it can never be a match.
        if self.title is None or self.artist is None:
            return False

        title_mismatch, _ = phonetic_compares.match_titles(self.title, self.desired_data['track'])
        artist_mismatch, _ = phonetic_compares.match_artists(self.artist, self.desired_data['artist'])

        return not title_mismatch and not artist_mismatch

    def __repr__(self) -> str:
        return f"{self.title} by {self.artist}"

    def fetch_lyrics(self) -> str:
        """Store and return the lyrics of this song.

        Currently a stub: nothing is requested and the result is always the empty string.
        A warning is logged when called on a hit that did not pass ``is_valid``.
        """
        if not self.valid:
            logger.warning(f"{self!r} is invalid but the lyrics still get fetched. Something could be wrong.")
        lyrics = ""

        self.lyrics = lyrics
        return lyrics


def build_search_query(artist: str, track: str) -> str:
    """Return the free-text search query for a song in the form ``"<artist> - <track>"``."""
    return f"{artist} - {track}"


def process_multiple_songs(song_datas: list, desired_data: dict) -> List[Song]:
    """Wrap every raw search hit in a ``Song`` and keep the ones matching the desired song.

    :param song_datas: raw hits of the search response.
    :param desired_data: the wanted song, passed on to every ``Song``.
    :return: the songs whose ``valid`` flag is set, in the order of the hits.
    """
    all_songs = [Song(song_data, desired_data) for song_data in song_datas]
    # Keep the valid hits; the filter used to be negated and returned the rejected ones.
    return [song for song in all_songs if song.valid]
|
||||||
|
|
||||||
|
|
||||||
|
def search_song_list(artist: str, track: str) -> List[Song]:
    """Search Genius for *track* by *artist* and return the songs found.

    The response is expected to carry a ``meta.status`` of 200 and a list of sections
    under ``response.sections``; the hits of the first section of type ``"song"`` are
    handed to ``process_multiple_songs``.

    :param artist: name of the wanted artist.
    :param track: title of the wanted track.
    :return: the songs produced by ``process_multiple_songs``, or an empty list when the
        request fails, the body is not valid JSON or no song section is present.
    """
    endpoint = "https://genius.com/api/search/multi"
    query = build_search_query(artist, track)
    logger.info(f"requesting {endpoint}?q={query}")

    desired_data = {
        'artist': artist,
        'track': track
    }

    # Pass the query as a parameter so requests encodes characters like "&" or "#".
    r = session.get(endpoint, params={'q': query})
    if r.status_code != 200:
        logger.warning(f"{r.url} returned {r.status_code}:\n{r.content}")
        return []

    try:
        content = r.json()
    except ValueError:
        # requests signals an undecodable body with a ValueError subclass.
        logger.warning(f"{r.url} did not return valid json:\n{r.content}")
        return []

    status = get_elem_from_obj(content, ['meta', 'status'])
    if status != 200:
        logger.warning(f"{r.url} returned {status}:\n{content}")
        return []

    # "or []" also covers a response in which the sections entry is present but null.
    sections = get_elem_from_obj(content, ['response', 'sections'], return_if_none=[]) or []
    for section in sections:
        section_type = get_elem_from_obj(section, ['type'])
        logger.debug(f"found section of type {section_type}")
        if section_type == "song":
            return process_multiple_songs(get_elem_from_obj(section, ['hits'], return_if_none=[]), desired_data)

    return []
|
||||||
|
|
||||||
|
def search(artist: str, track: str):
    """Look up *track* by *artist*; thin wrapper that returns the result of ``search_song_list``."""
    found_songs = search_song_list(artist, track)
    return found_songs
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: run one search with debug logging and print what was found.
    logging.basicConfig(level=logging.DEBUG)

    songs = search_song_list(artist="Psychonaut 4", track="Sana Sana Sana, Cura Cura Cura")
    print(songs)
|
||||||
|
24
src/tools/object_handeling.py
Normal file
24
src/tools/object_handeling.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
from datetime import date
|
||||||
|
|
||||||
|
|
||||||
|
def get_elem_from_obj(current_object, keys: list, after_process=lambda x: x, return_if_none=None):
    """Follow *keys* into a nested structure of dicts/lists and return the element found there.

    :param current_object: the root container to start from.
    :param keys: path of dict keys and/or sequence indices, applied from left to right.
    :param after_process: callable applied to the element found before it is returned.
    :param return_if_none: returned as-is (``after_process`` is not applied) as soon as one
        step of the path cannot be followed.
    :return: ``after_process(element)`` or ``return_if_none``.
    """
    for key in keys:
        try:
            # Check membership first for dicts, so subclasses that create entries on a
            # failed lookup (e.g. defaultdict) are not modified by this read.
            if isinstance(current_object, dict) and key not in current_object:
                return return_if_none
            current_object = current_object[key]
        except (KeyError, IndexError, TypeError):
            # Missing key, index out of range, or an intermediate value that cannot be
            # indexed at all (e.g. None): the path ends here.
            return return_if_none
    return after_process(current_object)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_music_brainz_date(mb_date: str) -> date:
    """Convert a possibly partial date string into a ``date``.

    Accepted forms are ``YYYY``, ``YYYY-MM`` and ``YYYY-MM-DD``; components that are
    not given default to 1.  Any other input (empty string, non-numeric parts, more
    than three parts) yields ``date(1, 1, 1)``.

    :param mb_date: the date string to parse.
    :return: the parsed date.
    :raises ValueError: if the numeric components do not form a real date (e.g. month 13).
    """
    # year, month, day - every component that is not given keeps its default of 1
    components = [1, 1, 1]

    parts = mb_date.split("-")
    if len(parts) <= len(components) and all(part.isdecimal() for part in parts):
        for position, part in enumerate(parts):
            components[position] = int(part)

    return date(*components)
|
22
src/tools/phonetic_compares.py
Normal file
22
src/tools/phonetic_compares.py
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
import jellyfish
|
||||||
|
|
||||||
|
TITLE_THRESHOLD_LEVENSHTEIN = 2
|
||||||
|
|
||||||
|
|
||||||
|
def match_titles(title_1: str, title_2: str) -> (bool, int):
    """Compare two titles by their Levenshtein distance.

    NOTE: despite the function name, the boolean is True when the distance EXCEEDS
    ``TITLE_THRESHOLD_LEVENSHTEIN``, i.e. when the titles are too far apart.

    :return: tuple of (distance is above the threshold, the distance itself).
    """
    edit_distance = jellyfish.levenshtein_distance(title_1, title_2)
    too_far_apart = edit_distance > TITLE_THRESHOLD_LEVENSHTEIN
    return too_far_apart, edit_distance
|
||||||
|
|
||||||
|
|
||||||
|
def match_artists(artist_1, artist_2: str) -> (bool, int):
    """Compare one artist name, or a list of them, against *artist_2* using ``match_titles``.

    The boolean has the same meaning as in ``match_titles``: True means the distance
    exceeds the threshold.

    :param artist_1: a single name or a ``list`` of names.
    :param artist_2: the name to compare against.
    :return: for a single name the result of ``match_titles``.  For a list, the result of
        the first name whose distance does not exceed the threshold, otherwise
        ``(True, smallest distance seen)``.
    :raises ValueError: for an empty list, because there is no smallest distance.
    """
    # Anything that is not exactly a list is treated as one single name.
    if type(artist_1) != list:
        return match_titles(artist_1, artist_2)

    seen_distances = []
    for candidate in artist_1:
        exceeds, candidate_distance = match_titles(candidate, artist_2)
        if exceeds:
            seen_distances.append(candidate_distance)
            continue
        # First candidate within the threshold decides the result.
        return exceeds, candidate_distance

    return True, min(seen_distances)
|
Loading…
Reference in New Issue
Block a user