refactored lyrics fetching

This commit is contained in:
Lars Noack 2022-11-14 15:44:32 +01:00
parent 06ae9750ce
commit d30838e20b
7 changed files with 116 additions and 12 deletions

View File

@ -8,7 +8,7 @@ from . import url_to_path
from . import download
# NEEDS REFACTORING
from .lyrics_ import fetch_lyrics
from .lyrics.lyrics import fetch_lyrics
import logging
import os

View File

@ -56,7 +56,7 @@ class Song:
def is_valid(self) -> bool:
    """
    Compare this song's title and artist with the desired track data.

    :return: True only when the first element returned by both
        phonetic_compares.match_titles() and phonetic_compares.match_artists() is falsy
    """
    title_flag, _ = phonetic_compares.match_titles(self.title, self.desired_data['track'])
    # The desired artist data is passed first and the comparison is done only once;
    # a second call with the arguments swapped would have its result overwritten anyway.
    # NOTE(review): match_artists() appears to special-case a list as its first argument - confirm.
    artist_flag, _ = phonetic_compares.match_artists(self.desired_data['artist'], self.artist)
    return not title_flag and not artist_flag

92
src/lyrics/lyrics.py Normal file
View File

@ -0,0 +1,92 @@
import mutagen
from mutagen.id3 import ID3, USLT
from ..metadata import database as db
from ..utils.shared import *
from . import genius
logger = LYRICS_LOGGER
"""
This whole part is hacky as hell and this little file urgently needs a rewrite. genius.py is really clean though :3
I just wanted to get it to work.
- TODO: the lyrics need to be stored in the database first, and everything else should continue from there
"""
"""
https://cweiske.de/tagebuch/rhythmbox-lyrics.htm
Rhythmbox, my music player, doesn't support ID3 lyrics (USLT) yet, so I have to find something else.
Lyrics in MP3 ID3 tags (SYLT/USLT) are still missing, because GStreamer does not support that yet.
One possible solution would be to use ogg/vorbis files. Those lyrics are supported in Rhythmbox:
'So, the next Rhythmbox release (3.5.0 or 3.4.2) will read lyrics directly from ogg/vorbis files, using the LYRICS and SYNCLYRICS tags.'
Another possible solution (probably the better one, because I don't need to refactor the whole metadata AGAIN)
would be to write a Rhythmbox plugin that fetches lyrics from ID3 USLT.
I have written that Rhythmbox plugin: https://github.com/HeIIow2/rythmbox-id3-lyrics-support
"""
# https://www.programcreek.com/python/example/63462/mutagen.mp3.EasyMP3
# https://code.activestate.com/recipes/577138-embed-lyrics-into-mp3-files-using-mutagen-uslt-tag/
def add_lyrics(file_name, lyrics):
    """
    Embed lyrics into an audio file as an ID3 USLT frame.

    The call is best-effort and returns without writing anything when no file
    name is given, when the file does not exist below MUSIC_DIR, or when the
    file has no ID3 header.

    :param file_name: path of the audio file, relative to MUSIC_DIR
    :param lyrics: object providing the attributes ``lang`` and ``lyrics``
    """
    # os.path.join() raises on None, and "" would resolve to MUSIC_DIR itself
    if not file_name:
        return

    file_path = os.path.join(MUSIC_DIR, file_name)
    if not os.path.exists(file_path):
        return

    try:
        tags = ID3(file_path)
    except mutagen.id3.ID3NoHeaderError:
        return

    logger.info(f"adding lyrics to the file {file_path}")
    uslt_output = USLT(encoding=3, lang=lyrics.lang, desc=u'desc', text=lyrics.lyrics)
    # add() stores the frame under the key derived from the frame itself, so an
    # already loaded USLT frame with the same description and language is
    # replaced instead of being kept next to the new one.
    tags.add(uslt_output)
    tags.save(file_path)
def fetch_single_lyrics(row: dict):
    """
    Look up the lyrics for one track and persist the first search result.

    The first result of genius.search() is written to the database and then
    embedded into the track's file. Nothing is written when the search yields
    no result.

    :param row: track data; the keys 'artists' (iterable of dicts with a
        'name' key), 'title', 'id' and 'file' are read
    """
    artists = [artist['name'] for artist in row['artists']]
    track = row['title']
    id_ = row['id']

    artist_names = ', '.join(artists)
    logger.info(f'try fetching lyrics for "{track}" by "{artist_names}"')

    lyrics = genius.search(artists, track)
    # truthiness covers an empty result list as well as a None result
    if not lyrics:
        return

    logger.info("found lyrics")
    first_hit = lyrics[0]
    database.add_lyrics(id_, lyrics=first_hit)
    add_lyrics(row['file'], first_hit)
def fetch_lyrics():
    """Run fetch_single_lyrics() for every row of database.get_tracks_for_lyrics()."""
    pending_rows = database.get_tracks_for_lyrics()
    for track_row in pending_rows:
        fetch_single_lyrics(track_row)
if __name__ == "__main__":
    # Manual run against a database kept in the system temp directory.
    # logging is imported explicitly so the script does not depend on a star import.
    import logging
    import os
    import tempfile

    temp_folder = "music-downloader"
    temp_dir = os.path.join(tempfile.gettempdir(), temp_folder)
    # exist_ok avoids the check-then-create race of os.path.exists() + os.mkdir()
    os.makedirs(temp_dir, exist_ok=True)

    logging.basicConfig(level=logging.DEBUG)
    db_logger = logging.getLogger("database")
    db_logger.setLevel(logging.DEBUG)

    # Must stay a module-level name: fetch_lyrics() and fetch_single_lyrics()
    # look up `database` as a global.
    # NOTE(review): this URL still points at the `new_metadata` branch - confirm
    # whether it should follow the project's shared fallback URL.
    database = db.Database(os.path.join(temp_dir, "metadata.db"),
                           os.path.join(temp_dir, "database_structure.sql"),
                           "https://raw.githubusercontent.com/HeIIow2/music-downloader/new_metadata/assets/database_structure.sql",
                           db_logger,
                           reset_anyways=False)

    fetch_lyrics()

View File

@ -6,6 +6,8 @@ from .utils.shared import *
from .lyrics import genius
from .utils.shared import *
logger = LYRICS_LOGGER
"""
This whole Part is bodgy as hell and I need to rewrite this little file urgently. genius.py is really clean though :3
Just wanted to get it to work.
@ -31,7 +33,12 @@ I have written that Rhythmbox plugin: https://github.com/HeIIow2/rythmbox-id3-ly
def add_lyrics(file_name, lyrics):
tags = ID3(file_name)
file_path = os.path.join(MUSIC_DIR, file_name)
if not os.path.exists(file_path):
return
logger.info(f"adding lyrics to the file {file_path}")
tags = ID3(file_path)
uslt_output = USLT(encoding=3, lang=lyrics.lang, desc=u'desc', text=lyrics.lyrics)
tags["USLT::'eng'"] = uslt_output
@ -46,13 +53,15 @@ def get_lyrics(file_name):
def fetch_single_lyrics(row: dict):
if "file" in row:
return
file_ = os.path.join(MUSIC_DIR, row['file'])
artist = row['artists'][0]['name']
file_ =
artists = [artist['name'] for artist in row['artists']]
track = row['title']
logger.info(f"try fetching lyrics for \"{track}\" by \"{', '.join(artists)}")
if not os.path.exists(file_):
return
lyrics = genius.search(artist, track)
lyrics = genius.search(artists, track)
if len(lyrics) == 0:
return
print("found something")
@ -60,8 +69,7 @@ def fetch_single_lyrics(row: dict):
def fetch_lyrics():
for row in database.get_custom_track([]):
print(row['title'])
for row in database.get_tracks_for_lyrics():
fetch_single_lyrics(row)

View File

@ -205,13 +205,16 @@ GROUP BY track.id;
def get_tracks_without_filepath(self):
    """Query get_custom_track() for tracks whose file, path or genre column is NULL."""
    incomplete_condition = "(track.file IS NULL OR track.path IS NULL OR track.genre IS NULL)"
    conditions = [incomplete_condition]
    return self.get_custom_track(conditions)
def get_tracks_for_lyrics(self):
    """Query get_custom_track() for tracks whose lyrics column is NULL."""
    missing_lyrics_condition = "track.lyrics IS NULL"
    conditions = [missing_lyrics_condition]
    return self.get_custom_track(conditions)
def add_lyrics(self, track_id: str, lyrics: str):
    """
    Write the lyrics of one track into the ``lyrics`` column of the ``track`` table.

    :param track_id: value compared with ``track.id``; bound as text
    :param lyrics: lyrics to store; converted with str() before binding
    """
    # Both values are bound as parameters: a quote inside track_id can neither
    # break nor alter the statement, unlike interpolating it into the SQL text.
    query = """
UPDATE track
SET lyrics = ?
WHERE id = ?;
    """
    # The statement runs exactly once, always with the str() form of the lyrics.
    self.cursor.execute(query, (str(lyrics), str(track_id)))
    self.connection.commit()
def update_download_status(self, track_id: str):

View File

@ -28,13 +28,13 @@ def modify_title(to_modify: str) -> str:
return to_modify
def match_titles(title_1: str, title_2: str) -> (bool, int):
def match_titles(title_1: str, title_2: str):
title_1, title_2 = modify_title(title_1), modify_title(title_2)
distance = jellyfish.levenshtein_distance(title_1, title_2)
return distance > TITLE_THRESHOLD_LEVENSHTEIN, distance
def match_artists(artist_1, artist_2: str) -> (bool, int):
def match_artists(artist_1, artist_2: str):
if type(artist_1) == list:
distances = []

View File

@ -9,7 +9,7 @@ TEMP_FOLDER = "music-downloader"
LOG_FILE = "download_logs.log"
DATABASE_FILE = "metadata.db"
DATABASE_STRUCTURE_FILE = "database_structure.sql"
DATABASE_STRUCTURE_FALLBACK = "https://raw.githubusercontent.com/HeIIow2/music-downloader/new_metadata/assets/database_structure.sql"
DATABASE_STRUCTURE_FALLBACK = "https://raw.githubusercontent.com/HeIIow2/music-downloader/master/assets/database_structure.sql"
SEARCH_LOGGER = logging.getLogger("mb-cli")
DATABASE_LOGGER = logging.getLogger("database")
@ -17,6 +17,7 @@ METADATA_DOWNLOAD_LOGGER = logging.getLogger("metadata-download")
URL_DOWNLOAD_LOGGER = logging.getLogger("ling-download")
PATH_LOGGER = logging.getLogger("create-paths")
DOWNLOAD_LOGGER = logging.getLogger("download")
LYRICS_LOGGER = logging.getLogger("lyrics")
GENIUS_LOGGER = logging.getLogger("genius")
NOT_A_GENRE = ".", "..", "misc_scripts", "Music", "script", ".git", ".idea"