From d30838e20ba836ba60d332ecd1263ddf65080588 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Mon, 14 Nov 2022 15:44:32 +0100 Subject: [PATCH] refactored lyrics fetching --- src/__main__.py | 2 +- src/lyrics/genius.py | 2 +- src/lyrics/lyrics.py | 92 ++++++++++++++++++++++++++++++++++ src/lyrics_.py | 20 +++++--- src/metadata/database.py | 5 +- src/utils/phonetic_compares.py | 4 +- src/utils/shared.py | 3 +- 7 files changed, 116 insertions(+), 12 deletions(-) create mode 100644 src/lyrics/lyrics.py diff --git a/src/__main__.py b/src/__main__.py index 7215c4e..53425d9 100644 --- a/src/__main__.py +++ b/src/__main__.py @@ -8,7 +8,7 @@ from . import url_to_path from . import download # NEEDS REFACTORING -from .lyrics_ import fetch_lyrics +from .lyrics.lyrics import fetch_lyrics import logging import os diff --git a/src/lyrics/genius.py b/src/lyrics/genius.py index 6a4040e..9c8a241 100644 --- a/src/lyrics/genius.py +++ b/src/lyrics/genius.py @@ -56,7 +56,7 @@ class Song: def is_valid(self) -> bool: title_match, title_distance = phonetic_compares.match_titles(self.title, self.desired_data['track']) - artist_match, artist_distance = phonetic_compares.match_artists(self.artist, self.desired_data['artist']) + artist_match, artist_distance = phonetic_compares.match_artists(self.desired_data['artist'], self.artist) return not title_match and not artist_match diff --git a/src/lyrics/lyrics.py b/src/lyrics/lyrics.py new file mode 100644 index 0000000..3eb4ec7 --- /dev/null +++ b/src/lyrics/lyrics.py @@ -0,0 +1,92 @@ +import mutagen +from mutagen.id3 import ID3, USLT + + +from ..metadata import database as db +from ..utils.shared import * +from . import genius + +logger = LYRICS_LOGGER + +""" +This whole Part is bodgy as hell and I need to rewrite this little file urgently. genius.py is really clean though :3 +Just wanted to get it to work. 
+ - lyrics need to be put in the database and everything should continue from there then +""" + +""" +https://cweiske.de/tagebuch/rhythmbox-lyrics.htm +Rhythmbox, my music player, doesn't support ID3 lyrics (USLT) yet, so I have to find something else +Lyrics in MP3 ID3 tags (SYLT/USLT) is still missing, because GStreamer does not support that yet. + +One possible solution would be to use ogg/vorbis files. Those lyrics are supported in Rhythmbox +'So, the next Rhythmbox release (3.5.0 or 3.4.2) will read lyrics directly from ogg/vorbis files, using the LYRICS and SYNCLYRICS tags.' +Another possible solution (probably the better one, because I don't need to refactor the whole metadata AGAIN) +would be to write a Rhythmbox plugin that fetches lyrics from ID3 USLT + +I have written that Rhythmbox plugin: https://github.com/HeIIow2/rythmbox-id3-lyrics-support +""" + + +# https://www.programcreek.com/python/example/63462/mutagen.mp3.EasyMP3 +# https://code.activestate.com/recipes/577138-embed-lyrics-into-mp3-files-using-mutagen-uslt-tag/ + + +def add_lyrics(file_name, lyrics): + file_path = os.path.join(MUSIC_DIR, file_name) + if not os.path.exists(file_path): + return + + try: + tags = ID3(file_path) + except mutagen.id3.ID3NoHeaderError: + return + + logger.info(f"adding lyrics to the file {file_path}") + + uslt_output = USLT(encoding=3, lang=lyrics.lang, desc=u'desc', text=lyrics.lyrics) + tags["USLT::'eng'"] = uslt_output + tags.save(file_path) + + +def fetch_single_lyrics(row: dict): + artists = [artist['name'] for artist in row['artists']] + track = row['title'] + id_ = row['id'] + + logger.info(f"try fetching lyrics for \"{track}\" by \"{', '.join(artists)}") + + lyrics = genius.search(artists, track) + if len(lyrics) == 0: + return + + logger.info("found lyrics") + database.add_lyrics(id_, lyrics=lyrics[0]) + add_lyrics(row['file'], lyrics[0]) + + +def fetch_lyrics(): + for row in database.get_tracks_for_lyrics(): + fetch_single_lyrics(row) + + +if __name__ == "__main__": 
+ import tempfile + import os + + temp_folder = "music-downloader" + temp_dir = os.path.join(tempfile.gettempdir(), temp_folder) + if not os.path.exists(temp_dir): + os.mkdir(temp_dir) + + logging.basicConfig(level=logging.DEBUG) + db_logger = logging.getLogger("database") + db_logger.setLevel(logging.DEBUG) + + database = db.Database(os.path.join(temp_dir, "metadata.db"), + os.path.join(temp_dir, "database_structure.sql"), + "https://raw.githubusercontent.com/HeIIow2/music-downloader/new_metadata/assets/database_structure.sql", + db_logger, + reset_anyways=False) + + fetch_lyrics() diff --git a/src/lyrics_.py b/src/lyrics_.py index c2a49bf..648daab 100644 --- a/src/lyrics_.py +++ b/src/lyrics_.py @@ -6,6 +6,8 @@ from .utils.shared import * from .lyrics import genius from .utils.shared import * +logger = LYRICS_LOGGER + """ This whole Part is bodgy as hell and I need to rewrite this little file urgently. genius.py is really clean though :3 Just wanted to get it to work. @@ -31,7 +33,12 @@ I have written that Rhythmbox plugin: https://github.com/HeIIow2/rythmbox-id3-ly def add_lyrics(file_name, lyrics): - tags = ID3(file_name) + file_path = os.path.join(MUSIC_DIR, file_name) + if not os.path.exists(file_path): + return + + logger.info(f"adding lyrics to the file {file_path}") + tags = ID3(file_path) uslt_output = USLT(encoding=3, lang=lyrics.lang, desc=u'desc', text=lyrics.lyrics) tags["USLT::'eng'"] = uslt_output @@ -46,13 +53,15 @@ def get_lyrics(file_name): def fetch_single_lyrics(row: dict): if "file" in row: return - file_ = os.path.join(MUSIC_DIR, row['file']) - artist = row['artists'][0]['name'] + file_ = + artists = [artist['name'] for artist in row['artists']] track = row['title'] + logger.info(f"try fetching lyrics for \"{track}\" by \"{', '.join(artists)}") + if not os.path.exists(file_): return - lyrics = genius.search(artist, track) + lyrics = genius.search(artists, track) if len(lyrics) == 0: return print("found something") @@ -60,8 +69,7 @@ def 
fetch_single_lyrics(row: dict): def fetch_lyrics(): - for row in database.get_custom_track([]): - print(row['title']) + for row in database.get_tracks_for_lyrics(): fetch_single_lyrics(row) diff --git a/src/metadata/database.py b/src/metadata/database.py index fad96b2..d32a416 100644 --- a/src/metadata/database.py +++ b/src/metadata/database.py @@ -205,13 +205,16 @@ GROUP BY track.id; def get_tracks_without_filepath(self): return self.get_custom_track(["(track.file IS NULL OR track.path IS NULL OR track.genre IS NULL)"]) + def get_tracks_for_lyrics(self): + return self.get_custom_track(["track.lyrics IS NULL"]) + def add_lyrics(self, track_id: str, lyrics: str): query = f""" UPDATE track SET lyrics = ? WHERE '{track_id}' == id; """ - self.cursor.execute(query, (lyrics, )) + self.cursor.execute(query, (str(lyrics), )) self.connection.commit() def update_download_status(self, track_id: str): diff --git a/src/utils/phonetic_compares.py b/src/utils/phonetic_compares.py index d2d18d5..034eb81 100644 --- a/src/utils/phonetic_compares.py +++ b/src/utils/phonetic_compares.py @@ -28,13 +28,13 @@ def modify_title(to_modify: str) -> str: return to_modify -def match_titles(title_1: str, title_2: str) -> (bool, int): +def match_titles(title_1: str, title_2: str): title_1, title_2 = modify_title(title_1), modify_title(title_2) distance = jellyfish.levenshtein_distance(title_1, title_2) return distance > TITLE_THRESHOLD_LEVENSHTEIN, distance -def match_artists(artist_1, artist_2: str) -> (bool, int): +def match_artists(artist_1, artist_2: str): if type(artist_1) == list: distances = [] diff --git a/src/utils/shared.py b/src/utils/shared.py index a69a969..6c6f190 100644 --- a/src/utils/shared.py +++ b/src/utils/shared.py @@ -9,7 +9,7 @@ TEMP_FOLDER = "music-downloader" LOG_FILE = "download_logs.log" DATABASE_FILE = "metadata.db" DATABASE_STRUCTURE_FILE = "database_structure.sql" -DATABASE_STRUCTURE_FALLBACK = 
"https://raw.githubusercontent.com/HeIIow2/music-downloader/new_metadata/assets/database_structure.sql" +DATABASE_STRUCTURE_FALLBACK = "https://raw.githubusercontent.com/HeIIow2/music-downloader/master/assets/database_structure.sql" SEARCH_LOGGER = logging.getLogger("mb-cli") DATABASE_LOGGER = logging.getLogger("database") @@ -17,6 +17,7 @@ METADATA_DOWNLOAD_LOGGER = logging.getLogger("metadata-download") URL_DOWNLOAD_LOGGER = logging.getLogger("ling-download") PATH_LOGGER = logging.getLogger("create-paths") DOWNLOAD_LOGGER = logging.getLogger("download") +LYRICS_LOGGER = logging.getLogger("lyrics") GENIUS_LOGGER = logging.getLogger("genius") NOT_A_GENRE = ".", "..", "misc_scripts", "Music", "script", ".git", ".idea"