From 0abc49f01c192764fb9dbb657d4c0b126184da81 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Thu, 10 Nov 2022 10:01:26 +0100 Subject: [PATCH] made the lyrics downloader actually working. Still needs much refactoring attention though --- src/lyrics/genius.py | 27 +++++++++++++++++++-------- src/lyrics_.py | 9 ++++----- src/main.py | 4 +++- src/metadata/search.py | 1 + src/tools/phonetic_compares.py | 9 +++++++++ 5 files changed, 36 insertions(+), 14 deletions(-) diff --git a/src/lyrics/genius.py b/src/lyrics/genius.py index 46900d1..32c3ba7 100644 --- a/src/lyrics/genius.py +++ b/src/lyrics/genius.py @@ -4,6 +4,7 @@ import os import logging from typing import List from bs4 import BeautifulSoup +import pycountry current = os.path.dirname(os.path.realpath(__file__)) parent = os.path.dirname(current) @@ -44,7 +45,9 @@ class Song: self.artist = get_elem_from_obj(song_data, ['primary_artist', 'name']) self.title = get_elem_from_obj(song_data, ['title']) - self.language = get_elem_from_obj(song_data, ['language']) + lang_code = get_elem_from_obj(song_data, ['language']) or "en" + self.language = pycountry.languages.get(alpha_2=lang_code) + self.lang = self.language.alpha_3 self.url = get_elem_from_obj(song_data, ['url']) # maybe could be implemented @@ -57,8 +60,12 @@ class Song: self.valid = self.is_valid() if self.valid: logger.info(f"found lyrics for \"{self.__repr__()}\"") + else: + return - self.lyrics: str + self.lyrics = self.fetch_lyrics() + if self.lyrics is None: + self.valid = False def is_valid(self) -> bool: title_match, title_distance = phonetic_compares.match_titles(self.title, self.desired_data['track']) @@ -96,7 +103,7 @@ class Song: def process_multiple_songs(song_datas: list, desired_data: dict) -> List[Song]: all_songs = [Song(song_data, desired_data) for song_data in song_datas] - return [song_ for song_ in all_songs if not song_.valid] + return all_songs def search_song_list(artist: str, track: str) -> List[Song]: @@ -128,14 +135,18 @@ def search_song_list(artist: str, track: str) -> List[Song]: def search(artist: str, track: str): - raw_songs = search_song_list(artist, track) - all_lyrics = [raw_song.fetch_lyrics() for raw_song in raw_songs] - return [i for i in all_lyrics if i is not None] + results = [] + r = search_song_list(artist, track) + for r_ in r: + if r_.valid: + results.append(r_) + return results if __name__ == "__main__": logging.basicConfig(level=logging.DEBUG) + """ song = Song( {'highlights': [], 'index': 'song', 'type': 'song', 'result': {'_type': 'song', 'annotation_count': 0, 'api_path': '/songs/6142483', @@ -166,8 +177,8 @@ if __name__ == "__main__": {'artist': 'Psychonaut 4', 'track': 'Sana Sana Sana, Cura Cura Cura'} ) print(song.fetch_lyrics()) + """ - exit() - songs = search("Psychonaut 4", "Sana Sana Sana, Cura Cura Cura") + songs = search("Psychonaut 4", "Sana-sana-sana - Cura-cura-cura") for song in songs: print(song) diff --git a/src/lyrics_.py b/src/lyrics_.py index 2a939ef..696c349 100644 --- a/src/lyrics_.py +++ b/src/lyrics_.py @@ -29,10 +29,9 @@ I have written that Rhythmbox plugin: https://github.com/HeIIow2/rythmbox-id3-ly # https://code.activestate.com/recipes/577138-embed-lyrics-into-mp3-files-using-mutagen-uslt-tag/ -def add_lyrics(file_name, lyrics=""): - print(lyrics) +def add_lyrics(file_name, lyrics): tags = ID3(file_name) - uslt_output = USLT(encoding=3, lang=u'eng', desc=u'desc', text=lyrics) + uslt_output = USLT(encoding=3, lang=lyrics.lang, desc=u'desc', text=lyrics.lyrics) tags["USLT::'eng'"] = uslt_output tags.save(file_name) @@ -50,16 +49,16 @@ def fetch_single_lyrics(row: dict): if not os.path.exists(file_): return - print(file_, artist, track) lyrics = genius.search(artist, track) if len(lyrics) == 0: return + print("found something") add_lyrics(file_, lyrics[0]) def fetch_lyrics(database: db.Database): for row in database.get_custom_track([]): - print(row) + print(row['title']) fetch_single_lyrics(row) diff --git a/src/main.py b/src/main.py index a56e0c4..31b2d18 100644 --- a/src/main.py +++ b/src/main.py @@ -75,6 +75,7 @@ def search_for_metadata(): print() print(search.search_from_query(input_)) + print(search.current_option) return search.current_option @@ -109,7 +110,8 @@ def cli(start_at: int = 0): logging.info(f"{genre} has been set as genre.") if start_at <= 0: - search = search_for_metadata() + # search = search_for_metadata() + search = metadata.search.Option("release", "f8d4b24d-2c46-4e9c-8078-0c0f337c84dd", "Beautyfall") logging.info("Starting Downloading of metadata") metadata_downloader = MetadataDownloader(database, METADATA_DOWNLOAD_LOGGER) metadata_downloader.download(search) diff --git a/src/metadata/search.py b/src/metadata/search.py index 158fb31..2a7cb76 100644 --- a/src/metadata/search.py +++ b/src/metadata/search.py @@ -18,6 +18,7 @@ OPTION_TYPES = ['artist', 'release_group', 'release', 'recording'] class Option: def __init__(self, type_: str, id_: str, name: str, additional_info: str = "") -> None: + # print(type_, id_, name) if type_ not in OPTION_TYPES: raise ValueError(f"type: {type_} doesn't exist. Leagal Values: {OPTION_TYPES}") self.type = type_ diff --git a/src/tools/phonetic_compares.py b/src/tools/phonetic_compares.py index facb851..96da52c 100644 --- a/src/tools/phonetic_compares.py +++ b/src/tools/phonetic_compares.py @@ -1,9 +1,18 @@ import jellyfish +import string TITLE_THRESHOLD_LEVENSHTEIN = 2 +UNIFY_TO = " " + + +def unify_punctuation(to_unify: str) -> str: + for char in string.punctuation: + to_unify = to_unify.replace(char, UNIFY_TO) + return to_unify def match_titles(title_1: str, title_2: str) -> (bool, int): + title_1, title_2 = unify_punctuation(title_1).lower(), unify_punctuation(title_2).lower() distance = jellyfish.levenshtein_distance(title_1, title_2) return distance > TITLE_THRESHOLD_LEVENSHTEIN, distance