refactored lyrics fetching
This commit is contained in:
parent
06ae9750ce
commit
d30838e20b
@ -8,7 +8,7 @@ from . import url_to_path
|
||||
from . import download
|
||||
|
||||
# NEEDS REFACTORING
|
||||
from .lyrics_ import fetch_lyrics
|
||||
from .lyrics.lyrics import fetch_lyrics
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
@ -56,7 +56,7 @@ class Song:
|
||||
|
||||
def is_valid(self) -> bool:
|
||||
title_match, title_distance = phonetic_compares.match_titles(self.title, self.desired_data['track'])
|
||||
artist_match, artist_distance = phonetic_compares.match_artists(self.artist, self.desired_data['artist'])
|
||||
artist_match, artist_distance = phonetic_compares.match_artists(self.desired_data['artist'], self.artist)
|
||||
|
||||
return not title_match and not artist_match
|
||||
|
||||
|
92
src/lyrics/lyrics.py
Normal file
92
src/lyrics/lyrics.py
Normal file
@ -0,0 +1,92 @@
|
||||
import mutagen
|
||||
from mutagen.id3 import ID3, USLT
|
||||
|
||||
|
||||
from ..metadata import database as db
|
||||
from ..utils.shared import *
|
||||
from . import genius
|
||||
|
||||
# Module-level logger used by every function in this file.
# NOTE(review): LYRICS_LOGGER is presumably supplied by the star import of ..utils.shared - confirm.
logger = LYRICS_LOGGER
|
||||
|
||||
"""
|
||||
This whole Part is bodgy as hell and I need to rewrite this little file urgently. genius.py is really clean though :3
|
||||
Just wanted to get it to work.
|
||||
- lyrics need to be put in the database and everything should continue from there then
|
||||
"""
|
||||
|
||||
"""
|
||||
https://cweiske.de/tagebuch/rhythmbox-lyrics.htm
|
||||
Rhythmbox, my music player, doesn't support ID3 lyrics (USLT) yet, so I have to find something else.
|
||||
Lyrics in MP3 ID3 tags (SYLT/USLT) is still missing, because GStreamer does not support that yet.
|
||||
|
||||
One possible solution would be to use ogg/vorbis files. Those lyrics are supported in Rhythmbox:
|
||||
'So, the next Rhythmbox release (3.5.0 or 3.4.2) will read lyrics directly from ogg/vorbis files, using the LYRICS and SYNCLYRICS tags.'
|
||||
Another possible solution (probably the better one, because I don't need to refactor the whole metadata AGAIN)
|
||||
would be to write a Rhythmbox plugin that fetches lyrics from ID3 USLT
|
||||
|
||||
I have written that Rhythmbox plugin: https://github.com/HeIIow2/rythmbox-id3-lyrics-support
|
||||
"""
|
||||
|
||||
|
||||
# https://www.programcreek.com/python/example/63462/mutagen.mp3.EasyMP3
|
||||
# https://code.activestate.com/recipes/577138-embed-lyrics-into-mp3-files-using-mutagen-uslt-tag/
|
||||
|
||||
|
||||
def add_lyrics(file_name, lyrics):
    """
    Embed lyrics into an audio file as an ID3 USLT (unsynchronised lyrics) frame.

    The call is best-effort: nothing is written when no file name is given,
    when the file does not exist, or when the file carries no ID3 header.

    :param file_name: path of the audio file relative to MUSIC_DIR; may be None
        for tracks that have not been downloaded yet
    :param lyrics: object exposing ``lang`` (language code stored in the frame)
        and ``lyrics`` (the lyrics text)
    """
    # Tracks without a stored file path would make os.path.join raise a TypeError.
    if not file_name:
        return

    file_path = os.path.join(MUSIC_DIR, file_name)
    if not os.path.exists(file_path):
        return

    try:
        tags = ID3(file_path)
    except mutagen.id3.ID3NoHeaderError:
        # No ID3 header to extend; deliberately skip the file.
        return

    logger.info(f"adding lyrics to the file {file_path}")

    # NOTE(review): assumes lyrics.lang is a three letter language code as ID3 expects - confirm.
    uslt_output = USLT(encoding=3, lang=lyrics.lang, desc=u'desc', text=lyrics.lyrics)
    # add() stores the frame under its own hash key (built from desc and lang),
    # so an already present frame with the same desc/lang is replaced instead of
    # being duplicated under a hand-written key.
    tags.add(uslt_output)
    tags.save(file_path)
|
||||
|
||||
|
||||
def fetch_single_lyrics(row: dict):
    """
    Search lyrics for one track, store them in the database and embed them in its file.

    :param row: track record; this function reads ``row['artists']`` (an
        iterable of mappings with a ``'name'`` key), ``row['title']``,
        ``row['id']`` and ``row['file']``
    """
    artists = [artist['name'] for artist in row['artists']]
    track = row['title']
    id_ = row['id']

    logger.info(f"try fetching lyrics for \"{track}\" by \"{', '.join(artists)}\"")

    lyrics = genius.search(artists, track)
    # Covers an empty result as well as None.
    if not lyrics:
        return

    logger.info("found lyrics")
    # NOTE(review): the first search result is taken as the match - confirm genius.search orders by relevance.
    first_result = lyrics[0]
    # NOTE(review): `database` is a module-level name not defined in this function - confirm where it is bound.
    database.add_lyrics(id_, lyrics=first_result)
    add_lyrics(row['file'], first_result)
|
||||
|
||||
|
||||
def fetch_lyrics():
    """Run fetch_single_lyrics for every row returned by database.get_tracks_for_lyrics()."""
    pending_tracks = database.get_tracks_for_lyrics()
    for track_row in pending_tracks:
        fetch_single_lyrics(track_row)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual entry point: builds a database inside the system temp directory
    # and fetches lyrics for every track that has none yet.
    import logging
    import os
    import tempfile

    temp_folder = "music-downloader"
    temp_dir = os.path.join(tempfile.gettempdir(), temp_folder)
    # exist_ok avoids the check-then-create race of exists() followed by mkdir().
    os.makedirs(temp_dir, exist_ok=True)

    logging.basicConfig(level=logging.DEBUG)
    db_logger = logging.getLogger("database")
    db_logger.setLevel(logging.DEBUG)

    # Binding `database` at module level makes it visible to fetch_lyrics()
    # and fetch_single_lyrics(), which read it as a global.
    database = db.Database(os.path.join(temp_dir, "metadata.db"),
                           os.path.join(temp_dir, "database_structure.sql"),
                           "https://raw.githubusercontent.com/HeIIow2/music-downloader/new_metadata/assets/database_structure.sql",
                           db_logger,
                           reset_anyways=False)

    fetch_lyrics()
|
@ -6,6 +6,8 @@ from .utils.shared import *
|
||||
from .lyrics import genius
|
||||
from .utils.shared import *
|
||||
|
||||
logger = LYRICS_LOGGER
|
||||
|
||||
"""
|
||||
This whole Part is bodgy as hell and I need to rewrite this little file urgently. genius.py is really clean though :3
|
||||
Just wanted to get it to work.
|
||||
@ -31,7 +33,12 @@ I have written that Rhythmbox plugin: https://github.com/HeIIow2/rythmbox-id3-ly
|
||||
|
||||
|
||||
def add_lyrics(file_name, lyrics):
|
||||
tags = ID3(file_name)
|
||||
file_path = os.path.join(MUSIC_DIR, file_name)
|
||||
if not os.path.exists(file_path):
|
||||
return
|
||||
|
||||
logger.info(f"adding lyrics to the file {file_path}")
|
||||
tags = ID3(file_path)
|
||||
uslt_output = USLT(encoding=3, lang=lyrics.lang, desc=u'desc', text=lyrics.lyrics)
|
||||
tags["USLT::'eng'"] = uslt_output
|
||||
|
||||
@ -46,13 +53,15 @@ def get_lyrics(file_name):
|
||||
def fetch_single_lyrics(row: dict):
|
||||
if "file" in row:
|
||||
return
|
||||
file_ = os.path.join(MUSIC_DIR, row['file'])
|
||||
artist = row['artists'][0]['name']
|
||||
file_ =
|
||||
artists = [artist['name'] for artist in row['artists']]
|
||||
track = row['title']
|
||||
|
||||
logger.info(f"try fetching lyrics for \"{track}\" by \"{', '.join(artists)}")
|
||||
|
||||
if not os.path.exists(file_):
|
||||
return
|
||||
lyrics = genius.search(artist, track)
|
||||
lyrics = genius.search(artists, track)
|
||||
if len(lyrics) == 0:
|
||||
return
|
||||
print("found something")
|
||||
@ -60,8 +69,7 @@ def fetch_single_lyrics(row: dict):
|
||||
|
||||
|
||||
def fetch_lyrics():
|
||||
for row in database.get_custom_track([]):
|
||||
print(row['title'])
|
||||
for row in database.get_tracks_for_lyrics():
|
||||
fetch_single_lyrics(row)
|
||||
|
||||
|
||||
|
@ -205,13 +205,16 @@ GROUP BY track.id;
|
||||
def get_tracks_without_filepath(self):
|
||||
return self.get_custom_track(["(track.file IS NULL OR track.path IS NULL OR track.genre IS NULL)"])
|
||||
|
||||
def get_tracks_for_lyrics(self):
|
||||
return self.get_custom_track(["track.lyrics IS NULL"])
|
||||
|
||||
def add_lyrics(self, track_id: str, lyrics: str):
    """
    Store the lyrics of one track in the ``track`` table and commit.

    :param track_id: value matched against ``track.id``
    :param lyrics: lyrics to store; converted with ``str()`` before writing, so
        any object with a meaningful string form is accepted
    """
    # Both values are bound as parameters: interpolating track_id into the
    # statement breaks on ids containing a quote and allows SQL injection.
    query = """
UPDATE track
SET lyrics = ?
WHERE id = ?;
    """
    self.cursor.execute(query, (str(lyrics), track_id))
    self.connection.commit()
|
||||
|
||||
def update_download_status(self, track_id: str):
|
||||
|
@ -28,13 +28,13 @@ def modify_title(to_modify: str) -> str:
|
||||
return to_modify
|
||||
|
||||
|
||||
def match_titles(title_1: str, title_2: str) -> (bool, int):
|
||||
def match_titles(title_1: str, title_2: str):
|
||||
title_1, title_2 = modify_title(title_1), modify_title(title_2)
|
||||
distance = jellyfish.levenshtein_distance(title_1, title_2)
|
||||
return distance > TITLE_THRESHOLD_LEVENSHTEIN, distance
|
||||
|
||||
|
||||
def match_artists(artist_1, artist_2: str) -> (bool, int):
|
||||
def match_artists(artist_1, artist_2: str):
|
||||
if type(artist_1) == list:
|
||||
distances = []
|
||||
|
||||
|
@ -9,7 +9,7 @@ TEMP_FOLDER = "music-downloader"
|
||||
LOG_FILE = "download_logs.log"
|
||||
DATABASE_FILE = "metadata.db"
|
||||
DATABASE_STRUCTURE_FILE = "database_structure.sql"
|
||||
DATABASE_STRUCTURE_FALLBACK = "https://raw.githubusercontent.com/HeIIow2/music-downloader/new_metadata/assets/database_structure.sql"
|
||||
DATABASE_STRUCTURE_FALLBACK = "https://raw.githubusercontent.com/HeIIow2/music-downloader/master/assets/database_structure.sql"
|
||||
|
||||
SEARCH_LOGGER = logging.getLogger("mb-cli")
|
||||
DATABASE_LOGGER = logging.getLogger("database")
|
||||
@ -17,6 +17,7 @@ METADATA_DOWNLOAD_LOGGER = logging.getLogger("metadata-download")
|
||||
URL_DOWNLOAD_LOGGER = logging.getLogger("ling-download")
|
||||
PATH_LOGGER = logging.getLogger("create-paths")
|
||||
DOWNLOAD_LOGGER = logging.getLogger("download")
|
||||
LYRICS_LOGGER = logging.getLogger("lyrics")
|
||||
GENIUS_LOGGER = logging.getLogger("genius")
|
||||
|
||||
NOT_A_GENRE = ".", "..", "misc_scripts", "Music", "script", ".git", ".idea"
|
||||
|
Loading…
Reference in New Issue
Block a user