refactored lyrics fetching
This commit is contained in:
parent
06ae9750ce
commit
d30838e20b
@ -8,7 +8,7 @@ from . import url_to_path
|
|||||||
from . import download
|
from . import download
|
||||||
|
|
||||||
# NEEDS REFACTORING
|
# NEEDS REFACTORING
|
||||||
from .lyrics_ import fetch_lyrics
|
from .lyrics.lyrics import fetch_lyrics
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
@ -56,7 +56,7 @@ class Song:
|
|||||||
|
|
||||||
def is_valid(self) -> bool:
|
def is_valid(self) -> bool:
|
||||||
title_match, title_distance = phonetic_compares.match_titles(self.title, self.desired_data['track'])
|
title_match, title_distance = phonetic_compares.match_titles(self.title, self.desired_data['track'])
|
||||||
artist_match, artist_distance = phonetic_compares.match_artists(self.artist, self.desired_data['artist'])
|
artist_match, artist_distance = phonetic_compares.match_artists(self.desired_data['artist'], self.artist)
|
||||||
|
|
||||||
return not title_match and not artist_match
|
return not title_match and not artist_match
|
||||||
|
|
||||||
|
92
src/lyrics/lyrics.py
Normal file
92
src/lyrics/lyrics.py
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
import mutagen
|
||||||
|
from mutagen.id3 import ID3, USLT
|
||||||
|
|
||||||
|
|
||||||
|
from ..metadata import database as db
|
||||||
|
from ..utils.shared import *
|
||||||
|
from . import genius
|
||||||
|
|
||||||
|
logger = LYRICS_LOGGER
|
||||||
|
|
||||||
|
"""
|
||||||
|
This whole Part is bodgy as hell and I need to rewrite this little file urgently. genius.py is really clean though :3
|
||||||
|
Just wanted to get it to work.
|
||||||
|
- lyrics need to be put in the database and everything should continue from there then
|
||||||
|
"""
|
||||||
|
|
||||||
|
"""
|
||||||
|
https://cweiske.de/tagebuch/rhythmbox-lyrics.htm
|
||||||
|
Rhythmbox, my music player, doesn't support ID3 lyrics (USLT) yet, so I have to find something else
|
||||||
|
Lyrics in MP3 ID3 tags (SYLT/USLT) is still missing, because GStreamer does not support that yet.
|
||||||
|
|
||||||
|
One possible solution would be to use ogg/vorbis files. Those lyrics are supported in Rhythmbox
|
||||||
|
'So, the next Rhythmbox release (3.5.0 or 3.4.2) will read lyrics directly from ogg/vorbis files, using the LYRICS and SYNCLYRICS tags.'
|
||||||
|
Another possible solution (probably the better one because I don't need to refactor the whole metadata AGAIN)
|
||||||
|
would be to write a Rhythmbox plugin that fetches lyrics from ID3 USLT
|
||||||
|
|
||||||
|
I have written that Rhythmbox plugin: https://github.com/HeIIow2/rythmbox-id3-lyrics-support
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
# https://www.programcreek.com/python/example/63462/mutagen.mp3.EasyMP3
|
||||||
|
# https://code.activestate.com/recipes/577138-embed-lyrics-into-mp3-files-using-mutagen-uslt-tag/
|
||||||
|
|
||||||
|
|
||||||
|
def add_lyrics(file_name, lyrics):
    """
    Embed lyrics into the ID3 tag of an audio file as a USLT frame.

    :param file_name: path of the audio file relative to MUSIC_DIR; a falsy
        value (e.g. a track that has no file yet) is skipped
    :param lyrics: object exposing the attributes ``lang`` and ``lyrics``
        (presumably a search result from genius.py - verify against caller)
    :return: None. The file is left untouched when it does not exist or
        carries no ID3 header.
    """
    # os.path.join raises a TypeError on None, so bail out before building the path.
    if not file_name:
        logger.debug("no file name given, not embedding lyrics")
        return

    file_path = os.path.join(MUSIC_DIR, file_name)
    if not os.path.exists(file_path):
        logger.debug(f"file {file_path} does not exist, not embedding lyrics")
        return

    try:
        tags = ID3(file_path)
    except mutagen.id3.ID3NoHeaderError:
        # Still best effort, but no longer silent: the lyrics are dropped for this file.
        logger.warning(f"file {file_path} has no ID3 header, not embedding lyrics")
        return

    logger.info(f"adding lyrics to the file {file_path}")

    uslt_output = USLT(encoding=3, lang=lyrics.lang, desc=u'desc', text=lyrics.lyrics)
    # add() files the frame under its own hash key (frame id, description, language),
    # so an existing USLT frame with the same description and language is replaced
    # instead of being written next to a second copy under a hand-written key.
    tags.add(uslt_output)
    tags.save(file_path)
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_single_lyrics(row: dict):
    """
    Search lyrics for one track, store them in the database and embed them in its file.

    :param row: track row; the keys ``artists`` (iterable of dicts with a
        ``name`` key), ``title`` and ``id`` are read, ``file`` is optional
    :return: None. Nothing is stored when the search yields no result.
    """
    artists = [artist['name'] for artist in row['artists']]
    track = row['title']
    id_ = row['id']

    # The closing quote around the artist list was missing in the original message.
    logger.info(f"try fetching lyrics for \"{track}\" by \"{', '.join(artists)}\"")

    lyrics = genius.search(artists, track)
    # Truthiness covers both an empty result list and a None result.
    if not lyrics:
        return

    logger.info("found lyrics")
    best_match = lyrics[0]

    # NOTE(review): `database` is only assigned in the __main__ section of this module
    # as far as visible here - confirm it is in scope when the module is imported.
    database.add_lyrics(id_, lyrics=best_match)

    # A track without a downloaded file has nothing to embed the lyrics into.
    file_name = row.get('file')
    if file_name:
        add_lyrics(file_name, best_match)
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_lyrics():
    """Run fetch_single_lyrics on every row returned by database.get_tracks_for_lyrics()."""
    pending_tracks = database.get_tracks_for_lyrics()
    for track_row in pending_tracks:
        fetch_single_lyrics(track_row)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual entry point: sets up a database in the system temp directory
    # and fetches lyrics for the tracks stored in it.
    import tempfile
    import os

    temp_folder = "music-downloader"
    temp_dir = os.path.join(tempfile.gettempdir(), temp_folder)
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(temp_dir, exist_ok=True)

    logging.basicConfig(level=logging.DEBUG)
    db_logger = logging.getLogger("database")
    db_logger.setLevel(logging.DEBUG)

    # NOTE(review): this URL still points at the new_metadata branch while the shared
    # DATABASE_STRUCTURE_FALLBACK constant uses master - confirm which one is current.
    database = db.Database(os.path.join(temp_dir, "metadata.db"),
                           os.path.join(temp_dir, "database_structure.sql"),
                           "https://raw.githubusercontent.com/HeIIow2/music-downloader/new_metadata/assets/database_structure.sql",
                           db_logger,
                           reset_anyways=False)

    fetch_lyrics()
|
@ -6,6 +6,8 @@ from .utils.shared import *
|
|||||||
from .lyrics import genius
|
from .lyrics import genius
|
||||||
from .utils.shared import *
|
from .utils.shared import *
|
||||||
|
|
||||||
|
logger = LYRICS_LOGGER
|
||||||
|
|
||||||
"""
|
"""
|
||||||
This whole Part is bodgy as hell and I need to rewrite this little file urgently. genius.py is really clean though :3
|
This whole Part is bodgy as hell and I need to rewrite this little file urgently. genius.py is really clean though :3
|
||||||
Just wanted to get it to work.
|
Just wanted to get it to work.
|
||||||
@ -31,7 +33,12 @@ I have written that Rhythmbox plugin: https://github.com/HeIIow2/rythmbox-id3-ly
|
|||||||
|
|
||||||
|
|
||||||
def add_lyrics(file_name, lyrics):
|
def add_lyrics(file_name, lyrics):
|
||||||
tags = ID3(file_name)
|
file_path = os.path.join(MUSIC_DIR, file_name)
|
||||||
|
if not os.path.exists(file_path):
|
||||||
|
return
|
||||||
|
|
||||||
|
logger.info(f"adding lyrics to the file {file_path}")
|
||||||
|
tags = ID3(file_path)
|
||||||
uslt_output = USLT(encoding=3, lang=lyrics.lang, desc=u'desc', text=lyrics.lyrics)
|
uslt_output = USLT(encoding=3, lang=lyrics.lang, desc=u'desc', text=lyrics.lyrics)
|
||||||
tags["USLT::'eng'"] = uslt_output
|
tags["USLT::'eng'"] = uslt_output
|
||||||
|
|
||||||
@ -46,13 +53,15 @@ def get_lyrics(file_name):
|
|||||||
def fetch_single_lyrics(row: dict):
|
def fetch_single_lyrics(row: dict):
|
||||||
if "file" in row:
|
if "file" in row:
|
||||||
return
|
return
|
||||||
file_ = os.path.join(MUSIC_DIR, row['file'])
|
file_ =
|
||||||
artist = row['artists'][0]['name']
|
artists = [artist['name'] for artist in row['artists']]
|
||||||
track = row['title']
|
track = row['title']
|
||||||
|
|
||||||
|
logger.info(f"try fetching lyrics for \"{track}\" by \"{', '.join(artists)}")
|
||||||
|
|
||||||
if not os.path.exists(file_):
|
if not os.path.exists(file_):
|
||||||
return
|
return
|
||||||
lyrics = genius.search(artist, track)
|
lyrics = genius.search(artists, track)
|
||||||
if len(lyrics) == 0:
|
if len(lyrics) == 0:
|
||||||
return
|
return
|
||||||
print("found something")
|
print("found something")
|
||||||
@ -60,8 +69,7 @@ def fetch_single_lyrics(row: dict):
|
|||||||
|
|
||||||
|
|
||||||
def fetch_lyrics():
|
def fetch_lyrics():
|
||||||
for row in database.get_custom_track([]):
|
for row in database.get_tracks_for_lyrics():
|
||||||
print(row['title'])
|
|
||||||
fetch_single_lyrics(row)
|
fetch_single_lyrics(row)
|
||||||
|
|
||||||
|
|
||||||
|
@ -205,13 +205,16 @@ GROUP BY track.id;
|
|||||||
def get_tracks_without_filepath(self):
|
def get_tracks_without_filepath(self):
|
||||||
return self.get_custom_track(["(track.file IS NULL OR track.path IS NULL OR track.genre IS NULL)"])
|
return self.get_custom_track(["(track.file IS NULL OR track.path IS NULL OR track.genre IS NULL)"])
|
||||||
|
|
||||||
|
def get_tracks_for_lyrics(self):
|
||||||
|
return self.get_custom_track(["track.lyrics IS NULL"])
|
||||||
|
|
||||||
def add_lyrics(self, track_id: str, lyrics: str):
    """
    Store the lyrics of one track in the ``track`` table and commit.

    :param track_id: value matched against ``track.id``
    :param lyrics: lyrics to store; converted with ``str()`` before it is
        bound, so objects with a string representation are accepted too
    """
    # Both values are bound as parameters: formatting track_id into the statement
    # breaks on ids containing a quote and allows SQL injection.
    query = """
        UPDATE track
        SET lyrics = ?
        WHERE id = ?;
    """
    self.cursor.execute(query, (str(lyrics), track_id))
    self.connection.commit()
|
||||||
|
|
||||||
def update_download_status(self, track_id: str):
|
def update_download_status(self, track_id: str):
|
||||||
|
@ -28,13 +28,13 @@ def modify_title(to_modify: str) -> str:
|
|||||||
return to_modify
|
return to_modify
|
||||||
|
|
||||||
|
|
||||||
def match_titles(title_1: str, title_2: str) -> (bool, int):
|
def match_titles(title_1: str, title_2: str):
|
||||||
title_1, title_2 = modify_title(title_1), modify_title(title_2)
|
title_1, title_2 = modify_title(title_1), modify_title(title_2)
|
||||||
distance = jellyfish.levenshtein_distance(title_1, title_2)
|
distance = jellyfish.levenshtein_distance(title_1, title_2)
|
||||||
return distance > TITLE_THRESHOLD_LEVENSHTEIN, distance
|
return distance > TITLE_THRESHOLD_LEVENSHTEIN, distance
|
||||||
|
|
||||||
|
|
||||||
def match_artists(artist_1, artist_2: str) -> (bool, int):
|
def match_artists(artist_1, artist_2: str):
|
||||||
if type(artist_1) == list:
|
if type(artist_1) == list:
|
||||||
distances = []
|
distances = []
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@ TEMP_FOLDER = "music-downloader"
|
|||||||
LOG_FILE = "download_logs.log"
|
LOG_FILE = "download_logs.log"
|
||||||
DATABASE_FILE = "metadata.db"
|
DATABASE_FILE = "metadata.db"
|
||||||
DATABASE_STRUCTURE_FILE = "database_structure.sql"
|
DATABASE_STRUCTURE_FILE = "database_structure.sql"
|
||||||
DATABASE_STRUCTURE_FALLBACK = "https://raw.githubusercontent.com/HeIIow2/music-downloader/new_metadata/assets/database_structure.sql"
|
DATABASE_STRUCTURE_FALLBACK = "https://raw.githubusercontent.com/HeIIow2/music-downloader/master/assets/database_structure.sql"
|
||||||
|
|
||||||
SEARCH_LOGGER = logging.getLogger("mb-cli")
|
SEARCH_LOGGER = logging.getLogger("mb-cli")
|
||||||
DATABASE_LOGGER = logging.getLogger("database")
|
DATABASE_LOGGER = logging.getLogger("database")
|
||||||
@ -17,6 +17,7 @@ METADATA_DOWNLOAD_LOGGER = logging.getLogger("metadata-download")
|
|||||||
URL_DOWNLOAD_LOGGER = logging.getLogger("ling-download")
|
URL_DOWNLOAD_LOGGER = logging.getLogger("ling-download")
|
||||||
PATH_LOGGER = logging.getLogger("create-paths")
|
PATH_LOGGER = logging.getLogger("create-paths")
|
||||||
DOWNLOAD_LOGGER = logging.getLogger("download")
|
DOWNLOAD_LOGGER = logging.getLogger("download")
|
||||||
|
LYRICS_LOGGER = logging.getLogger("lyrics")
|
||||||
GENIUS_LOGGER = logging.getLogger("genius")
|
GENIUS_LOGGER = logging.getLogger("genius")
|
||||||
|
|
||||||
NOT_A_GENRE = ".", "..", "misc_scripts", "Music", "script", ".git", ".idea"
|
NOT_A_GENRE = ".", "..", "misc_scripts", "Music", "script", ".git", ".idea"
|
||||||
|
Loading…
Reference in New Issue
Block a user