Merge remote-tracking branch 'refs/remotes/origin/master'
cuz merge
This commit is contained in:
@@ -1 +0,0 @@
|
||||
__name__ = "music downloader"
|
||||
|
@@ -1,14 +1,14 @@
|
||||
from utils.shared import *
|
||||
from .utils.shared import *
|
||||
|
||||
from metadata.download import MetadataDownloader
|
||||
import metadata.download
|
||||
import metadata.search
|
||||
import download_links
|
||||
import url_to_path
|
||||
import download
|
||||
from .metadata.download import MetadataDownloader
|
||||
from .metadata import download
|
||||
from .metadata import search as s
|
||||
from . import download_links
|
||||
from . import url_to_path
|
||||
from . import download
|
||||
|
||||
# NEEDS REFACTORING
|
||||
from lyrics_ import fetch_lyrics
|
||||
from .lyrics.lyrics import fetch_lyrics
|
||||
|
||||
import logging
|
||||
import os
|
||||
@@ -34,7 +34,7 @@ def get_existing_genre():
|
||||
|
||||
|
||||
def search_for_metadata():
|
||||
search = metadata.search.Search()
|
||||
search = s.Search()
|
||||
|
||||
while True:
|
||||
input_ = input(
|
||||
@@ -107,4 +107,4 @@ def cli(start_at: int = 0, only_lyrics: bool = False):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
cli(start_at=3, only_lyrics=True)
|
||||
cli(start_at=0, only_lyrics=False)
|
||||
|
@@ -4,8 +4,8 @@ import os.path
|
||||
from mutagen.easyid3 import EasyID3
|
||||
from pydub import AudioSegment
|
||||
|
||||
from src.utils.shared import *
|
||||
from src.scraping import musify, youtube_music
|
||||
from .utils.shared import *
|
||||
from .scraping import musify, youtube_music
|
||||
|
||||
"""
|
||||
https://en.wikipedia.org/wiki/ID3
|
||||
|
@@ -1,7 +1,7 @@
|
||||
import requests
|
||||
|
||||
from src.utils.shared import *
|
||||
from src.scraping import musify, youtube_music, file_system
|
||||
from .utils.shared import *
|
||||
from .scraping import musify, youtube_music, file_system
|
||||
|
||||
logger = URL_DOWNLOAD_LOGGER
|
||||
|
||||
|
@@ -3,9 +3,9 @@ from typing import List
|
||||
from bs4 import BeautifulSoup
|
||||
import pycountry
|
||||
|
||||
from src.utils.shared import *
|
||||
from src.utils import phonetic_compares
|
||||
from src.utils.object_handeling import get_elem_from_obj
|
||||
from ..utils.shared import *
|
||||
from ..utils import phonetic_compares
|
||||
from ..utils.object_handeling import get_elem_from_obj
|
||||
|
||||
# search doesn't support isrc
|
||||
# https://genius.com/api/search/multi?q=I Prevail - Breaking Down
|
||||
@@ -56,7 +56,7 @@ class Song:
|
||||
|
||||
def is_valid(self) -> bool:
|
||||
title_match, title_distance = phonetic_compares.match_titles(self.title, self.desired_data['track'])
|
||||
artist_match, artist_distance = phonetic_compares.match_artists(self.artist, self.desired_data['artist'])
|
||||
artist_match, artist_distance = phonetic_compares.match_artists(self.desired_data['artist'], self.artist)
|
||||
|
||||
return not title_match and not artist_match
|
||||
|
||||
|
92
src/lyrics/lyrics.py
Normal file
92
src/lyrics/lyrics.py
Normal file
@@ -0,0 +1,92 @@
|
||||
import mutagen
|
||||
from mutagen.id3 import ID3, USLT
|
||||
|
||||
|
||||
from ..metadata import database as db
|
||||
from ..utils.shared import *
|
||||
from . import genius
|
||||
|
||||
logger = LYRICS_LOGGER
|
||||
|
||||
"""
|
||||
This whole Part is bodgy as hell and I need to rewrite this little file urgently. genius.py is really clean though :3
|
||||
Just wanted to get it to work.
|
||||
- lyrics need to be put in the database and everything should continue from there then
|
||||
"""
|
||||
|
||||
"""
|
||||
https://cweiske.de/tagebuch/rhythmbox-lyrics.htm
|
||||
Rhythmbox, my music player, doesn't support ID3 lyrics (USLT) yet, so I have to find something else.
|
||||
Lyrics in MP3 ID3 tags (SYLT/USLT) is still missing, because GStreamer does not support that yet.
|
||||
|
||||
One possible solution would be to use ogg/vorbis files. Lyrics stored in those files are supported by Rhythmbox:
|
||||
'So, the next Rhythmbox release (3.5.0 or 3.4.2) will read lyrics directly from ogg/vorbis files, using the LYRICS and SYNCLYRICS tags.'
|
||||
Another possible solution (probably the better one, because I don't need to refactor the whole metadata AGAIN)
|
||||
would be to write a Rhythmbox plugin that fetches lyrics from ID3 USLT
|
||||
|
||||
I have written that Rhythmbox plugin: https://github.com/HeIIow2/rythmbox-id3-lyrics-support
|
||||
"""
|
||||
|
||||
|
||||
# https://www.programcreek.com/python/example/63462/mutagen.mp3.EasyMP3
|
||||
# https://code.activestate.com/recipes/577138-embed-lyrics-into-mp3-files-using-mutagen-uslt-tag/
|
||||
|
||||
|
||||
def add_lyrics(file_name, lyrics):
    """Embed lyrics into an audio file as an ID3 ``USLT`` frame.

    The operation is best-effort: nothing is written (and nothing is raised)
    when the track has no file name, the file does not exist below
    ``MUSIC_DIR``, or the file carries no ID3 header.

    Args:
        file_name: File path relative to ``MUSIC_DIR``; ``None`` is skipped.
        lyrics: Object providing the attributes ``lang`` and ``lyrics``
            (presumably a result of ``genius.search`` -- verify against
            the caller).
    """
    # Tracks are selected by "lyrics IS NULL" only, so a row may not have a
    # file yet; os.path.join would raise TypeError on None.
    if file_name is None:
        logger.debug("no file known for this track, skipping lyrics embedding")
        return

    file_path = os.path.join(MUSIC_DIR, file_name)
    if not os.path.exists(file_path):
        logger.debug(f"file {file_path} doesn't exist, skipping lyrics embedding")
        return

    try:
        tags = ID3(file_path)
    except mutagen.id3.ID3NoHeaderError:
        logger.debug(f"file {file_path} has no ID3 header, skipping lyrics embedding")
        return

    logger.info(f"adding lyrics to the file {file_path}")

    # encoding=3 selects UTF-8 in mutagen.
    uslt_output = USLT(encoding=3, lang=lyrics.lang, desc=u'desc', text=lyrics.lyrics)
    # add() files the frame under its own hash key instead of a hand-written
    # one, so a previously stored frame with the same description/language is
    # replaced rather than duplicated.
    tags.add(uslt_output)
    tags.save(file_path)
|
||||
|
||||
|
||||
def fetch_single_lyrics(row: dict):
    """Look up the lyrics of one track, store them and embed them in its file.

    Args:
        row: Track record; the keys read here are ``'artists'`` (an iterable
            of dicts with a ``'name'`` key), ``'title'``, ``'id'`` and
            ``'file'``.

    Returns early without side effects when the search yields nothing.
    """
    artists = [artist['name'] for artist in row['artists']]
    track = row['title']
    id_ = row['id']

    # The closing quote after the artist list was missing in the log message.
    logger.info(f"try fetching lyrics for \"{track}\" by \"{', '.join(artists)}\"")

    lyrics = genius.search(artists, track)
    # Truthiness also covers a None result, not only an empty list.
    if not lyrics:
        return

    logger.info("found lyrics")
    # Only the first search result is used -- presumably the best match;
    # confirm against genius.search.
    best_match = lyrics[0]
    database.add_lyrics(id_, lyrics=best_match)
    add_lyrics(row['file'], best_match)
|
||||
|
||||
|
||||
def fetch_lyrics():
    """Run the lyrics lookup for every row of ``database.get_tracks_for_lyrics()``."""
    pending_rows = database.get_tracks_for_lyrics()
    for track_row in pending_rows:
        fetch_single_lyrics(track_row)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Stand-alone run: set up a database in the system temp directory and
    # fetch lyrics for every track that has none yet.
    import logging
    import os
    import tempfile

    temp_folder = "music-downloader"
    temp_dir = os.path.join(tempfile.gettempdir(), temp_folder)
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(temp_dir, exist_ok=True)

    logging.basicConfig(level=logging.DEBUG)
    db_logger = logging.getLogger("database")
    db_logger.setLevel(logging.DEBUG)

    # NOTE(review): this fallback URL still points at the "new_metadata"
    # branch -- confirm it is the intended source of the schema.
    database = db.Database(os.path.join(temp_dir, "metadata.db"),
                           os.path.join(temp_dir, "database_structure.sql"),
                           "https://raw.githubusercontent.com/HeIIow2/music-downloader/new_metadata/assets/database_structure.sql",
                           db_logger,
                           reset_anyways=False)

    fetch_lyrics()
|
@@ -1,10 +1,12 @@
|
||||
from mutagen.id3 import ID3, USLT
|
||||
from metadata import database as db
|
||||
from .metadata import database as db
|
||||
|
||||
from src.utils.shared import *
|
||||
from .utils.shared import *
|
||||
|
||||
from lyrics import genius
|
||||
from src.utils.shared import *
|
||||
from .lyrics import genius
|
||||
from .utils.shared import *
|
||||
|
||||
logger = LYRICS_LOGGER
|
||||
|
||||
"""
|
||||
This whole Part is bodgy as hell and I need to rewrite this little file urgently. genius.py is really clean though :3
|
||||
@@ -31,7 +33,12 @@ I have written that Rhythmbox plugin: https://github.com/HeIIow2/rythmbox-id3-ly
|
||||
|
||||
|
||||
def add_lyrics(file_name, lyrics):
|
||||
tags = ID3(file_name)
|
||||
file_path = os.path.join(MUSIC_DIR, file_name)
|
||||
if not os.path.exists(file_path):
|
||||
return
|
||||
|
||||
logger.info(f"adding lyrics to the file {file_path}")
|
||||
tags = ID3(file_path)
|
||||
uslt_output = USLT(encoding=3, lang=lyrics.lang, desc=u'desc', text=lyrics.lyrics)
|
||||
tags["USLT::'eng'"] = uslt_output
|
||||
|
||||
@@ -48,13 +55,15 @@ def fetch_single_lyrics(row: dict):
|
||||
return
|
||||
if row["file"] is None:
|
||||
return
|
||||
file_ = os.path.join(MUSIC_DIR, row['file'])
|
||||
artist = row['artists'][0]['name']
|
||||
file_ =
|
||||
artists = [artist['name'] for artist in row['artists']]
|
||||
track = row['title']
|
||||
|
||||
logger.info(f"try fetching lyrics for \"{track}\" by \"{', '.join(artists)}")
|
||||
|
||||
if not os.path.exists(file_):
|
||||
return
|
||||
lyrics = genius.search(artist, track)
|
||||
lyrics = genius.search(artists, track)
|
||||
if len(lyrics) == 0:
|
||||
return
|
||||
print("found something")
|
||||
@@ -62,8 +71,7 @@ def fetch_single_lyrics(row: dict):
|
||||
|
||||
|
||||
def fetch_lyrics():
|
||||
for row in database.get_custom_track([]):
|
||||
print(row['title'])
|
||||
for row in database.get_tracks_for_lyrics():
|
||||
fetch_single_lyrics(row)
|
||||
|
||||
|
||||
|
@@ -171,7 +171,8 @@ SELECT DISTINCT
|
||||
'file', track.file,
|
||||
'genre', track.genre,
|
||||
'url', track.url,
|
||||
'src', track.src
|
||||
'src', track.src,
|
||||
'lyrics', track.lyrics
|
||||
)
|
||||
FROM track, release_, release_group,artist, artist_track
|
||||
WHERE
|
||||
@@ -204,6 +205,18 @@ GROUP BY track.id;
|
||||
def get_tracks_without_filepath(self):
    """Query the tracks whose file, path or genre column is still NULL.

    The single condition is handed to ``get_custom_track`` and its result is
    returned unchanged.
    """
    incomplete_track_condition = "(track.file IS NULL OR track.path IS NULL OR track.genre IS NULL)"
    return self.get_custom_track([incomplete_track_condition])
|
||||
|
||||
def get_tracks_for_lyrics(self):
    """Query the tracks whose lyrics column is still NULL.

    The single condition is handed to ``get_custom_track`` and its result is
    returned unchanged.
    """
    missing_lyrics_condition = "track.lyrics IS NULL"
    return self.get_custom_track([missing_lyrics_condition])
|
||||
|
||||
def add_lyrics(self, track_id: str, lyrics: str):
    """Write the lyrics of one track into the ``track`` table and commit.

    Args:
        track_id: Value matched against ``track.id``.
        lyrics: Lyrics to store; converted with ``str()`` before writing.
    """
    # Both values are bound as parameters: interpolating track_id into the
    # statement breaks on ids containing a quote and allows SQL injection.
    query = """
UPDATE track
SET lyrics = ?
WHERE id = ?;
    """
    self.cursor.execute(query, (str(lyrics), str(track_id)))
    self.connection.commit()
|
||||
|
||||
def update_download_status(self, track_id: str):
|
||||
query = f"UPDATE track SET downloaded = 1, WHERE '{track_id}' == id;"
|
||||
self.cursor.execute(query)
|
||||
|
@@ -1,56 +0,0 @@
|
||||
DROP TABLE IF EXISTS artist;
|
||||
CREATE TABLE artist (
|
||||
id TEXT PRIMARY KEY NOT NULL,
|
||||
name TEXT
|
||||
);
|
||||
|
||||
DROP TABLE IF EXISTS artist_release_group;
|
||||
CREATE TABLE artist_release_group (
|
||||
artist_id TEXT NOT NULL,
|
||||
release_group_id TEXT NOT NULL
|
||||
);
|
||||
|
||||
DROP TABLE IF EXISTS artist_track;
|
||||
CREATE TABLE artist_track (
|
||||
artist_id TEXT NOT NULL,
|
||||
track_id TEXT NOT NULL
|
||||
);
|
||||
|
||||
DROP TABLE IF EXISTS release_group;
|
||||
CREATE TABLE release_group (
|
||||
id TEXT PRIMARY KEY NOT NULL,
|
||||
albumartist TEXT,
|
||||
albumsort INT,
|
||||
musicbrainz_albumtype TEXT,
|
||||
compilation TEXT,
|
||||
album_artist_id TEXT
|
||||
);
|
||||
|
||||
DROP TABLE IF EXISTS release_;
|
||||
CREATE TABLE release_ (
|
||||
id TEXT PRIMARY KEY NOT NULL,
|
||||
release_group_id TEXT NOT NULL,
|
||||
title TEXT,
|
||||
copyright TEXT,
|
||||
album_status TEXT,
|
||||
language TEXT,
|
||||
year TEXT,
|
||||
date TEXT,
|
||||
country TEXT,
|
||||
barcode TEXT
|
||||
);
|
||||
|
||||
DROP TABLE IF EXISTS track;
|
||||
CREATE TABLE track (
|
||||
id TEXT PRIMARY KEY NOT NULL,
|
||||
downloaded BOOLEAN NOT NULL DEFAULT 0,
|
||||
release_id TEXT NOT NULL,
|
||||
track TEXT,
|
||||
tracknumber TEXT,
|
||||
isrc TEXT,
|
||||
genre TEXT,
|
||||
path TEXT,
|
||||
file TEXT,
|
||||
url TEXT,
|
||||
src TEXT
|
||||
);
|
@@ -1,5 +1,5 @@
|
||||
from src.utils.shared import *
|
||||
from src.utils.object_handeling import get_elem_from_obj, parse_music_brainz_date
|
||||
from ..utils.shared import *
|
||||
from ..utils.object_handeling import get_elem_from_obj, parse_music_brainz_date
|
||||
|
||||
from typing import List
|
||||
import musicbrainzngs
|
||||
|
@@ -1,8 +1,8 @@
|
||||
from typing import List
|
||||
import musicbrainzngs
|
||||
|
||||
from src.utils.shared import *
|
||||
from src.utils.object_handeling import get_elem_from_obj, parse_music_brainz_date
|
||||
from ..utils.shared import *
|
||||
from ..utils.object_handeling import get_elem_from_obj, parse_music_brainz_date
|
||||
|
||||
logger = SEARCH_LOGGER
|
||||
|
||||
|
@@ -1,7 +1,7 @@
|
||||
import os
|
||||
|
||||
from src.utils.shared import *
|
||||
from src.utils import phonetic_compares
|
||||
from ..utils.shared import *
|
||||
from ..utils import phonetic_compares
|
||||
|
||||
|
||||
def is_valid(a1, a2, t1, t2) -> bool:
|
||||
|
@@ -4,8 +4,8 @@ import time
|
||||
import requests
|
||||
import bs4
|
||||
|
||||
from src.utils.shared import *
|
||||
from src.utils import phonetic_compares
|
||||
from ..utils.shared import *
|
||||
from ..utils import phonetic_compares
|
||||
|
||||
TRIES = 5
|
||||
TIMEOUT = 10
|
||||
|
@@ -4,7 +4,7 @@ import youtube_dl
|
||||
import logging
|
||||
import time
|
||||
|
||||
from src.utils import phonetic_compares
|
||||
from ..utils import phonetic_compares
|
||||
|
||||
YDL_OPTIONS = {'format': 'bestaudio', 'noplaylist': 'True'}
|
||||
YOUTUBE_URL_KEY = 'webpage_url'
|
||||
|
@@ -1,7 +1,7 @@
|
||||
import os.path
|
||||
import logging
|
||||
|
||||
from src.utils.shared import *
|
||||
from .utils.shared import *
|
||||
|
||||
logger = PATH_LOGGER
|
||||
|
||||
|
@@ -28,13 +28,13 @@ def modify_title(to_modify: str) -> str:
|
||||
return to_modify
|
||||
|
||||
|
||||
def match_titles(title_1: str, title_2: str) -> (bool, int):
|
||||
def match_titles(title_1: str, title_2: str):
|
||||
title_1, title_2 = modify_title(title_1), modify_title(title_2)
|
||||
distance = jellyfish.levenshtein_distance(title_1, title_2)
|
||||
return distance > TITLE_THRESHOLD_LEVENSHTEIN, distance
|
||||
|
||||
|
||||
def match_artists(artist_1, artist_2: str) -> (bool, int):
|
||||
def match_artists(artist_1, artist_2: str):
|
||||
if type(artist_1) == list:
|
||||
distances = []
|
||||
|
||||
|
@@ -3,13 +3,13 @@ import logging
|
||||
import tempfile
|
||||
import os
|
||||
|
||||
from src.metadata.database import Database
|
||||
from ..metadata.database import Database
|
||||
|
||||
TEMP_FOLDER = "music-downloader"
|
||||
LOG_FILE = "download_logs.log"
|
||||
DATABASE_FILE = "metadata.db"
|
||||
DATABASE_STRUCTURE_FILE = "database_structure.sql"
|
||||
DATABASE_STRUCTURE_FALLBACK = "https://raw.githubusercontent.com/HeIIow2/music-downloader/new_metadata/assets/database_structure.sql"
|
||||
DATABASE_STRUCTURE_FALLBACK = "https://raw.githubusercontent.com/HeIIow2/music-downloader/master/assets/database_structure.sql"
|
||||
|
||||
SEARCH_LOGGER = logging.getLogger("mb-cli")
|
||||
DATABASE_LOGGER = logging.getLogger("database")
|
||||
@@ -17,6 +17,7 @@ METADATA_DOWNLOAD_LOGGER = logging.getLogger("metadata-download")
|
||||
URL_DOWNLOAD_LOGGER = logging.getLogger("ling-download")
|
||||
PATH_LOGGER = logging.getLogger("create-paths")
|
||||
DOWNLOAD_LOGGER = logging.getLogger("download")
|
||||
LYRICS_LOGGER = logging.getLogger("lyrics")
|
||||
GENIUS_LOGGER = logging.getLogger("genius")
|
||||
|
||||
NOT_A_GENRE = ".", "..", "misc_scripts", "Music", "script", ".git", ".idea"
|
||||
|
Reference in New Issue
Block a user