Merge remote-tracking branch 'refs/remotes/origin/master'

cuz merge
This commit is contained in:
lars
2022-11-14 17:17:27 +01:00
73 changed files with 1876 additions and 627 deletions

View File

@@ -1 +0,0 @@
__name__ = "music downloader"

View File

@@ -1,14 +1,14 @@
from utils.shared import *
from .utils.shared import *
from metadata.download import MetadataDownloader
import metadata.download
import metadata.search
import download_links
import url_to_path
import download
from .metadata.download import MetadataDownloader
from .metadata import download
from .metadata import search as s
from . import download_links
from . import url_to_path
from . import download
# NEEDS REFACTORING
from lyrics_ import fetch_lyrics
from .lyrics.lyrics import fetch_lyrics
import logging
import os
@@ -34,7 +34,7 @@ def get_existing_genre():
def search_for_metadata():
search = metadata.search.Search()
search = s.Search()
while True:
input_ = input(
@@ -107,4 +107,4 @@ def cli(start_at: int = 0, only_lyrics: bool = False):
if __name__ == "__main__":
cli(start_at=3, only_lyrics=True)
cli(start_at=0, only_lyrics=False)

View File

@@ -4,8 +4,8 @@ import os.path
from mutagen.easyid3 import EasyID3
from pydub import AudioSegment
from src.utils.shared import *
from src.scraping import musify, youtube_music
from .utils.shared import *
from .scraping import musify, youtube_music
"""
https://en.wikipedia.org/wiki/ID3

View File

@@ -1,7 +1,7 @@
import requests
from src.utils.shared import *
from src.scraping import musify, youtube_music, file_system
from .utils.shared import *
from .scraping import musify, youtube_music, file_system
logger = URL_DOWNLOAD_LOGGER

View File

@@ -3,9 +3,9 @@ from typing import List
from bs4 import BeautifulSoup
import pycountry
from src.utils.shared import *
from src.utils import phonetic_compares
from src.utils.object_handeling import get_elem_from_obj
from ..utils.shared import *
from ..utils import phonetic_compares
from ..utils.object_handeling import get_elem_from_obj
# search doesn't support isrc
# https://genius.com/api/search/multi?q=I Prevail - Breaking Down
@@ -56,7 +56,7 @@ class Song:
def is_valid(self) -> bool:
title_match, title_distance = phonetic_compares.match_titles(self.title, self.desired_data['track'])
artist_match, artist_distance = phonetic_compares.match_artists(self.artist, self.desired_data['artist'])
artist_match, artist_distance = phonetic_compares.match_artists(self.desired_data['artist'], self.artist)
return not title_match and not artist_match

92
src/lyrics/lyrics.py Normal file
View File

@@ -0,0 +1,92 @@
import mutagen
from mutagen.id3 import ID3, USLT
from ..metadata import database as db
from ..utils.shared import *
from . import genius
logger = LYRICS_LOGGER
"""
This whole Part is bodgy as hell and I need to rewrite this little file urgently. genius.py is really clean though :3
Just wanted to get it to work.
- lyrics need to be put in the database and everything should continue from there then
"""
"""
https://cweiske.de/tagebuch/rhythmbox-lyrics.htm
Rhythmbox, my music player, doesn't support ID3 lyrics (USLT) yet, so I have to find something else
Lyrics in MP3 ID3 tags (SYLT/USLT) is still missing, because GStreamer does not support that yet.
One possible solution would be to use ogg/vorbis files. Those lyrics are supported in Rhythmbox
'So, the next Rhythmbox release (3.5.0 or 3.4.2) will read lyrics directly from ogg/vorbis files, using the LYRICS and SYNCLYRICS tags.'
Another possible solution (probably the better one, because I don't need to refactor the whole metadata AGAIN)
would be to write a Rhythmbox plugin that fetches lyrics from ID3 USLT
I have written that Rhythmbox plugin: https://github.com/HeIIow2/rythmbox-id3-lyrics-support
"""
# https://www.programcreek.com/python/example/63462/mutagen.mp3.EasyMP3
# https://code.activestate.com/recipes/577138-embed-lyrics-into-mp3-files-using-mutagen-uslt-tag/
def add_lyrics(file_name, lyrics):
    """Embed lyrics into an audio file's ID3 tag as a USLT frame.

    :param file_name: path of the audio file relative to ``MUSIC_DIR``.
        ``None`` (a track without a stored file) is ignored.
    :param lyrics: object exposing the attributes ``lang`` and ``lyrics``,
        which are used as the frame's language and text.
    :return: always ``None``. Nothing is written when the file does not
        exist or carries no ID3 header.
    """
    # A track row may have no file yet; os.path.join would raise on None.
    if file_name is None:
        return

    file_path = os.path.join(MUSIC_DIR, file_name)
    if not os.path.exists(file_path):
        return

    try:
        tags = ID3(file_path)
    except mutagen.id3.ID3NoHeaderError:
        # Best effort: leave untagged files alone, but say so in the log.
        logger.warning(f"no ID3 header found in {file_path}, skipping lyrics")
        return

    logger.info(f"adding lyrics to the file {file_path}")
    # encoding=3 selects UTF-8 in mutagen's ID3 text encodings.
    uslt_output = USLT(encoding=3, lang=lyrics.lang, desc=u'desc', text=lyrics.lyrics)
    # add() stores the frame under its own hash key, so a USLT frame with the
    # same description/language that is already in the tag gets replaced
    # instead of being written a second time next to it.
    tags.add(uslt_output)
    tags.save(file_path)
def fetch_single_lyrics(row: dict):
    """Look up lyrics for one track row, store them and tag the file.

    :param row: track record; the keys ``artists`` (list of dicts with a
        ``name`` key), ``title``, ``id`` and ``file`` are read.
    :return: always ``None``. Returns early when ``genius.search`` yields
        no result.
    """
    artists = [artist['name'] for artist in row['artists']]
    track = row['title']
    id_ = row['id']

    logger.info(f"try fetching lyrics for \"{track}\" by \"{', '.join(artists)}\"")
    lyrics = genius.search(artists, track)
    if not lyrics:
        return

    logger.info("found lyrics")
    first_result = lyrics[0]
    database.add_lyrics(id_, lyrics=first_result)

    # The lyrics query only filters on missing lyrics, so the track may not
    # have a file yet; in that case only the database entry is written.
    if row['file'] is not None:
        add_lyrics(row['file'], first_result)
def fetch_lyrics():
    """Run ``fetch_single_lyrics`` on every row from ``database.get_tracks_for_lyrics()``."""
    pending_rows = database.get_tracks_for_lyrics()
    for track_row in pending_rows:
        fetch_single_lyrics(track_row)
if __name__ == "__main__":
    # Manual entry point: builds a Database in the system temp directory,
    # binds it to the module-level name `database` that fetch_lyrics() and
    # fetch_single_lyrics() read, then fetches lyrics for all pending tracks.
    import tempfile
    import os

    temp_folder = "music-downloader"
    temp_dir = os.path.join(tempfile.gettempdir(), temp_folder)
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(temp_dir, exist_ok=True)

    logging.basicConfig(level=logging.DEBUG)
    db_logger = logging.getLogger("database")
    db_logger.setLevel(logging.DEBUG)

    # NOTE(review): this fallback URL points at the `new_metadata` branch;
    # confirm that is still the intended source of the schema.
    database = db.Database(os.path.join(temp_dir, "metadata.db"),
                           os.path.join(temp_dir, "database_structure.sql"),
                           "https://raw.githubusercontent.com/HeIIow2/music-downloader/new_metadata/assets/database_structure.sql",
                           db_logger,
                           reset_anyways=False)

    fetch_lyrics()

View File

@@ -1,10 +1,12 @@
from mutagen.id3 import ID3, USLT
from metadata import database as db
from .metadata import database as db
from src.utils.shared import *
from .utils.shared import *
from lyrics import genius
from src.utils.shared import *
from .lyrics import genius
from .utils.shared import *
logger = LYRICS_LOGGER
"""
This whole Part is bodgy as hell and I need to rewrite this little file urgently. genius.py is really clean though :3
@@ -31,7 +33,12 @@ I have written that Rhythmbox plugin: https://github.com/HeIIow2/rythmbox-id3-ly
def add_lyrics(file_name, lyrics):
tags = ID3(file_name)
file_path = os.path.join(MUSIC_DIR, file_name)
if not os.path.exists(file_path):
return
logger.info(f"adding lyrics to the file {file_path}")
tags = ID3(file_path)
uslt_output = USLT(encoding=3, lang=lyrics.lang, desc=u'desc', text=lyrics.lyrics)
tags["USLT::'eng'"] = uslt_output
@@ -48,13 +55,15 @@ def fetch_single_lyrics(row: dict):
return
if row["file"] is None:
return
file_ = os.path.join(MUSIC_DIR, row['file'])
artist = row['artists'][0]['name']
file_ =
artists = [artist['name'] for artist in row['artists']]
track = row['title']
logger.info(f"try fetching lyrics for \"{track}\" by \"{', '.join(artists)}")
if not os.path.exists(file_):
return
lyrics = genius.search(artist, track)
lyrics = genius.search(artists, track)
if len(lyrics) == 0:
return
print("found something")
@@ -62,8 +71,7 @@ def fetch_single_lyrics(row: dict):
def fetch_lyrics():
for row in database.get_custom_track([]):
print(row['title'])
for row in database.get_tracks_for_lyrics():
fetch_single_lyrics(row)

View File

@@ -171,7 +171,8 @@ SELECT DISTINCT
'file', track.file,
'genre', track.genre,
'url', track.url,
'src', track.src
'src', track.src,
'lyrics', track.lyrics
)
FROM track, release_, release_group,artist, artist_track
WHERE
@@ -204,6 +205,18 @@ GROUP BY track.id;
def get_tracks_without_filepath(self):
    """Query ``get_custom_track`` for tracks whose file, path or genre is NULL."""
    incomplete_track = "(track.file IS NULL OR track.path IS NULL OR track.genre IS NULL)"
    conditions = [incomplete_track]
    return self.get_custom_track(conditions)
def get_tracks_for_lyrics(self):
    """Query ``get_custom_track`` for tracks whose lyrics column is NULL."""
    missing_lyrics = "track.lyrics IS NULL"
    conditions = [missing_lyrics]
    return self.get_custom_track(conditions)
def add_lyrics(self, track_id: str, lyrics: str):
    """Store the lyrics of one track and commit the change.

    :param track_id: id of the row in the ``track`` table to update.
    :param lyrics: lyrics to store; the value is converted with ``str()``
        before it is written.
    """
    # Both values are bound as parameters: an id containing a quote can no
    # longer break (or inject into) the statement.
    query = """
UPDATE track
SET lyrics = ?
WHERE id = ?;
    """
    self.cursor.execute(query, (str(lyrics), str(track_id)))
    self.connection.commit()
def update_download_status(self, track_id: str):
query = f"UPDATE track SET downloaded = 1, WHERE '{track_id}' == id;"
self.cursor.execute(query)

View File

@@ -1,56 +0,0 @@
DROP TABLE IF EXISTS artist;
CREATE TABLE artist (
id TEXT PRIMARY KEY NOT NULL,
name TEXT
);
DROP TABLE IF EXISTS artist_release_group;
CREATE TABLE artist_release_group (
artist_id TEXT NOT NULL,
release_group_id TEXT NOT NULL
);
DROP TABLE IF EXISTS artist_track;
CREATE TABLE artist_track (
artist_id TEXT NOT NULL,
track_id TEXT NOT NULL
);
DROP TABLE IF EXISTS release_group;
CREATE TABLE release_group (
id TEXT PRIMARY KEY NOT NULL,
albumartist TEXT,
albumsort INT,
musicbrainz_albumtype TEXT,
compilation TEXT,
album_artist_id TEXT
);
DROP TABLE IF EXISTS release_;
CREATE TABLE release_ (
id TEXT PRIMARY KEY NOT NULL,
release_group_id TEXT NOT NULL,
title TEXT,
copyright TEXT,
album_status TEXT,
language TEXT,
year TEXT,
date TEXT,
country TEXT,
barcode TEXT
);
DROP TABLE IF EXISTS track;
CREATE TABLE track (
id TEXT PRIMARY KEY NOT NULL,
downloaded BOOLEAN NOT NULL DEFAULT 0,
release_id TEXT NOT NULL,
track TEXT,
tracknumber TEXT,
isrc TEXT,
genre TEXT,
path TEXT,
file TEXT,
url TEXT,
src TEXT
);

View File

@@ -1,5 +1,5 @@
from src.utils.shared import *
from src.utils.object_handeling import get_elem_from_obj, parse_music_brainz_date
from ..utils.shared import *
from ..utils.object_handeling import get_elem_from_obj, parse_music_brainz_date
from typing import List
import musicbrainzngs

View File

@@ -1,8 +1,8 @@
from typing import List
import musicbrainzngs
from src.utils.shared import *
from src.utils.object_handeling import get_elem_from_obj, parse_music_brainz_date
from ..utils.shared import *
from ..utils.object_handeling import get_elem_from_obj, parse_music_brainz_date
logger = SEARCH_LOGGER

View File

@@ -1,7 +1,7 @@
import os
from src.utils.shared import *
from src.utils import phonetic_compares
from ..utils.shared import *
from ..utils import phonetic_compares
def is_valid(a1, a2, t1, t2) -> bool:

View File

@@ -4,8 +4,8 @@ import time
import requests
import bs4
from src.utils.shared import *
from src.utils import phonetic_compares
from ..utils.shared import *
from ..utils import phonetic_compares
TRIES = 5
TIMEOUT = 10

View File

@@ -4,7 +4,7 @@ import youtube_dl
import logging
import time
from src.utils import phonetic_compares
from ..utils import phonetic_compares
YDL_OPTIONS = {'format': 'bestaudio', 'noplaylist': 'True'}
YOUTUBE_URL_KEY = 'webpage_url'

View File

@@ -1,7 +1,7 @@
import os.path
import logging
from src.utils.shared import *
from .utils.shared import *
logger = PATH_LOGGER

View File

@@ -28,13 +28,13 @@ def modify_title(to_modify: str) -> str:
return to_modify
def match_titles(title_1: str, title_2: str) -> (bool, int):
def match_titles(title_1: str, title_2: str):
title_1, title_2 = modify_title(title_1), modify_title(title_2)
distance = jellyfish.levenshtein_distance(title_1, title_2)
return distance > TITLE_THRESHOLD_LEVENSHTEIN, distance
def match_artists(artist_1, artist_2: str) -> (bool, int):
def match_artists(artist_1, artist_2: str):
if type(artist_1) == list:
distances = []

View File

@@ -3,13 +3,13 @@ import logging
import tempfile
import os
from src.metadata.database import Database
from ..metadata.database import Database
TEMP_FOLDER = "music-downloader"
LOG_FILE = "download_logs.log"
DATABASE_FILE = "metadata.db"
DATABASE_STRUCTURE_FILE = "database_structure.sql"
DATABASE_STRUCTURE_FALLBACK = "https://raw.githubusercontent.com/HeIIow2/music-downloader/new_metadata/assets/database_structure.sql"
DATABASE_STRUCTURE_FALLBACK = "https://raw.githubusercontent.com/HeIIow2/music-downloader/master/assets/database_structure.sql"
SEARCH_LOGGER = logging.getLogger("mb-cli")
DATABASE_LOGGER = logging.getLogger("database")
@@ -17,6 +17,7 @@ METADATA_DOWNLOAD_LOGGER = logging.getLogger("metadata-download")
URL_DOWNLOAD_LOGGER = logging.getLogger("ling-download")
PATH_LOGGER = logging.getLogger("create-paths")
DOWNLOAD_LOGGER = logging.getLogger("download")
LYRICS_LOGGER = logging.getLogger("lyrics")
GENIUS_LOGGER = logging.getLogger("genius")
NOT_A_GENRE = ".", "..", "misc_scripts", "Music", "script", ".git", ".idea"