From 1253581e3032dd99436ad2b09ceb26d642dbdfcb Mon Sep 17 00:00:00 2001 From: lars Date: Thu, 27 Oct 2022 19:53:12 +0200 Subject: [PATCH] finished implementing sqlite --- .idea/dataSources.xml | 12 +++++ src/metadata/database.py | 81 ++++++++++++++++++++--------- src/metadata/database_structure.sql | 14 +++-- src/metadata/download.py | 79 ++++++++++++++++++++++------ 4 files changed, 142 insertions(+), 44 deletions(-) create mode 100644 .idea/dataSources.xml diff --git a/.idea/dataSources.xml b/.idea/dataSources.xml new file mode 100644 index 0000000..879cd05 --- /dev/null +++ b/.idea/dataSources.xml @@ -0,0 +1,12 @@ + + + + + sqlite.xerial + true + org.sqlite.JDBC + jdbc:sqlite:/tmp/music-downloader/metadata.db + $ProjectFileDir$ + + + \ No newline at end of file diff --git a/src/metadata/database.py b/src/metadata/database.py index 4a323ca..59367eb 100644 --- a/src/metadata/database.py +++ b/src/metadata/database.py @@ -2,6 +2,7 @@ import sqlite3 import os import logging + def get_temp_dir(): import tempfile @@ -11,7 +12,8 @@ def get_temp_dir(): os.mkdir(temp_dir) return temp_dir -DATABASE_STRUCTURE_FILE = "src/metadata/database_structure.sql" + +DATABASE_STRUCTURE_FILE = "database_structure.sql" TEMP_DIR = get_temp_dir() DATABASE_FILE = "metadata.db" db_path = os.path.join(TEMP_DIR, DATABASE_FILE) @@ -19,6 +21,7 @@ db_path = os.path.join(TEMP_DIR, DATABASE_FILE) connection = sqlite3.connect(db_path) cursor = connection.cursor() + def init_db(cursor, connection, reset_anyways: bool = False): # check if db exists exists = True @@ -28,7 +31,7 @@ def init_db(cursor, connection, reset_anyways: bool = False): _ = cursor.fetchall() except sqlite3.OperationalError: exists = False - + if not exists: logging.info("Database does not exist yet.") @@ -40,58 +43,88 @@ def init_db(cursor, connection, reset_anyways: bool = False): with open(DATABASE_STRUCTURE_FILE, "r") as database_structure_file: query = database_structure_file.read() cursor.executescript(query) + connection.commit() -init_db(cursor=cursor, connection=connection, reset_anyways=True) def add_artist( - musicbrainz_artistid: str, - artist: str = None + musicbrainz_artistid: str, + artist: str = None ): - query = "INSERT INTO artist (id, name) VALUES (?, ?);" + query = "INSERT OR REPLACE INTO artist (id, name) VALUES (?, ?);" values = musicbrainz_artistid, artist cursor.execute(query, values) connection.commit() + def add_release_group( - musicbrainz_releasegroupid: str, - artist_ids: list, - albumartist: str = None, - albumsort: int = None, - musicbrainz_albumtype: str = None, - compilation: str = None + musicbrainz_releasegroupid: str, + artist_ids: list, + albumartist: str = None, + albumsort: int = None, + musicbrainz_albumtype: str = None, + compilation: str = None, + album_artist_id: str = None ): # add adjacency adjacency_list = [] for artist_id in artist_ids: - adjacency_list.append((musicbrainz_releasegroupid, artist_id)) + adjacency_list.append((artist_id, musicbrainz_releasegroupid)) adjacency_values = tuple(adjacency_list) - adjacency_query = "INSERT INTO artist_release_group (artist_id, release_group_id) VALUES (?, ?);" + adjacency_query = "INSERT OR REPLACE INTO artist_release_group (artist_id, release_group_id) VALUES (?, ?);" cursor.executemany(adjacency_query, adjacency_values) connection.commit() # add release group - query = "INSERT INTO release_group (id, albumartist, albumsort, musicbrainz_albumtype, compilation) VALUES (?, ?, ?, ?, ?);" - values = musicbrainz_releasegroupid, albumartist, albumsort, musicbrainz_albumtype, compilation + query = "INSERT OR REPLACE INTO release_group (id, albumartist, albumsort, musicbrainz_albumtype, compilation, album_artist_id) VALUES (?, ?, ?, ?, ?, ?);" + values = musicbrainz_releasegroupid, albumartist, albumsort, musicbrainz_albumtype, compilation, album_artist_id cursor.execute(query, values) connection.commit() + def add_release( - musicbrainz_albumid: str, - release_group_id: str, - title: str = None, - copyright_: str = None + musicbrainz_albumid: str, + release_group_id: str, + title: str = None, + copyright_: str = None, + album_status: str = None, + language: str = None, + year: str = None, + date: str = None, + country: str = None, + barcode: str = None ): - query = "INSERT INTO release_ (id, release_group_id, title, copyright) VALUES (?, ?, ?, ?);" - values = musicbrainz_albumid, release_group_id, title, copyright_ + query = "INSERT OR REPLACE INTO release_ (id, release_group_id, title, copyright, album_status, language, year, date, country, barcode) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?);" + values = musicbrainz_albumid, release_group_id, title, copyright_, album_status, language, year, date, country, barcode cursor.execute(query, values) connection.commit() + def add_track( - + musicbrainz_releasetrackid: str, + musicbrainz_albumid: str, + feature_aritsts: list, + track: str = None, + isrc: str = None ): - pass + # add adjacency + adjacency_list = [] + for artist_id in feature_aritsts: + adjacency_list.append((artist_id, musicbrainz_releasetrackid)) + adjacency_values = tuple(adjacency_list) + adjacency_query = "INSERT OR REPLACE INTO artist_track (artist_id, track_id) VALUES (?, ?);" + cursor.executemany(adjacency_query, adjacency_values) + connection.commit() + + # add track + query = "INSERT OR REPLACE INTO track (id, release_id, track, isrc) VALUES (?, ?, ?, ?);" + values = musicbrainz_releasetrackid, musicbrainz_albumid, track, isrc + cursor.execute(query, values) + connection.commit() + + +init_db(cursor=cursor, connection=connection, reset_anyways=False) if __name__ == "__main__": logging.basicConfig(level=logging.DEBUG) diff --git a/src/metadata/database_structure.sql b/src/metadata/database_structure.sql index 5d6542f..0d86ee2 100644 --- a/src/metadata/database_structure.sql +++ b/src/metadata/database_structure.sql @@ -22,7 +22,8 @@ CREATE TABLE release_group ( albumartist TEXT, albumsort INT, musicbrainz_albumtype TEXT, - compilation TEXT + compilation TEXT, + album_artist_id TEXT ); DROP TABLE IF EXISTS release_; @@ -30,13 +31,20 @@ CREATE TABLE release_ ( id TEXT PRIMARY KEY NOT NULL, release_group_id TEXT NOT NULL, title TEXT, - copyright TEXT + copyright TEXT, + album_status TEXT, + language TEXT, + year TEXT, + date TEXT, + country TEXT, + barcode TEXT ); DROP TABLE IF EXISTS track; CREATE TABLE track ( id TEXT PRIMARY KEY NOT NULL, release_id TEXT NOT NULL, - name TEXT + track TEXT, + isrc TEXT ); diff --git a/src/metadata/download.py b/src/metadata/download.py index 7dabeb7..e0ffad5 100644 --- a/src/metadata/download.py +++ b/src/metadata/download.py @@ -2,7 +2,6 @@ from typing import List import musicbrainzngs import pandas as pd import logging -from datetime import date from object_handeling import get_elem_from_obj, parse_music_brainz_date import database @@ -54,7 +53,7 @@ class Artist: artists=[self], albumsort=i + 1 )) - + def __str__(self): newline = "\n" return f"id: {self.musicbrainz_artistid}\nname: {self.artist}\n{newline.join([str(release_group) for release_group in self.release_groups])}" @@ -95,7 +94,8 @@ class ReleaseGroup: if artist_id is None: continue self.append_artist(artist_id) - self.albumartist = "Various Artists" if len(self.artists) >= 1 else self.artists[0].artist + self.albumartist = "Various Artists" if len(self.artists) > 1 else self.artists[0].artist + self.album_artist_id = None if self.albumartist == "Various Artists" else self.artists[0].musicbrainz_artistid self.albumsort = albumsort self.musicbrainz_albumtype = get_elem_from_obj(release_group_data, ['primary-type']) @@ -117,10 +117,11 @@ class ReleaseGroup: database.add_release_group( musicbrainz_releasegroupid=self.musicbrainz_releasegroupid, artist_ids=[artist.musicbrainz_artistid for artist in self.artists], - albumartist = self.albumartist, - albumsort = self.albumsort, - musicbrainz_albumtype = self.musicbrainz_albumtype, - compilation=self.compilation + albumartist=self.albumartist, + albumsort=self.albumsort, + musicbrainz_albumtype=self.musicbrainz_albumtype, + compilation=self.compilation, + album_artist_id=self.album_artist_id ) def append_artist(self, artist_id: str) -> Artist: @@ -176,29 +177,41 @@ class Release: self.title = get_elem_from_obj(release_data, ['title']) self.copyright = get_elem_from_obj(label_data, [0, 'label', 'name']) + self.album_status = get_elem_from_obj(release_data, ['status']) + self.language = get_elem_from_obj(release_data, ['text-representation', 'language']) + self.year = get_elem_from_obj(release_data, ['date'], lambda x: x.split("-")[0]) + self.date = get_elem_from_obj(release_data, ['date']) + self.country = get_elem_from_obj(release_data, ['country']) + self.barcode = get_elem_from_obj(release_data, ['barcode']) + self.save() self.append_recordings(recording_datas) def __str__(self): - return f"{self.title} ©{self.copyright}" - + return f"{self.title} ©{self.copyright} {self.album_status}" + def save(self): logging.info(f"caching release {self}") database.add_release( musicbrainz_albumid=self.musicbrainz_albumid, release_group_id=self.release_group.musicbrainz_releasegroupid, title=self.title, - copyright_=self.copyright + copyright_=self.copyright, + album_status=self.album_status, + language=self.language, + year=self.year, + date=self.date, + country=self.country, + barcode=self.barcode ) def append_recordings(self, recording_datas: dict): for recording_data in recording_datas: - musicbrainz_releasetrackid = get_elem_from_obj(recording_data, ['id']) + musicbrainz_releasetrackid = get_elem_from_obj(recording_data, ['recording', 'id']) if musicbrainz_releasetrackid is None: continue - self.tracklist.append(musicbrainz_releasetrackid) - + self.tracklist.append(Track(musicbrainz_releasetrackid, self)) class Track: @@ -214,12 +227,42 @@ class Track: self.musicbrainz_releasetrackid = musicbrainz_releasetrackid self.release = release + self.artists = [] + + result = musicbrainzngs.get_recording_by_id(self.musicbrainz_releasetrackid, includes=["artists", "releases", "recording-rels", "isrcs", "work-level-rels"]) + recording_data = result['recording'] + for artist_data in get_elem_from_obj(recording_data, ['artist-credit'], return_if_none=[]): + self.append_artist(get_elem_from_obj(artist_data, ['artist', 'id'])) + + self.isrc = get_elem_from_obj(recording_data, ['isrc-list', 0]) + self.title = recording_data['title'] + + self.save() def __str__(self): - return "this is a track" - + return f"{self.title}: {self.isrc}" + def save(self): - logging.info("caching track {self}") + logging.info(f"caching track {self}") + + database.add_track( + musicbrainz_releasetrackid=self.musicbrainz_releasetrackid, + musicbrainz_albumid=self.release.musicbrainz_albumid, + feature_aritsts=[artist.musicbrainz_artistid for artist in self.artists], + track=self.title, + isrc=self.isrc + ) + + def append_artist(self, artist_id: str) -> Artist: + if artist_id is None: + return + + for existing_artist in self.artists: + if artist_id == existing_artist.musicbrainz_artistid: + return existing_artist + new_artist = Artist(artist_id, new_release_groups=False) + self.artists.append(new_artist) + return new_artist def download(option: dict): @@ -469,7 +512,9 @@ if __name__ == "__main__": if not os.path.exists(TEMP_DIR): os.mkdir(TEMP_DIR) """ - logging.basicConfig(level=logging.DEBUG) + + logger = logging.getLogger() + logger.setLevel(logging.INFO) download({'id': '5cfecbe4-f600-45e5-9038-ce820eedf3d1', 'type': 'artist'}) # download({'id': '4b9af532-ef7e-42ab-8b26-c466327cb5e0', 'type': 'release'})