finished implementing sqlite

This commit is contained in:
lars 2022-10-27 19:53:12 +02:00
parent 51a91de620
commit 1253581e30
4 changed files with 142 additions and 44 deletions

12
.idea/dataSources.xml Normal file
View File

@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="DataSourceManagerImpl" format="xml" multifile-model="true">
<data-source source="LOCAL" name="metadata.db" uuid="ce8fd39b-f982-4397-914f-befaf55557bc">
<driver-ref>sqlite.xerial</driver-ref>
<synchronize>true</synchronize>
<jdbc-driver>org.sqlite.JDBC</jdbc-driver>
<jdbc-url>jdbc:sqlite:/tmp/music-downloader/metadata.db</jdbc-url>
<working-dir>$ProjectFileDir$</working-dir>
</data-source>
</component>
</project>

View File

@ -2,6 +2,7 @@ import sqlite3
import os import os
import logging import logging
def get_temp_dir(): def get_temp_dir():
import tempfile import tempfile
@ -11,7 +12,8 @@ def get_temp_dir():
os.mkdir(temp_dir) os.mkdir(temp_dir)
return temp_dir return temp_dir
DATABASE_STRUCTURE_FILE = "src/metadata/database_structure.sql"
DATABASE_STRUCTURE_FILE = "database_structure.sql"
TEMP_DIR = get_temp_dir() TEMP_DIR = get_temp_dir()
DATABASE_FILE = "metadata.db" DATABASE_FILE = "metadata.db"
db_path = os.path.join(TEMP_DIR, DATABASE_FILE) db_path = os.path.join(TEMP_DIR, DATABASE_FILE)
@ -19,6 +21,7 @@ db_path = os.path.join(TEMP_DIR, DATABASE_FILE)
connection = sqlite3.connect(db_path) connection = sqlite3.connect(db_path)
cursor = connection.cursor() cursor = connection.cursor()
def init_db(cursor, connection, reset_anyways: bool = False): def init_db(cursor, connection, reset_anyways: bool = False):
# check if db exists # check if db exists
exists = True exists = True
@ -28,7 +31,7 @@ def init_db(cursor, connection, reset_anyways: bool = False):
_ = cursor.fetchall() _ = cursor.fetchall()
except sqlite3.OperationalError: except sqlite3.OperationalError:
exists = False exists = False
if not exists: if not exists:
logging.info("Database does not exist yet.") logging.info("Database does not exist yet.")
@ -40,58 +43,88 @@ def init_db(cursor, connection, reset_anyways: bool = False):
with open(DATABASE_STRUCTURE_FILE, "r") as database_structure_file: with open(DATABASE_STRUCTURE_FILE, "r") as database_structure_file:
query = database_structure_file.read() query = database_structure_file.read()
cursor.executescript(query) cursor.executescript(query)
connection.commit()
init_db(cursor=cursor, connection=connection, reset_anyways=True)
def add_artist( def add_artist(
musicbrainz_artistid: str, musicbrainz_artistid: str,
artist: str = None artist: str = None
): ):
query = "INSERT INTO artist (id, name) VALUES (?, ?);" query = "INSERT OR REPLACE INTO artist (id, name) VALUES (?, ?);"
values = musicbrainz_artistid, artist values = musicbrainz_artistid, artist
cursor.execute(query, values) cursor.execute(query, values)
connection.commit() connection.commit()
def add_release_group( def add_release_group(
musicbrainz_releasegroupid: str, musicbrainz_releasegroupid: str,
artist_ids: list, artist_ids: list,
albumartist: str = None, albumartist: str = None,
albumsort: int = None, albumsort: int = None,
musicbrainz_albumtype: str = None, musicbrainz_albumtype: str = None,
compilation: str = None compilation: str = None,
album_artist_id: str = None
): ):
# add adjacency # add adjacency
adjacency_list = [] adjacency_list = []
for artist_id in artist_ids: for artist_id in artist_ids:
adjacency_list.append((musicbrainz_releasegroupid, artist_id)) adjacency_list.append((artist_id, musicbrainz_releasegroupid))
adjacency_values = tuple(adjacency_list) adjacency_values = tuple(adjacency_list)
adjacency_query = "INSERT INTO artist_release_group (artist_id, release_group_id) VALUES (?, ?);" adjacency_query = "INSERT OR REPLACE INTO artist_release_group (artist_id, release_group_id) VALUES (?, ?);"
cursor.executemany(adjacency_query, adjacency_values) cursor.executemany(adjacency_query, adjacency_values)
connection.commit() connection.commit()
# add release group # add release group
query = "INSERT INTO release_group (id, albumartist, albumsort, musicbrainz_albumtype, compilation) VALUES (?, ?, ?, ?, ?);" query = "INSERT OR REPLACE INTO release_group (id, albumartist, albumsort, musicbrainz_albumtype, compilation, album_artist_id) VALUES (?, ?, ?, ?, ?, ?);"
values = musicbrainz_releasegroupid, albumartist, albumsort, musicbrainz_albumtype, compilation values = musicbrainz_releasegroupid, albumartist, albumsort, musicbrainz_albumtype, compilation, album_artist_id
cursor.execute(query, values) cursor.execute(query, values)
connection.commit() connection.commit()
def add_release( def add_release(
musicbrainz_albumid: str, musicbrainz_albumid: str,
release_group_id: str, release_group_id: str,
title: str = None, title: str = None,
copyright_: str = None copyright_: str = None,
album_status: str = None,
language: str = None,
year: str = None,
date: str = None,
country: str = None,
barcode: str = None
): ):
query = "INSERT INTO release_ (id, release_group_id, title, copyright) VALUES (?, ?, ?, ?);" query = "INSERT OR REPLACE INTO release_ (id, release_group_id, title, copyright, album_status, language, year, date, country, barcode) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?);"
values = musicbrainz_albumid, release_group_id, title, copyright_ values = musicbrainz_albumid, release_group_id, title, copyright_, album_status, language, year, date, country, barcode
cursor.execute(query, values) cursor.execute(query, values)
connection.commit() connection.commit()
def add_track( def add_track(
musicbrainz_releasetrackid: str,
musicbrainz_albumid: str,
feature_aritsts: list,
track: str = None,
isrc: str = None
): ):
pass # add adjacency
adjacency_list = []
for artist_id in feature_aritsts:
adjacency_list.append((artist_id, musicbrainz_releasetrackid))
adjacency_values = tuple(adjacency_list)
adjacency_query = "INSERT OR REPLACE INTO artist_track (artist_id, track_id) VALUES (?, ?);"
cursor.executemany(adjacency_query, adjacency_values)
connection.commit()
# add track
query = "INSERT OR REPLACE INTO track (id, release_id, track, isrc) VALUES (?, ?, ?, ?);"
values = musicbrainz_releasetrackid, musicbrainz_albumid, track, isrc
cursor.execute(query, values)
connection.commit()
init_db(cursor=cursor, connection=connection, reset_anyways=False)
if __name__ == "__main__": if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG) logging.basicConfig(level=logging.DEBUG)

View File

@ -22,7 +22,8 @@ CREATE TABLE release_group (
albumartist TEXT, albumartist TEXT,
albumsort INT, albumsort INT,
musicbrainz_albumtype TEXT, musicbrainz_albumtype TEXT,
compilation TEXT compilation TEXT,
album_artist_id TEXT
); );
DROP TABLE IF EXISTS release_; DROP TABLE IF EXISTS release_;
@ -30,13 +31,20 @@ CREATE TABLE release_ (
id TEXT PRIMARY KEY NOT NULL, id TEXT PRIMARY KEY NOT NULL,
release_group_id TEXT NOT NULL, release_group_id TEXT NOT NULL,
title TEXT, title TEXT,
copyright TEXT copyright TEXT,
album_status TEXT,
language TEXT,
year TEXT,
date TEXT,
country TEXT,
barcode TEXT
); );
DROP TABLE IF EXISTS track; DROP TABLE IF EXISTS track;
CREATE TABLE track ( CREATE TABLE track (
id TEXT PRIMARY KEY NOT NULL, id TEXT PRIMARY KEY NOT NULL,
release_id TEXT NOT NULL, release_id TEXT NOT NULL,
name TEXT track TEXT,
isrc TEXT
); );

View File

@ -2,7 +2,6 @@ from typing import List
import musicbrainzngs import musicbrainzngs
import pandas as pd import pandas as pd
import logging import logging
from datetime import date
from object_handeling import get_elem_from_obj, parse_music_brainz_date from object_handeling import get_elem_from_obj, parse_music_brainz_date
import database import database
@ -54,7 +53,7 @@ class Artist:
artists=[self], artists=[self],
albumsort=i + 1 albumsort=i + 1
)) ))
def __str__(self): def __str__(self):
newline = "\n" newline = "\n"
return f"id: {self.musicbrainz_artistid}\nname: {self.artist}\n{newline.join([str(release_group) for release_group in self.release_groups])}" return f"id: {self.musicbrainz_artistid}\nname: {self.artist}\n{newline.join([str(release_group) for release_group in self.release_groups])}"
@ -95,7 +94,8 @@ class ReleaseGroup:
if artist_id is None: if artist_id is None:
continue continue
self.append_artist(artist_id) self.append_artist(artist_id)
self.albumartist = "Various Artists" if len(self.artists) >= 1 else self.artists[0].artist self.albumartist = "Various Artists" if len(self.artists) > 1 else self.artists[0].artist
self.album_artist_id = None if self.albumartist == "Various Artists" else self.artists[0].musicbrainz_artistid
self.albumsort = albumsort self.albumsort = albumsort
self.musicbrainz_albumtype = get_elem_from_obj(release_group_data, ['primary-type']) self.musicbrainz_albumtype = get_elem_from_obj(release_group_data, ['primary-type'])
@ -117,10 +117,11 @@ class ReleaseGroup:
database.add_release_group( database.add_release_group(
musicbrainz_releasegroupid=self.musicbrainz_releasegroupid, musicbrainz_releasegroupid=self.musicbrainz_releasegroupid,
artist_ids=[artist.musicbrainz_artistid for artist in self.artists], artist_ids=[artist.musicbrainz_artistid for artist in self.artists],
albumartist = self.albumartist, albumartist=self.albumartist,
albumsort = self.albumsort, albumsort=self.albumsort,
musicbrainz_albumtype = self.musicbrainz_albumtype, musicbrainz_albumtype=self.musicbrainz_albumtype,
compilation=self.compilation compilation=self.compilation,
album_artist_id=self.album_artist_id
) )
def append_artist(self, artist_id: str) -> Artist: def append_artist(self, artist_id: str) -> Artist:
@ -176,29 +177,41 @@ class Release:
self.title = get_elem_from_obj(release_data, ['title']) self.title = get_elem_from_obj(release_data, ['title'])
self.copyright = get_elem_from_obj(label_data, [0, 'label', 'name']) self.copyright = get_elem_from_obj(label_data, [0, 'label', 'name'])
self.album_status = get_elem_from_obj(release_data, ['status'])
self.language = get_elem_from_obj(release_data, ['text-representation', 'language'])
self.year = get_elem_from_obj(release_data, ['date'], lambda x: x.split("-")[0])
self.date = get_elem_from_obj(release_data, ['date'])
self.country = get_elem_from_obj(release_data, ['country'])
self.barcode = get_elem_from_obj(release_data, ['barcode'])
self.save() self.save()
self.append_recordings(recording_datas) self.append_recordings(recording_datas)
def __str__(self): def __str__(self):
return f"{self.title} ©{self.copyright}" return f"{self.title} ©{self.copyright} {self.album_status}"
def save(self): def save(self):
logging.info(f"caching release {self}") logging.info(f"caching release {self}")
database.add_release( database.add_release(
musicbrainz_albumid=self.musicbrainz_albumid, musicbrainz_albumid=self.musicbrainz_albumid,
release_group_id=self.release_group.musicbrainz_releasegroupid, release_group_id=self.release_group.musicbrainz_releasegroupid,
title=self.title, title=self.title,
copyright_=self.copyright copyright_=self.copyright,
album_status=self.album_status,
language=self.language,
year=self.year,
date=self.date,
country=self.country,
barcode=self.barcode
) )
def append_recordings(self, recording_datas: dict): def append_recordings(self, recording_datas: dict):
for recording_data in recording_datas: for recording_data in recording_datas:
musicbrainz_releasetrackid = get_elem_from_obj(recording_data, ['id']) musicbrainz_releasetrackid = get_elem_from_obj(recording_data, ['recording', 'id'])
if musicbrainz_releasetrackid is None: if musicbrainz_releasetrackid is None:
continue continue
self.tracklist.append(musicbrainz_releasetrackid) self.tracklist.append(Track(musicbrainz_releasetrackid, self))
class Track: class Track:
@ -214,12 +227,42 @@ class Track:
self.musicbrainz_releasetrackid = musicbrainz_releasetrackid self.musicbrainz_releasetrackid = musicbrainz_releasetrackid
self.release = release self.release = release
self.artists = []
result = musicbrainzngs.get_recording_by_id(self.musicbrainz_releasetrackid, includes=["artists", "releases", "recording-rels", "isrcs", "work-level-rels"])
recording_data = result['recording']
for artist_data in get_elem_from_obj(recording_data, ['artist-credit'], return_if_none=[]):
self.append_artist(get_elem_from_obj(artist_data, ['artist', 'id']))
self.isrc = get_elem_from_obj(recording_data, ['isrc-list', 0])
self.title = recording_data['title']
self.save()
def __str__(self): def __str__(self):
return "this is a track" return f"{self.title}: {self.isrc}"
def save(self): def save(self):
logging.info("caching track {self}") logging.info(f"caching track {self}")
database.add_track(
musicbrainz_releasetrackid=self.musicbrainz_releasetrackid,
musicbrainz_albumid=self.release.musicbrainz_albumid,
feature_aritsts=[artist.musicbrainz_artistid for artist in self.artists],
track=self.title,
isrc=self.isrc
)
def append_artist(self, artist_id: str) -> Artist:
if artist_id is None:
return
for existing_artist in self.artists:
if artist_id == existing_artist.musicbrainz_artistid:
return existing_artist
new_artist = Artist(artist_id, new_release_groups=False)
self.artists.append(new_artist)
return new_artist
def download(option: dict): def download(option: dict):
@ -469,7 +512,9 @@ if __name__ == "__main__":
if not os.path.exists(TEMP_DIR): if not os.path.exists(TEMP_DIR):
os.mkdir(TEMP_DIR) os.mkdir(TEMP_DIR)
""" """
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger()
logger.setLevel(logging.INFO)
download({'id': '5cfecbe4-f600-45e5-9038-ce820eedf3d1', 'type': 'artist'}) download({'id': '5cfecbe4-f600-45e5-9038-ce820eedf3d1', 'type': 'artist'})
# download({'id': '4b9af532-ef7e-42ab-8b26-c466327cb5e0', 'type': 'release'}) # download({'id': '4b9af532-ef7e-42ab-8b26-c466327cb5e0', 'type': 'release'})