gitignore

This commit is contained in:
Lars Noack 2022-10-18 18:48:24 +02:00
parent 1c580dad9c
commit 5a72e1843d
41 changed files with 110 additions and 106 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
*.mp3

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,9 +1,77 @@
import requests import requests
import os.path import os.path
import pandas as pd import pandas as pd
from mutagen.easyid3 import EasyID3
import json
import logging import logging
"""
https://en.wikipedia.org/wiki/ID3
https://mutagen.readthedocs.io/en/latest/user/id3.html
>>> from mutagen.easyid3 import EasyID3
>>> print(EasyID3.valid_keys.keys())
dict_keys(
[
'album',
'bpm',
'compilation',
'composer',
'copyright',
'encodedby',
'lyricist',
'length',
'media',
'mood',
'grouping',
'title',
'version',
'artist',
'albumartist',
'conductor',
'arranger',
'discnumber',
'organization',
'tracknumber',
'author',
'albumartistsort',
'albumsort',
'composersort',
'artistsort',
'titlesort',
'isrc',
'discsubtitle',
'language',
'genre',
'date',
'originaldate',
'performer:*',
'musicbrainz_trackid',
'website',
'replaygain_*_gain',
'replaygain_*_peak',
'musicbrainz_artistid',
'musicbrainz_albumid',
'musicbrainz_albumartistid',
'musicbrainz_trmid',
'musicip_puid',
'musicip_fingerprint',
'musicbrainz_albumstatus',
'musicbrainz_albumtype',
'releasecountry',
'musicbrainz_discid',
'asin',
'performer',
'barcode',
'catalognumber',
'musicbrainz_releasetrackid',
'musicbrainz_releasegroupid',
'musicbrainz_workid',
'acoustid_fingerprint',
'acoustid_id'
])
"""
class Download: class Download:
def __init__(self, session: requests.Session = requests.Session(), file: str = ".cache3.csv", temp: str = "temp"): def __init__(self, session: requests.Session = requests.Session(), file: str = ".cache3.csv", temp: str = "temp"):
@ -18,14 +86,17 @@ class Download:
self.dataframe = pd.read_csv(os.path.join(self.temp, self.file), index_col=0) self.dataframe = pd.read_csv(os.path.join(self.temp, self.file), index_col=0)
for idx, row in self.dataframe.iterrows(): for idx, row in self.dataframe.iterrows():
row['artist'] = json.loads(row['artist'].replace("'", '"'))
self.download(row['path'], row['file'], row['url']) self.download(row['path'], row['file'], row['url'])
self.write_metadata(row, row['file'])
def download(self, path, file, url): def download(self, path, file, url):
if os.path.exists(file): if os.path.exists(file):
logging.info(f"'{file}' does already exist, thus not downloading.")
return return
os.makedirs(path, exist_ok=True) os.makedirs(path, exist_ok=True)
logging.info(f"downloading: {url}") logging.info(f"downloading: '{url}'")
r = self.session.get(url) r = self.session.get(url)
if r.status_code != 200: if r.status_code != 200:
if r.status_code == 404: if r.status_code == 404:
@ -36,99 +107,22 @@ class Download:
mp3_file.write(r.content) mp3_file.write(r.content)
logging.info("finished") logging.info("finished")
def write_metadata(self, row, file):
""" audiofile = EasyID3(file)
class Track:
def __init__(self, url: str, release: Release, track_name: str, track_artists: list = None, audiofile["artist"] = row['artist']
session: requests.Session = requests.Session()): audiofile["albumartist"] = row['album_artist']
self.session = session audiofile["date"] = str(row['year'])
self.url = url audiofile["genre"] = row['genre']
audiofile["title"] = row['title']
parsed_url = urllib.parse.urlparse(url) audiofile["album"] = row['album']
path = os.path.normpath(parsed_url.path) audiofile["tracknumber"] = str(row['track'])
split_path = path.split(os.sep)
url_type = split_path[1]
if url_type != "track":
raise Exception(f'"{url}" is supposed to link a track.')
name = split_path[2]
name = name.split("-")
self.id = name[-1]
self.name = "-".join(name[:-1])
self.track_artists = track_artists
self.release = release
self.pretty_track = track_name
self.mp3_url = self.get_mp3_url()
def __str__(self):
def fetch(self):
return
def get_mp3_url(self):
# https://musify.club/track/dl/17254894/ghost-bath-convince-me-to-bleed.mp3
return f"https://musify.club/track/dl/{self.id}/{self.name}.mp3"
def add_album_art(self, path):
img = self.release.raw_artwork
audio = EasyMP3(path, ID3=ID3)
try:
audio.add_tags()
except _util.error:
pass
audio.tags.add(
APIC(
encoding=3, # UTF-8
mime='image/png',
type=3, # 3 is for album art
desc='Cover',
data=img.read() # Reads and adds album art
)
)
audio.save()
def download(self):
download_path = os.path.join(self.release.path, self.name + ".mp3")
# download only when the file doesn't exist yet
if not os.path.exists(download_path):
logging.info(f"downloading: {self.mp3_url}")
r = requests.get(self.mp3_url, proxies=proxy)
if r.status_code != 200:
if r.status_code == 404:
logging.warning(f"{self.mp3_url} was not found")
return -1
raise ConnectionError(f"\"{self.mp3_url}\" returned {r.status_code}: {r.text}")
with open(download_path, "wb") as mp3_file:
mp3_file.write(r.content)
logging.info("finished")
audiofile = EasyID3(download_path)
if self.track_artists is not None:
audiofile["artist"] = self.track_artists
else:
audiofile["artist"] = self.release.pretty_release
audiofile["albumartist"] = self.release.artist.pretty_name
audiofile["date"] = self.release.year
if self.release.genre is not None:
audiofile["genre"] = self.release.genre
audiofile["title"] = self.pretty_track
audiofile["album"] = self.release.pretty_release
audiofile.save() audiofile.save()
# self.add_album_art(download_path)
"""
if __name__ == "__main__": if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG)
proxies = { proxies = {
'http': 'socks5h://127.0.0.1:9150', 'http': 'socks5h://127.0.0.1:9150',
'https': 'socks5h://127.0.0.1:9150' 'https': 'socks5h://127.0.0.1:9150'

View File

@ -1,11 +1,14 @@
import metadata import metadata
import download_links import download_links
import url_to_path import url_to_path
import download
import logging import logging
import requests
TEMP = "temp" TEMP = "temp"
STEP_ONE_CACHE = ".cache1.csv" STEP_ONE_CACHE = ".cache1.csv"
STEP_TWO_CACHE = ".cache2.csv" STEP_TWO_CACHE = ".cache2.csv"
STEP_THREE_CACHE = ".cache3.csv"
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
@ -34,16 +37,25 @@ def search_for_metadata(query: str):
def cli(): def cli():
session = requests.Session()
session.proxies = {
'http': 'socks5h://127.0.0.1:9150',
'https': 'socks5h://127.0.0.1:9150'
}
search = search_for_metadata(query=input("initial query: ")) search = search_for_metadata(query=input("initial query: "))
logging.info("Starting Downloading of metadata") logging.info("Starting Downloading of metadata")
search.download(file=STEP_ONE_CACHE) search.download(file=STEP_ONE_CACHE)
logging.info("Fetching Download Links") logging.info("Fetching Download Links")
download_links.Download(file=STEP_TWO_CACHE, metadata_csv=STEP_ONE_CACHE, temp=TEMP) download_links.Download(file=STEP_TWO_CACHE, metadata_csv=STEP_ONE_CACHE, temp=TEMP, session=session)
logging.info("creating Paths") logging.info("creating Paths")
url_to_path.UrlPath("dsbm") url_to_path.UrlPath("dsbm")
logging.info("starting to download the mp3's")
download.Download(session=session, file=STEP_THREE_CACHE, temp=TEMP)
if __name__ == "__main__": if __name__ == "__main__":
cli() cli()

View File

@ -1,4 +1,5 @@
import os.path import os.path
from select import select
import musicbrainzngs import musicbrainzngs
import pandas as pd import pandas as pd
@ -82,17 +83,8 @@ class Search:
def download_track(self, mb_id, is_various_artist: bool = None, track: int = None, total_tracks: int = None): def download_track(self, mb_id, is_various_artist: bool = None, track: int = None, total_tracks: int = None):
""" """
Title album
Artist bpm
Album:
Album artist
Composer
Genre
Track number <> of <>
Disc number <> of <>
Year
BPM
Comment
Album Art Album Art
""" """
@ -100,7 +92,8 @@ class Search:
aliases, tags, user-tags, ratings, user-ratings, area-rels, artist-rels, label-rels, place-rels, event-rels, aliases, tags, user-tags, ratings, user-ratings, area-rels, artist-rels, label-rels, place-rels, event-rels,
recording-rels, release-rels, release-group-rels, series-rels, url-rels, work-rels, instrument-rels """ recording-rels, release-rels, release-group-rels, series-rels, url-rels, work-rels, instrument-rels """
result = musicbrainzngs.get_recording_by_id(mb_id, includes=["artists", "releases"]) result = musicbrainzngs.get_recording_by_id(mb_id, includes=["artists", "releases", "recording-rels"])
print(result)
recording_data = result['recording'] recording_data = result['recording']
release_data = recording_data['release-list'][0] release_data = recording_data['release-list'][0]
@ -109,7 +102,7 @@ class Search:
artist_ids = [artist_['artist']['id'] for artist_ in recording_data['artist-credit']] artist_ids = [artist_['artist']['id'] for artist_ in recording_data['artist-credit']]
def get_additional_release_info(mb_id_): def get_additional_release_info(mb_id_):
r = musicbrainzngs.get_release_by_id(mb_id_, includes=["artists", "recordings"]) r = musicbrainzngs.get_release_by_id(mb_id_, includes=["artists", "recordings", "recording-rels"])
is_various_artist_ = len(r['release']['artist-credit']) > 1 is_various_artist_ = len(r['release']['artist-credit']) > 1
tracklist = r['release']['medium-list'][0]['track-list'] tracklist = r['release']['medium-list'][0]['track-list']
track_count_ = len(tracklist) track_count_ = len(tracklist)
@ -322,10 +315,10 @@ def interactive_demo():
if __name__ == "__main__": if __name__ == "__main__":
interactive_demo() # interactive_demo()
# automated_demo() # automated_demo()
# search = Search(query="psychonaut 4") search = Search(query="psychonaut 4")
# search.download_release("27f00fb8-983c-4d5c-950f-51418aac55dc") # search.download_release("27f00fb8-983c-4d5c-950f-51418aac55dc")
# for track_ in search.download_artist("c0c720b5-012f-4204-a472-981403f37b12"): # for track_ in search.download_artist("c0c720b5-012f-4204-a472-981403f37b12"):
# print(track_) # print(track_)
# search.download_track("83a30323-aee1-401a-b767-b3c1bdd026c0") search.download_track("83a30323-aee1-401a-b767-b3c1bdd026c0")

2
temp/.cache1.csv Normal file
View File

@ -0,0 +1,2 @@
,id,title,artist,album_artist,album,year,track,total_tracks
0,39e9dee2-6b09-4aa5-bb5b-d04fa43578db,Black Smoke Curling From the Lips of War,['Cradle of Filth'],Cradle of Filth,Existence Is Futile,2021,6,12
1 id title artist album_artist album year track total_tracks
2 0 39e9dee2-6b09-4aa5-bb5b-d04fa43578db Black Smoke Curling From the Lips of War ['Cradle of Filth'] Cradle of Filth Existence Is Futile 2021 6 12

1
temp/.cache2.csv Normal file
View File

@ -0,0 +1 @@
""
1

1
temp/.cache3.csv Normal file
View File

@ -0,0 +1 @@
""
1