gitignore

This commit is contained in:
Lars Noack
2022-10-18 18:48:24 +02:00
parent 1c580dad9c
commit 5a72e1843d
41 changed files with 110 additions and 106 deletions

Binary file not shown.

Binary file not shown.

View File

@@ -1,9 +1,77 @@
import requests
import os.path
import pandas as pd
from mutagen.easyid3 import EasyID3
import json
import logging
"""
https://en.wikipedia.org/wiki/ID3
https://mutagen.readthedocs.io/en/latest/user/id3.html
>>> from mutagen.easyid3 import EasyID3
>>> print(EasyID3.valid_keys.keys())
dict_keys(
[
'album',
'bpm',
'compilation',
'composer',
'copyright',
'encodedby',
'lyricist',
'length',
'media',
'mood',
'grouping',
'title',
'version',
'artist',
'albumartist',
'conductor',
'arranger',
'discnumber',
'organization',
'tracknumber',
'author',
'albumartistsort',
'albumsort',
'composersort',
'artistsort',
'titlesort',
'isrc',
'discsubtitle',
'language',
'genre',
'date',
'originaldate',
'performer:*',
'musicbrainz_trackid',
'website',
'replaygain_*_gain',
'replaygain_*_peak',
'musicbrainz_artistid',
'musicbrainz_albumid',
'musicbrainz_albumartistid',
'musicbrainz_trmid',
'musicip_puid',
'musicip_fingerprint',
'musicbrainz_albumstatus',
'musicbrainz_albumtype',
'releasecountry',
'musicbrainz_discid',
'asin',
'performer',
'barcode',
'catalognumber',
'musicbrainz_releasetrackid',
'musicbrainz_releasegroupid',
'musicbrainz_workid',
'acoustid_fingerprint',
'acoustid_id'
])
"""
class Download:
def __init__(self, session: requests.Session = requests.Session(), file: str = ".cache3.csv", temp: str = "temp"):
@@ -18,14 +86,17 @@ class Download:
self.dataframe = pd.read_csv(os.path.join(self.temp, self.file), index_col=0)
for idx, row in self.dataframe.iterrows():
row['artist'] = json.loads(row['artist'].replace("'", '"'))
self.download(row['path'], row['file'], row['url'])
self.write_metadata(row, row['file'])
def download(self, path, file, url):
if os.path.exists(file):
logging.info(f"'{file}' does already exist, thus not downloading.")
return
os.makedirs(path, exist_ok=True)
logging.info(f"downloading: {url}")
logging.info(f"downloading: '{url}'")
r = self.session.get(url)
if r.status_code != 200:
if r.status_code == 404:
@@ -36,99 +107,22 @@ class Download:
mp3_file.write(r.content)
logging.info("finished")
"""
class Track:
def __init__(self, url: str, release: Release, track_name: str, track_artists: list = None,
session: requests.Session = requests.Session()):
self.session = session
self.url = url
parsed_url = urllib.parse.urlparse(url)
path = os.path.normpath(parsed_url.path)
split_path = path.split(os.sep)
url_type = split_path[1]
if url_type != "track":
raise Exception(f'"{url}" is supposed to link a track.')
name = split_path[2]
name = name.split("-")
self.id = name[-1]
self.name = "-".join(name[:-1])
self.track_artists = track_artists
self.release = release
self.pretty_track = track_name
self.mp3_url = self.get_mp3_url()
def __str__(self):
def fetch(self):
return
def get_mp3_url(self):
# https://musify.club/track/dl/17254894/ghost-bath-convince-me-to-bleed.mp3
return f"https://musify.club/track/dl/{self.id}/{self.name}.mp3"
def add_album_art(self, path):
img = self.release.raw_artwork
audio = EasyMP3(path, ID3=ID3)
try:
audio.add_tags()
except _util.error:
pass
audio.tags.add(
APIC(
encoding=3, # UTF-8
mime='image/png',
type=3, # 3 is for album art
desc='Cover',
data=img.read() # Reads and adds album art
)
)
audio.save()
def download(self):
download_path = os.path.join(self.release.path, self.name + ".mp3")
# download only when the file doesn't exist yet
if not os.path.exists(download_path):
logging.info(f"downloading: {self.mp3_url}")
r = requests.get(self.mp3_url, proxies=proxy)
if r.status_code != 200:
if r.status_code == 404:
logging.warning(f"{self.mp3_url} was not found")
return -1
raise ConnectionError(f"\"{self.mp3_url}\" returned {r.status_code}: {r.text}")
with open(download_path, "wb") as mp3_file:
mp3_file.write(r.content)
logging.info("finished")
audiofile = EasyID3(download_path)
if self.track_artists is not None:
audiofile["artist"] = self.track_artists
else:
audiofile["artist"] = self.release.pretty_release
audiofile["albumartist"] = self.release.artist.pretty_name
audiofile["date"] = self.release.year
if self.release.genre is not None:
audiofile["genre"] = self.release.genre
audiofile["title"] = self.pretty_track
audiofile["album"] = self.release.pretty_release
def write_metadata(self, row, file):
    """Copy the tag values of one cache row into the ID3 tags of `file`.

    :param row: mapping-like record providing the columns 'artist',
        'album_artist', 'year', 'genre', 'title', 'album' and 'track'
    :param file: path of the audio file that is opened with EasyID3
    """
    # (ID3 key, row column, convert to str before writing)
    tag_layout = (
        ("artist", 'artist', False),
        ("albumartist", 'album_artist', False),
        ("date", 'year', True),
        ("genre", 'genre', False),
        ("title", 'title', False),
        ("album", 'album', False),
        ("tracknumber", 'track', True),
    )

    tags = EasyID3(file)
    for id3_key, column, as_text in tag_layout:
        value = row[column]
        # year and track number are written in their string form
        tags[id3_key] = str(value) if as_text else value
    tags.save()
# self.add_album_art(download_path)
"""
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG)
proxies = {
'http': 'socks5h://127.0.0.1:9150',
'https': 'socks5h://127.0.0.1:9150'

View File

@@ -1,11 +1,14 @@
import metadata
import download_links
import url_to_path
import download
import logging
import requests
TEMP = "temp"
STEP_ONE_CACHE = ".cache1.csv"
STEP_TWO_CACHE = ".cache2.csv"
STEP_THREE_CACHE = ".cache3.csv"
logging.basicConfig(level=logging.INFO)
@@ -34,16 +37,25 @@ def search_for_metadata(query: str):
def cli():
    """Run the complete pipeline interactively, one step after the other.

    Asks for an initial search query on stdin, then downloads the metadata,
    fetches the download links, creates the target paths and finally
    downloads the mp3 files. The cache file names STEP_ONE_CACHE,
    STEP_TWO_CACHE and STEP_THREE_CACHE and the TEMP directory are passed
    to the individual steps.

    The same proxied `requests.Session` is handed to both the link-fetching
    step and the mp3 download step.
    """
    # One shared session so both network steps use the same proxy settings.
    # NOTE(review): 127.0.0.1:9150 is presumably a local Tor SOCKS proxy - confirm.
    session = requests.Session()
    session.proxies = {
        'http': 'socks5h://127.0.0.1:9150',
        'https': 'socks5h://127.0.0.1:9150'
    }

    search = search_for_metadata(query=input("initial query: "))

    logging.info("Starting Downloading of metadata")
    search.download(file=STEP_ONE_CACHE)

    # The link-fetching step runs exactly once, with the proxied session.
    # A second call without `session` would repeat the step and bypass
    # the proxy.
    logging.info("Fetching Download Links")
    download_links.Download(file=STEP_TWO_CACHE, metadata_csv=STEP_ONE_CACHE, temp=TEMP, session=session)

    logging.info("creating Paths")
    url_to_path.UrlPath("dsbm")

    logging.info("starting to download the mp3's")
    download.Download(session=session, file=STEP_THREE_CACHE, temp=TEMP)
if __name__ == "__main__":
cli()

View File

@@ -1,4 +1,5 @@
import os.path
from select import select
import musicbrainzngs
import pandas as pd
@@ -82,17 +83,8 @@ class Search:
def download_track(self, mb_id, is_various_artist: bool = None, track: int = None, total_tracks: int = None):
"""
Title
Artist
Album:
Album artist
Composer
Genre
Track number <> of <>
Disc number <> of <>
Year
BPM
Comment
album
bpm
Album Art
"""
@@ -100,7 +92,8 @@ class Search:
aliases, tags, user-tags, ratings, user-ratings, area-rels, artist-rels, label-rels, place-rels, event-rels,
recording-rels, release-rels, release-group-rels, series-rels, url-rels, work-rels, instrument-rels """
result = musicbrainzngs.get_recording_by_id(mb_id, includes=["artists", "releases"])
result = musicbrainzngs.get_recording_by_id(mb_id, includes=["artists", "releases", "recording-rels"])
print(result)
recording_data = result['recording']
release_data = recording_data['release-list'][0]
@@ -109,7 +102,7 @@ class Search:
artist_ids = [artist_['artist']['id'] for artist_ in recording_data['artist-credit']]
def get_additional_release_info(mb_id_):
r = musicbrainzngs.get_release_by_id(mb_id_, includes=["artists", "recordings"])
r = musicbrainzngs.get_release_by_id(mb_id_, includes=["artists", "recordings", "recording-rels"])
is_various_artist_ = len(r['release']['artist-credit']) > 1
tracklist = r['release']['medium-list'][0]['track-list']
track_count_ = len(tracklist)
@@ -322,10 +315,10 @@ def interactive_demo():
if __name__ == "__main__":
interactive_demo()
# interactive_demo()
# automated_demo()
# search = Search(query="psychonaut 4")
search = Search(query="psychonaut 4")
# search.download_release("27f00fb8-983c-4d5c-950f-51418aac55dc")
# for track_ in search.download_artist("c0c720b5-012f-4204-a472-981403f37b12"):
# print(track_)
# search.download_track("83a30323-aee1-401a-b767-b3c1bdd026c0")
search.download_track("83a30323-aee1-401a-b767-b3c1bdd026c0")