Implemented the previous changes in the downloader

This commit is contained in:
lars 2022-11-03 17:05:52 +01:00
parent 9f05d43022
commit a59244e82d
6 changed files with 38 additions and 34 deletions

View File

@ -1,12 +1,11 @@
import mutagen.id3 import mutagen.id3
import requests import requests
import os.path import os.path
import pandas as pd
from mutagen.easyid3 import EasyID3 from mutagen.easyid3 import EasyID3
from pydub import AudioSegment from pydub import AudioSegment
import json
import logging import logging
from metadata import database
import musify import musify
import youtube_music import youtube_music
@ -21,6 +20,10 @@ print(EasyID3.valid_keys.keys())
def write_metadata(row, file_path): def write_metadata(row, file_path):
if not os.path.exists(file_path):
logging.warning("something went really wrong")
return False
# only convert the file to the proper format if mutagen doesn't work with it due to time # only convert the file to the proper format if mutagen doesn't work with it due to time
try: try:
audiofile = EasyID3(file_path) audiofile = EasyID3(file_path)
@ -31,10 +34,12 @@ def write_metadata(row, file_path):
valid_keys = list(EasyID3.valid_keys.keys()) valid_keys = list(EasyID3.valid_keys.keys())
for key in list(row.keys()): for key in list(row.keys()):
if type(row[key]) == list or key in valid_keys and not pd.isna(row[key]): if key in valid_keys and row[key] is not None:
if type(row[key]) == int or type(row[key]) == float: if type(row[key]) != list:
row[key] = str(row[key]) row[key] = str(row[key])
audiofile[key] = row[key] audiofile[key] = row[key]
else:
logging.warning(key)
logging.info("saving") logging.info("saving")
audiofile.save(file_path, v1=2) audiofile.save(file_path, v1=2)
@ -50,18 +55,12 @@ def path_stuff(path: str, file_: str):
class Download: class Download:
def __init__(self, proxies: dict = None, file: str = ".cache3.csv", temp: str = "temp", def __init__(self, proxies: dict = None, base_path: str = ""):
base_path: str = ""):
if proxies is not None: if proxies is not None:
musify.set_proxy(proxies) musify.set_proxy(proxies)
self.temp = temp for row in database.get_tracks_to_download():
self.file = file row['artist'] = [i['name'] for i in row['artists']]
self.dataframe = pd.read_csv(os.path.join(self.temp, self.file), index_col=0)
for idx, row in self.dataframe.iterrows():
row['artist'] = json.loads(row['artist'].replace("'", '"'))
row['file'] = os.path.join(base_path, row['file']) row['file'] = os.path.join(base_path, row['file'])
row['path'] = os.path.join(base_path, row['path']) row['path'] = os.path.join(base_path, row['path'])

View File

@ -1,3 +1,4 @@
import metadata.download
import metadata.metadata import metadata.metadata
import download_links import download_links
import url_to_path import url_to_path
@ -14,7 +15,7 @@ STEP_THREE_CACHE = ".cache3.csv"
NOT_A_GENRE = ".", "..", "misc_scripts", "Music", "script", ".git", ".idea" NOT_A_GENRE = ".", "..", "misc_scripts", "Music", "script", ".git", ".idea"
MUSIC_DIR = os.path.expanduser('~/Music') MUSIC_DIR = os.path.expanduser('~/Music')
TOR = True TOR = False
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
@ -39,7 +40,7 @@ def search_for_metadata(query: str):
if input_ == "q": if input_ == "q":
exit(0) exit(0)
if input_ == "ok": if input_ == "ok":
return search return search.current_chosen_option
if input_ == ".": if input_ == ".":
print(search.options) print(search.options)
continue continue
@ -84,20 +85,21 @@ def cli(start_at: int = 0):
if start_at <= 0: if start_at <= 0:
search = search_for_metadata(query=input("initial query: ")) search = search_for_metadata(query=input("initial query: "))
logging.info("Starting Downloading of metadata") logging.info("Starting Downloading of metadata")
search.download(file=STEP_ONE_CACHE) metadata.download.download(search)
if start_at <= 1: if start_at <= 1:
logging.info("Fetching Download Links") logging.info("Fetching Download Links")
download_links.Download(file=STEP_TWO_CACHE, metadata_csv=STEP_ONE_CACHE, temp=TEMP, proxies=proxies) download_links.Download(proxies=proxies)
if start_at <= 2: if start_at <= 2:
logging.info("creating Paths") logging.info("creating Paths")
print(genre)
url_to_path.UrlPath(genre=genre) url_to_path.UrlPath(genre=genre)
if start_at <= 3: if start_at <= 3:
logging.info("starting to download the mp3's") logging.info("starting to download the mp3's")
download.Download(proxies=proxies, file=STEP_THREE_CACHE, temp=TEMP, base_path=MUSIC_DIR) download.Download(proxies=proxies, base_path=MUSIC_DIR)
if __name__ == "__main__": if __name__ == "__main__":
cli(start_at=0) cli(start_at=2)

View File

@ -14,8 +14,8 @@ def get_temp_dir():
return temp_dir return temp_dir
DATABASE_STRUCTURE_FILE = "database_structure.sql" # DATABASE_STRUCTURE_FILE = "database_structure.sql"
# DATABASE_STRUCTURE_FILE = "src/metadata/database_structure.sql" DATABASE_STRUCTURE_FILE = "src/metadata/database_structure.sql"
TEMP_DIR = get_temp_dir() TEMP_DIR = get_temp_dir()
DATABASE_FILE = "metadata.db" DATABASE_FILE = "metadata.db"
db_path = os.path.join(TEMP_DIR, DATABASE_FILE) db_path = os.path.join(TEMP_DIR, DATABASE_FILE)
@ -195,12 +195,13 @@ def get_track_metadata(musicbrainz_releasetrackid: str):
def get_tracks_to_download(): def get_tracks_to_download():
return get_custom_track(["track.downloaded == 0"]) return get_custom_track(['track.downloaded == 0'])
def get_tracks_without_src(): def get_tracks_without_src():
return get_custom_track(["(track.url IS NULL OR track.src IS NULL)"]) return get_custom_track(["(track.url IS NULL OR track.src IS NULL)"])
def get_tracks_without_isrc(): def get_tracks_without_isrc():
return get_custom_track(["track.isrc IS NULL"]) return get_custom_track(["track.isrc IS NULL"])

View File

@ -3,8 +3,8 @@ import musicbrainzngs
import pandas as pd import pandas as pd
import logging import logging
from object_handeling import get_elem_from_obj, parse_music_brainz_date from metadata.object_handeling import get_elem_from_obj, parse_music_brainz_date
import database from metadata import database
# I don't know if it would be feasible to set up my own mb instance # I don't know if it would be feasible to set up my own mb instance
# https://github.com/metabrainz/musicbrainz-docker # https://github.com/metabrainz/musicbrainz-docker

View File

@ -1,8 +1,7 @@
import logging import logging
import musicbrainzngs import musicbrainzngs
import options from metadata import options
from object_handeling import get_elem_from_obj
mb_log = logging.getLogger("musicbrainzngs") mb_log = logging.getLogger("musicbrainzngs")
mb_log.setLevel(logging.WARNING) mb_log.setLevel(logging.WARNING)
@ -58,12 +57,13 @@ class Search:
if not self.current_options.choose(index): if not self.current_options.choose(index):
return self.current_options return self.current_options
self.current_chosen_option = self.current_options.get_current_option() self.current_chosen_option = self.current_options.get_current_option(komplex=True)
kind = self.current_chosen_option['kind'] kind = self.current_chosen_option['type']
if kind == 'artist': if kind == 'artist':
return self.browse_artist(self.current_chosen_option, limit=limit) return self.browse_artist(self.current_chosen_option, limit=limit)
if kind == 'release': if kind == 'release':
release_limit = limit if not ignore_limit_for_tracklist else 100 release_limit = limit if not ignore_limit_for_tracklist else 100
release_limit = 100
return self.browse_release(self.current_chosen_option, limit=release_limit) return self.browse_release(self.current_chosen_option, limit=release_limit)
if kind == 'track': if kind == 'track':
track_limit = limit if not ignore_limit_for_tracklist else 100 track_limit = limit if not ignore_limit_for_tracklist else 100

View File

@ -24,7 +24,7 @@ def get_string_for_tracks(tracks: dict) -> str:
def get_string_for_option(option: dict) -> str: def get_string_for_option(option: dict) -> str:
kind = option['kind'] kind = option['type']
if kind == "artist": if kind == "artist":
return get_string_for_artist(option) return get_string_for_artist(option)
if kind == "release": if kind == "release":
@ -37,6 +37,7 @@ def get_string_for_option(option: dict) -> str:
class Options: class Options:
def __init__(self, results: list): def __init__(self, results: list):
self.results = results self.results = results
print(results)
self.artist_count = 0 self.artist_count = 0
self.release_count = 0 self.release_count = 0
@ -56,7 +57,7 @@ class Options:
komplex_information = self.result_list[self.current_option_ind] komplex_information = self.result_list[self.current_option_ind]
return { return {
'id': komplex_information['id'], 'id': komplex_information['id'],
'type': komplex_information['kind'] 'type': komplex_information['type']
} }
def choose(self, index: int) -> bool: def choose(self, index: int) -> bool:
@ -68,7 +69,8 @@ class Options:
def __str__(self) -> str: def __str__(self) -> str:
string = f"artists: {self.artist_count}; releases {self.release_count}; tracks {self.track_count}\n" string = f"artists: {self.artist_count}; releases {self.release_count}; tracks {self.track_count}\n"
for i, option in enumerate(self.result_list): for i, option in enumerate(self.result_list):
string += f"{i})\t{option['kind']}:\t" + get_string_for_option(option) print(option)
string += f"{i})\t{option['type']}:\t" + get_string_for_option(option)
return string return string
def set_options_values(self): def set_options_values(self):
@ -86,19 +88,19 @@ class Options:
def set_artist_values(self, option_set: dict): def set_artist_values(self, option_set: dict):
self.artist_count += option_set['artist-count'] self.artist_count += option_set['artist-count']
for artist in option_set['artist-list']: for artist in option_set['artist-list']:
artist['kind'] = "artist" artist['type'] = "artist"
self.result_list.append(artist) self.result_list.append(artist)
def set_release_values(self, option_set: dict): def set_release_values(self, option_set: dict):
self.release_count += option_set['release-count'] self.release_count += option_set['release-count']
for release in option_set['release-list']: for release in option_set['release-list']:
release['kind'] = "release" release['type'] = "release"
self.result_list.append(release) self.result_list.append(release)
def set_track_values(self, option_set: dict): def set_track_values(self, option_set: dict):
self.track_count += option_set['recording-count'] self.track_count += option_set['recording-count']
for track in option_set['recording-list']: for track in option_set['recording-list']:
track['kind'] = "track" track['type'] = "track"
self.result_list.append(track) self.result_list.append(track)
""" """