diff --git a/src/download.py b/src/download.py index 08a66ee..6f1d185 100644 --- a/src/download.py +++ b/src/download.py @@ -32,7 +32,6 @@ def write_metadata(row, file_path): for key in list(row.keys()): if type(row[key]) == list or key in valid_keys and not pd.isna(row[key]): - # print(key) if type(row[key]) == int or type(row[key]) == float: row[key] = str(row[key]) audiofile[key] = row[key] @@ -72,11 +71,17 @@ class Download: write_metadata(row, row['file']) continue + download_success = None src = row['src'] if src == 'musify': - musify.download(row) + download_success = musify.download(row) elif src == 'youtube': - youtube_music.download(row) + download_success = youtube_music.download(row) + + if download_success == -1: + logging.warning(f"couldn't download {row.url} from {row.src}") + continue + write_metadata(row, row['file']) diff --git a/src/download_links.py b/src/download_links.py index 7a272e9..78e7178 100644 --- a/src/download_links.py +++ b/src/download_links.py @@ -18,20 +18,20 @@ class Download: for idx, row in self.metadata.iterrows(): row['artist'] = json.loads(row['artist'].replace("'", '"')) - + # check musify musify_url = musify.get_musify_url(row) if musify_url is not None: self.add_url(musify_url, 'musify', dict(row)) continue - # check youtube + # check YouTube youtube_url = youtube_music.get_youtube_url(row) if youtube_url is not None: self.add_url(youtube_url, 'youtube', dict(row)) continue - # check musify again, but with a diffrent methode that takes longer + # check musify again, but with a different method that takes longer musify_url = musify.get_musify_url_slow(row) if musify_url is not None: self.add_url(musify_url, 'musify', dict(row)) @@ -40,7 +40,6 @@ class Download: logging.warning(f"Didn't find any sources for {row['title']}") self.dump_urls(file) - def add_url(self, url: str, src: str, row: dict): row['url'] = url @@ -48,7 +47,6 @@ class Download: self.urls.append(row) - def dump_urls(self, file: str = ".cache2.csv"): df = 
pd.DataFrame(self.urls) df.to_csv(os.path.join(self.temp, file)) diff --git a/src/musify.py b/src/musify.py index c52bd71..795d067 100644 --- a/src/musify.py +++ b/src/musify.py @@ -35,8 +35,6 @@ def get_download_link(default_url): musify_id = file_.split("-")[-1] musify_name = "-".join(file_.split("-")[:-1]) - logging.info(f"https://musify.club/track/dl/{musify_id}/{musify_name}.mp3") - return f"https://musify.club/track/dl/{musify_id}/{musify_name}.mp3" @@ -45,7 +43,10 @@ def download_from_musify(file, url): r = session.get(url) if r.status_code != 200: if r.status_code == 404: - logging.warning(f"{url} was not found") + logging.warning(f"{r.url} was not found") + return -1 + if r.status_code == 503: + logging.warning(f"{r.url} raised an internal server error") return -1 raise ConnectionError(f"\"{url}\" returned {r.status_code}: {r.text}") with open(file, "wb") as mp3_file: @@ -61,12 +62,13 @@ def download(row): def get_soup_of_search(query: str): url = f"https://musify.club/search?searchText={query}" - print(url) + logging.debug(f"Trying to get soup from {url}") r = session.get(url) if r.status_code != 200: raise ConnectionError(f"{r.url} returned {r.status_code}:\n{r.content}") return bs4.BeautifulSoup(r.content, features="html.parser") + def search_for_track(row): track = row.title artist = row.artist diff --git a/src/phonetic_compares.py b/src/phonetic_compares.py index a5bd28f..facb851 100644 --- a/src/phonetic_compares.py +++ b/src/phonetic_compares.py @@ -13,7 +13,6 @@ def match_artists(artist_1, artist_2: str) -> (bool, int): distances = [] for artist_1_ in artist_1: - print(artist_1_) match, distance = match_titles(artist_1_, artist_2) if not match: return match, distance