From 6b79d6a6ae5d16558dcf2021827c5ff63966212e Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 29 Nov 2022 14:49:56 +0100 Subject: [PATCH] added checking against length for youtube videos --- src/goof.py | 6 +++++- src/music_kraken/audio_source/sources/youtube.py | 12 +++++++++--- src/music_kraken/utils/phonetic_compares.py | 5 +++++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/goof.py b/src/goof.py index 76d11d5..a0d2899 100644 --- a/src/goof.py +++ b/src/goof.py @@ -4,10 +4,14 @@ print(mk.__file__) mk.clear_cache() song_list = mk.cache.get_custom_track([]) print(mk.cache, len(song_list)) -#recording/694bfd3c-9d2d-4d67-9bfc-cee5bf77166e id_="694bfd3c-9d2d-4d67-9bfc-cee5bf77166e" +id_="5cc28584-10c6-40e2-b6d4-6891e7e7c575" mk.fetch_metadata(id_=id_, type_="recording") song = mk.cache.get_track_metadata(musicbrainz_releasetrackid=id_) print(song) print(song.length) +mk.set_targets(genre="test") + +song = mk.cache.get_track_metadata(musicbrainz_releasetrackid=id_) +mk.fetch_sources([song]) diff --git a/src/music_kraken/audio_source/sources/youtube.py b/src/music_kraken/audio_source/sources/youtube.py index f965530..f0051d3 100644 --- a/src/music_kraken/audio_source/sources/youtube.py +++ b/src/music_kraken/audio_source/sources/youtube.py @@ -18,6 +18,9 @@ YOUTUBE_TITLE_KEY = 'title' WAIT_BETWEEN_BLOCK = 10 MAX_TRIES = 3 +def youtube_length_to_mp3_length(youtube_len: float) -> int: + return int(youtube_len * 1000) + class Youtube(AudioSource): @classmethod @@ -31,7 +34,8 @@ class Youtube(AudioSource): return [{ 'url': video[YOUTUBE_URL_KEY], - 'title': video[YOUTUBE_TITLE_KEY] + 'title': video[YOUTUBE_TITLE_KEY], + 'length': youtube_length_to_mp3_length(float(videos[0]['duration'])) } for video in videos] @classmethod @@ -51,8 +55,10 @@ class Youtube(AudioSource): match, distance = phonetic_compares.match_titles(video_title, real_title) if match: - # logger.warning( - # f"dont downloading {result['url']} cuz the phonetic distance ({distance}) 
between {real_title} and {video_title} is to high.") + continue + + if not phonetic_compares.match_length(song.length, result['length']): + logger.warning(f"{song.length} doesn't match with {result}") continue final_result = result diff --git a/src/music_kraken/utils/phonetic_compares.py b/src/music_kraken/utils/phonetic_compares.py index 45a77d3..159363d 100644 --- a/src/music_kraken/utils/phonetic_compares.py +++ b/src/music_kraken/utils/phonetic_compares.py @@ -4,6 +4,8 @@ import string TITLE_THRESHOLD_LEVENSHTEIN = 1 UNIFY_TO = " " +ALLOWED_LENGTH_DISTANCE = 20 + def unify_punctuation(to_unify: str) -> str: for char in string.punctuation: @@ -46,3 +48,6 @@ def match_artists(artist_1, artist_2: str): distances.append(distance) return True, min(distances) return match_titles(artist_1, artist_2) + +def match_length(length_1: int, length_2: int) -> bool: + return abs(length_1 - length_2) <= ALLOWED_LENGTH_DISTANCE