moved linguistics stuff to another file
This commit is contained in:
parent
966f0c9602
commit
137b18517f
@ -3,4 +3,5 @@ pandas~=1.5.0
|
|||||||
mutagen~=1.46.0
|
mutagen~=1.46.0
|
||||||
musicbrainzngs~=0.7.1
|
musicbrainzngs~=0.7.1
|
||||||
jellyfish~=0.9.0
|
jellyfish~=0.9.0
|
||||||
pydub~=0.25.1
|
pydub~=0.25.1
|
||||||
|
youtube_dl
|
BIN
src/__pycache__/phonetic_compares.cpython-310.pyc
Normal file
BIN
src/__pycache__/phonetic_compares.cpython-310.pyc
Normal file
Binary file not shown.
@ -1,2 +1,8 @@
|
|||||||
import jellyfish
|
import jellyfish
|
||||||
|
|
||||||
|
# Largest Levenshtein distance at which two titles are still accepted.
TITLE_THRESHOLD_LEVENSHTEIN = 1


def match_titles(title_1: str, title_2: str) -> tuple[bool, int]:
    """Compare two titles by their Levenshtein edit distance.

    Args:
        title_1: First title to compare.
        title_2: Second title to compare.

    Returns:
        A ``(exceeds_threshold, distance)`` pair. ``distance`` is the value
        returned by ``jellyfish.levenshtein_distance``. The flag is ``True``
        when ``distance`` is greater than ``TITLE_THRESHOLD_LEVENSHTEIN``.

    Note:
        Despite the function name, a ``True`` flag means the titles differ
        by MORE than the threshold. Callers treat a truthy flag as the
        reason to reject the candidate, so the polarity is kept as-is.
    """
    distance = jellyfish.levenshtein_distance(title_1, title_2)
    # Compare against the module constant rather than a duplicated literal
    # so the threshold can be tuned in one place.
    return distance > TITLE_THRESHOLD_LEVENSHTEIN, distance
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
import youtube_dl
|
import youtube_dl
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import jellyfish
|
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
import phonetic_compares
|
||||||
|
|
||||||
YDL_OPTIONS = {'format': 'bestaudio', 'noplaylist': 'True'}
|
YDL_OPTIONS = {'format': 'bestaudio', 'noplaylist': 'True'}
|
||||||
YOUTUBE_URL_KEY = 'webpage_url'
|
YOUTUBE_URL_KEY = 'webpage_url'
|
||||||
WAIT_BETWEEN_BLOCK = 10
|
WAIT_BETWEEN_BLOCK = 10
|
||||||
@ -31,12 +32,11 @@ def get_youtube_url(row):
|
|||||||
result = get_youtube_from_isrc(row['isrc'])
|
result = get_youtube_from_isrc(row['isrc'])
|
||||||
video_title = result['title'].lower()
|
video_title = result['title'].lower()
|
||||||
|
|
||||||
phonetic_distance = jellyfish.levenshtein_distance(real_title, video_title)
|
match, distance = phonetic_compares.match_titles(video_title, real_title)
|
||||||
|
|
||||||
print(real_title, video_title, phonetic_distance)
|
if match:
|
||||||
if phonetic_distance > 1:
|
|
||||||
logging.warning(
|
logging.warning(
|
||||||
f"dont downloading {result['url']} cuz the phonetic distance ({phonetic_distance}) between {real_title} and {video_title} is to high.")
|
f"dont downloading {result['url']} cuz the phonetic distance ({distance}) between {real_title} and {video_title} is to high.")
|
||||||
return None
|
return None
|
||||||
return result['url']
|
return result['url']
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user