moved linguistics stuff to another file
This commit is contained in:
parent
966f0c9602
commit
137b18517f
@ -4,3 +4,4 @@ mutagen~=1.46.0
|
||||
musicbrainzngs~=0.7.1
|
||||
jellyfish~=0.9.0
|
||||
pydub~=0.25.1
|
||||
youtube_dl
|
BIN
src/__pycache__/phonetic_compares.cpython-310.pyc
Normal file
BIN
src/__pycache__/phonetic_compares.cpython-310.pyc
Normal file
Binary file not shown.
@ -1,2 +1,8 @@
|
||||
import jellyfish
|
||||
|
||||
TITLE_THRESHOLD_LEVENSHTEIN = 1
|
||||
|
||||
|
||||
def match_titles(title_1: str, title_2: str) -> tuple[bool, int]:
    """Compare two titles using their Levenshtein edit distance.

    NOTE: despite the function name, the returned flag is True when the
    titles are considered *different*, i.e. when the edit distance exceeds
    TITLE_THRESHOLD_LEVENSHTEIN. Callers treat a true flag as a mismatch.

    Args:
        title_1: First title to compare.
        title_2: Second title to compare.

    Returns:
        A ``(exceeds_threshold, distance)`` tuple: whether the distance is
        strictly greater than TITLE_THRESHOLD_LEVENSHTEIN, and the raw
        Levenshtein distance between the two titles.
    """
    distance = jellyfish.levenshtein_distance(title_1, title_2)
    # Compare against the module-level threshold instead of a duplicated
    # literal so that tuning the constant actually changes the behaviour.
    return distance > TITLE_THRESHOLD_LEVENSHTEIN, distance
|
||||
|
@ -1,9 +1,10 @@
|
||||
import youtube_dl
|
||||
import pandas as pd
|
||||
import jellyfish
|
||||
import logging
|
||||
import time
|
||||
|
||||
import phonetic_compares
|
||||
|
||||
YDL_OPTIONS = {'format': 'bestaudio', 'noplaylist': 'True'}
|
||||
YOUTUBE_URL_KEY = 'webpage_url'
|
||||
WAIT_BETWEEN_BLOCK = 10
|
||||
@ -31,12 +32,11 @@ def get_youtube_url(row):
|
||||
result = get_youtube_from_isrc(row['isrc'])
|
||||
video_title = result['title'].lower()
|
||||
|
||||
phonetic_distance = jellyfish.levenshtein_distance(real_title, video_title)
|
||||
match, distance = phonetic_compares.match_titles(video_title, real_title)
|
||||
|
||||
print(real_title, video_title, phonetic_distance)
|
||||
if phonetic_distance > 1:
|
||||
if match:
|
||||
logging.warning(
|
||||
f"dont downloading {result['url']} cuz the phonetic distance ({phonetic_distance}) between {real_title} and {video_title} is to high.")
|
||||
f"dont downloading {result['url']} cuz the phonetic distance ({distance}) between {real_title} and {video_title} is to high.")
|
||||
return None
|
||||
return result['url']
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user