Moved the linguistic (title-comparison) code to a separate file

This commit is contained in:
lars 2022-10-24 13:23:07 +02:00
parent 966f0c9602
commit 137b18517f
4 changed files with 13 additions and 6 deletions

View File

@ -3,4 +3,5 @@ pandas~=1.5.0
mutagen~=1.46.0
musicbrainzngs~=0.7.1
jellyfish~=0.9.0
pydub~=0.25.1
pydub~=0.25.1
youtube_dl

Binary file not shown.

View File

@ -1,2 +1,8 @@
import jellyfish
TITLE_THRESHOLD_LEVENSHTEIN = 1
def match_titles(title_1: str, title_2: str) -> (bool, int):
    """Compare two titles by their Levenshtein edit distance.

    Args:
        title_1: First title to compare.
        title_2: Second title to compare.

    Returns:
        A tuple ``(exceeds_threshold, distance)``. ``distance`` is the
        Levenshtein distance between the two titles. ``exceeds_threshold``
        is True when that distance is strictly greater than
        ``TITLE_THRESHOLD_LEVENSHTEIN``.

    Note:
        Despite the function name, a True first element means the titles
        are too different, not that they match.
    """
    distance = jellyfish.levenshtein_distance(title_1, title_2)
    # Compare against the module-level threshold instead of a duplicated
    # literal, so adjusting the constant actually changes the behaviour.
    return distance > TITLE_THRESHOLD_LEVENSHTEIN, distance

View File

@ -1,9 +1,10 @@
import youtube_dl
import pandas as pd
import jellyfish
import logging
import time
import phonetic_compares
# Options dictionary, presumably handed to youtube_dl (usage is outside this view).
# NOTE(review): 'noplaylist' is the string 'True', not the boolean True -- confirm this is intended.
YDL_OPTIONS = {'format': 'bestaudio', 'noplaylist': 'True'}
# Presumably the key used to read the video URL from a youtube_dl result -- verify against usage.
YOUTUBE_URL_KEY = 'webpage_url'
# NOTE(review): unit is not visible here (looks like seconds, given `import time`) -- confirm.
WAIT_BETWEEN_BLOCK = 10
@ -31,12 +32,11 @@ def get_youtube_url(row):
result = get_youtube_from_isrc(row['isrc'])
video_title = result['title'].lower()
phonetic_distance = jellyfish.levenshtein_distance(real_title, video_title)
match, distance = phonetic_compares.match_titles(video_title, real_title)
print(real_title, video_title, phonetic_distance)
if phonetic_distance > 1:
if match:
logging.warning(
f"dont downloading {result['url']} cuz the phonetic distance ({phonetic_distance}) between {real_title} and {video_title} is to high.")
f"dont downloading {result['url']} cuz the phonetic distance ({distance}) between {real_title} and {video_title} is to high.")
return None
return result['url']