started genius lyrics fetcher
This commit is contained in:
parent
3bc133ef62
commit
9bb510e41a
@ -1,5 +1,4 @@
|
||||
requests~=2.28.1
|
||||
pandas~=1.5.0
|
||||
mutagen~=1.46.0
|
||||
musicbrainzngs~=0.7.1
|
||||
jellyfish~=0.9.0
|
||||
|
121
src/lyrics.py
121
src/lyrics.py
@ -1,121 +0,0 @@
|
||||
from mutagen.id3 import ID3, USLT
|
||||
|
||||
"""
|
||||
https://cweiske.de/tagebuch/rhythmbox-lyrics.htm
|
||||
Rhythmbox, my music player, doesn't support ID3 lyrics (USLT) yet, so I have to find something else.
|
||||
Lyrics in MP3 ID3 tags (SYLT/USLT) is still missing, because GStreamer does not support that yet.
|
||||
|
||||
One possible solution would be to use ogg/vorbis files. Those lyrics are supported in Rhythmbox:
|
||||
'So, the next Rhythmbox release (3.5.0 or 3.4.2) will read lyrics directly from ogg/vorbis files, using the LYRICS and SYNCLYRICS tags.'
|
||||
Another possible solution (probably the better one, because I don't need to refactor the whole metadata AGAIN)
|
||||
would be to write a Rhythmbox plugin that fetches lyrics from ID3 USLT
|
||||
"""
|
||||
|
||||
# https://www.programcreek.com/python/example/63462/mutagen.mp3.EasyMP3
|
||||
# https://code.activestate.com/recipes/577138-embed-lyrics-into-mp3-files-using-mutagen-uslt-tag/
|
||||
|
||||
MP3_PATH = "/home/lars/Music/deathcore/Brand of Sacrifice/The Interstice/Eclipse.mp3"
|
||||
LYRICS_BREAKING_DOWN = """
|
||||
[Chorus: Brian Burkheiser]
|
||||
I think, I think too much
|
||||
I'm a little bit paranoid, I think I’m breaking
|
||||
Maybe it's in my blood
|
||||
Got a pain that I can't avoid, I think I’m breaking down
|
||||
|
||||
[Verse 1: Brian Burkheiser]
|
||||
Hate every single second, minute, hour every day
|
||||
Person in the mirror, they won't let me feel a thing
|
||||
Keep me focused on my problems, I'm addicted to the pain
|
||||
Everybody's out to get you
|
||||
[Pre-Chorus: Eric Vanlerberghe]
|
||||
I guess I never noticed how it came creeping in
|
||||
My enemy emotion, but I can't sink or swim
|
||||
I say I'm feeling hopeless, they give me medicine
|
||||
They give me medicine, they give me medicine
|
||||
|
||||
[Chorus: Brian Burkheiser & Eric Vanlerberghe]
|
||||
I think I think too much (Too much)
|
||||
I'm a little bit paranoid, I think I'm breaking
|
||||
Maybe it’s in my blood (My blood)
|
||||
Got a pain that I can’t avoid, I think I'm breaking
|
||||
Down, I think I’m breaking
|
||||
Down, I think I'm breaking
|
||||
I think I think too much (Too much)
|
||||
I'm a little bit paranoid, I think I'm breaking down
|
||||
|
||||
[Verse 2: Brian Burkheiser]
|
||||
Lies, every time they ask me, I just tell ’em that I'm fine
|
||||
Try to hide my demons, but they only multiply
|
||||
Keep me running from the voices on repeat inside my mind
|
||||
Everybody fucking hates you
|
||||
|
||||
[Pre-Chorus: Eric Vanlerberghe]
|
||||
I guess I never noticed how it came creeping in
|
||||
My enemy emotion, but I can't sink or swim
|
||||
I say I'm feeling hopeless, but no one's listening
|
||||
But no one's listening, but no one's listening
|
||||
You might also like
|
||||
DOA
|
||||
I Prevail
|
||||
Rise Above It
|
||||
I Prevail
|
||||
Bow Down
|
||||
I Prevail
|
||||
[Chorus: Brian Burkheiser & Eric Vanlerberghe]
|
||||
I think I think too much (Too much)
|
||||
I'm a little bit paranoid, I think I'm breaking
|
||||
Maybe it's in my blood (My blood)
|
||||
Got a pain that I can't avoid, I think I'm breaking
|
||||
Down, I think I'm breaking
|
||||
Down, I think I'm breaking
|
||||
I think I think too much (Too much)
|
||||
I'm a little bit paranoid, I think I'm breaking down
|
||||
|
||||
[Outro: Brian Burkheiser]
|
||||
I don't really like myself
|
||||
I don't really like myself
|
||||
I don't really like myself
|
||||
I don't really like myself
|
||||
I think I'm breaking down
|
||||
"""
|
||||
LYRICS_ECLIPSE = """
|
||||
Your offerings have consecrated
|
||||
They are marked by the brand
|
||||
The sun has seen it's fifth death
|
||||
For the red lake to flow again
|
||||
|
||||
He will
|
||||
Feel their pain in order to
|
||||
Complete the final transformation
|
||||
A name new and old
|
||||
|
||||
Your offerings have been consecrated by the laws of Causality
|
||||
Falcon of Darkness
|
||||
Send us into an age of abyss
|
||||
Blinded by beauty
|
||||
With stacks of bodies as high as the eye can see
|
||||
Feast, apostles, feast
|
||||
|
||||
The one chosen by the hand of God
|
||||
The master of the sinful black sheep
|
||||
And the king of the faithful blind
|
||||
|
||||
Welcome to the new age
|
||||
Welcome to the new age
|
||||
We are the branded ones"""
|
||||
|
||||
|
||||
def add_lyrics(file_name, lyrics=""):
    """Embed ``lyrics`` into the ID3 tag of ``file_name`` as a USLT frame.

    The frame is written with UTF-8 encoding, language ``eng`` and the
    description ``desc``. An existing frame with the same description and
    language is replaced, so tagging the same file repeatedly does not pile
    up duplicate lyrics frames.

    :param file_name: path of the MP3 file to tag
    :param lyrics: plain-text lyrics to store
    """
    from mutagen.id3 import ID3NoHeaderError

    try:
        tags = ID3(file_name)
    except ID3NoHeaderError:
        # The file carries no ID3 header yet: start from an empty tag.
        tags = ID3()

    uslt_output = USLT(encoding=3, lang=u'eng', desc=u'desc', text=lyrics)
    # ``add`` stores the frame under its own hash key. The previous hand-written
    # key "USLT::'eng'" never matched the key mutagen assigns when loading the
    # file, so a second run saved the old and the new frame side by side.
    tags.add(uslt_output)

    tags.save(file_name)
|
||||
|
||||
def get_lyrics(file_name):
    """Return all USLT (unsynchronised lyrics) frames found in ``file_name``.

    :param file_name: path of the MP3 file whose ID3 tag is read
    :return: whatever ``ID3.getall("USLT")`` yields for that file
    """
    return ID3(file_name).getall("USLT")
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: write the sample lyrics into the sample file and
    # print what can be read back from its tag afterwards.
    add_lyrics(MP3_PATH, lyrics=LYRICS_ECLIPSE)
    stored_frames = get_lyrics(MP3_PATH)
    print(stored_frames)
|
@ -3,10 +3,12 @@ import sys
|
||||
import os
|
||||
import logging
|
||||
from typing import List
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
current = os.path.dirname(os.path.realpath(__file__))
|
||||
parent = os.path.dirname(current)
|
||||
sys.path.append(parent)
|
||||
# utils >:3
|
||||
from tools import phonetic_compares
|
||||
from tools.object_handeling import get_elem_from_obj
|
||||
|
||||
@ -49,7 +51,8 @@ class Song:
|
||||
self.lyricist: str
|
||||
|
||||
if get_elem_from_obj(song_data, ['lyrics_state']) != "complete":
|
||||
logger.warning(f"lyrics state of {self.title} by {self.artist} is not complete but {get_elem_from_obj(song_data, ['lyrics_state'])}")
|
||||
logger.warning(
|
||||
f"lyrics state of {self.title} by {self.artist} is not complete but {get_elem_from_obj(song_data, ['lyrics_state'])}")
|
||||
|
||||
self.valid = self.is_valid()
|
||||
if self.valid:
|
||||
@ -61,33 +64,44 @@ class Song:
|
||||
title_match, title_distance = phonetic_compares.match_titles(self.title, self.desired_data['track'])
|
||||
artist_match, artist_distance = phonetic_compares.match_artists(self.artist, self.desired_data['artist'])
|
||||
|
||||
return title_match and artist_match
|
||||
return not title_match and not artist_match
|
||||
|
||||
def __repr__(self) -> str:
    """Return ``"<title> by <artist> (<url>)"`` for log and debug output."""
    # Only the URL-including form is kept; the older short form that preceded
    # it returned first and made this line unreachable.
    return f"{self.title} by {self.artist} ({self.url})"
|
||||
|
||||
def fetch_lyrics(self) -> str | None:
    """Download the song page at ``self.url`` and extract the lyrics text.

    The text is taken from every ``<div data-lyrics-container="true">`` on the
    page, e.g.
    ``<div data-lyrics-container="true" class="Lyrics__Container-...">``.

    :return: the lyrics (also stored in ``self.lyrics``), or ``None`` when the
        request does not answer with status 200 or no lyrics container is
        found
    """
    if not self.valid:
        logger.warning(f"{self.__repr__()} is invalid but the lyrics still get fetched. Something could be wrong.")

    r = session.get(self.url)
    if r.status_code != 200:
        # Use the module logger like every other message in this method.
        logger.warning(f"{r.url} returned {r.status_code}:\n{r.content}")
        return None

    soup = BeautifulSoup(r.content, "html.parser")
    lyrics_soups = soup.find_all('div', {'data-lyrics-container': "true"})
    if len(lyrics_soups) == 0:
        logger.warning(f"didn't found lyrics on {self.url}")
        return None
    if len(lyrics_soups) != 1:
        logger.debug(f"lyrics on {self.url} are spread over {len(lyrics_soups)} containers")

    # NOTE(review): a page may split the lyrics over several containers; taking
    # only the first one would cut the text short, so all of them are joined.
    # Confirm against a page that has more than one container.
    lyrics = "\n".join(
        lyrics_soup.getText(separator="\n", strip=True)
        for lyrics_soup in lyrics_soups
    )

    self.lyrics = lyrics
    return lyrics
|
||||
|
||||
|
||||
def build_search_query(artist: str, track: str) -> str:
    """Build the percent-encoded value of the ``q`` search parameter.

    The result is appended directly to a URL ending in ``?q=``, so reserved
    characters in the names (``&``, ``#``, ``?``, ``/`` ...) must be escaped;
    otherwise they would end the query early or start a new parameter.

    :param artist: artist name as free text
    :param track: track title as free text
    :return: ``"<artist> - <track>"`` with every reserved character escaped
    """
    from urllib.parse import quote

    return quote(f"{artist} - {track}", safe="")
|
||||
|
||||
|
||||
def process_multiple_songs(song_datas: list, desired_data: dict) -> List[Song]:
    """Wrap every search hit in a ``Song`` and filter the result by its ``valid`` flag.

    :param song_datas: raw hit entries taken from the search response
    :param desired_data: the artist/track the caller is looking for; it is
        passed on to ``Song`` unchanged
    :return: the songs whose ``valid`` attribute is falsy
    """
    all_songs = [Song(song_data, desired_data) for song_data in song_datas]
    # NOTE(review): this keeps the songs that are *not* valid, while
    # ``fetch_lyrics`` warns about exactly those. The condition looks inverted;
    # confirm against ``Song.is_valid`` and ``phonetic_compares`` before changing it.
    return [song_ for song_ in all_songs if not song_.valid]
|
||||
|
||||
|
||||
def search_song_list(artist: str, track: str) -> List[Song]:
|
||||
endpoint = "https://genius.com/api/search/multi?q="
|
||||
url = endpoint + build_search_query(artist, track)
|
||||
url = f"{endpoint}{artist} - {track}"
|
||||
logging.info(f"requesting {url}")
|
||||
|
||||
desired_data = {
|
||||
@ -104,24 +118,56 @@ def search_song_list(artist: str, track: str) -> List[Song]:
|
||||
logging.warning(f"{r.url} returned {get_elem_from_obj(content, ['meta', 'status'])}:\n{content}")
|
||||
return []
|
||||
|
||||
# print(r.status_code)
|
||||
# print(r.json())
|
||||
|
||||
sections = get_elem_from_obj(content, ['response', 'sections'])
|
||||
for section in sections:
|
||||
section_type = get_elem_from_obj(section, ['type'])
|
||||
print(section_type)
|
||||
if section_type == "song":
|
||||
return process_multiple_songs(get_elem_from_obj(section, ['hits'], return_if_none=[]), desired_data)
|
||||
|
||||
return []
|
||||
|
||||
|
||||
def search(artist: str, track: str):
    """Search for ``track`` by ``artist`` and return the lyrics of every hit.

    :param artist: artist name to search for
    :param track: track title to search for
    :return: list of lyrics strings, one per song whose lyrics could be
        fetched; songs for which ``fetch_lyrics`` returned ``None`` are left out
    """
    # The leftover early ``return search_song_list(...)`` is dropped: it handed
    # back ``Song`` objects and made the lyrics fetching below unreachable.
    raw_songs = search_song_list(artist, track)
    all_lyrics = [raw_song.fetch_lyrics() for raw_song in raw_songs]
    return [i for i in all_lyrics if i is not None]
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Ad-hoc manual test of the search and the lyrics fetching; it performs
    # real HTTP requests.
    logging.basicConfig(level=logging.DEBUG)

    # Step 1: run the search and show the songs it yields.
    songs = search_song_list("Psychonaut 4", "Sana Sana Sana, Cura Cura Cura")
    print(songs)
    # Step 2: build a Song from a hard-coded hit (presumably copied from a
    # real search response - TODO confirm) and fetch its lyrics.
    song = Song(
        {'highlights': [], 'index': 'song', 'type': 'song',
         'result': {'_type': 'song', 'annotation_count': 0, 'api_path': '/songs/6142483',
                    'artist_names': 'Psychonaut 4',
                    'full_title': 'Sana Sana Sana, Cura Cura Cura by\xa0Psychonaut\xa04',
                    'header_image_thumbnail_url': 'https://images.genius.com/f9f67a3f9c801f697fbaf68c7efd3599.300x300x1.jpg',
                    'header_image_url': 'https://images.genius.com/f9f67a3f9c801f697fbaf68c7efd3599.651x651x1.jpg',
                    'id': 6142483, 'instrumental': False, 'language': 'en', 'lyrics_owner_id': 4443216,
                    'lyrics_state': 'complete', 'lyrics_updated_at': 1604698709,
                    'path': '/Psychonaut-4-sana-sana-sana-cura-cura-cura-lyrics', 'pyongs_count': None,
                    'relationships_index_url': 'https://genius.com/Psychonaut-4-sana-sana-sana-cura-cura-cura-sample',
                    'release_date_components': {'year': 2020, 'month': 7, 'day': 1},
                    'release_date_for_display': 'July 1, 2020',
                    'release_date_with_abbreviated_month_for_display': 'Jul. 1, 2020',
                    'song_art_image_thumbnail_url': 'https://images.genius.com/f9f67a3f9c801f697fbaf68c7efd3599.300x300x1.jpg',
                    'song_art_image_url': 'https://images.genius.com/f9f67a3f9c801f697fbaf68c7efd3599.651x651x1.jpg',
                    'stats': {'unreviewed_annotations': 0, 'hot': False}, 'title': 'Sana Sana Sana, Cura Cura Cura',
                    'title_with_featured': 'Sana Sana Sana, Cura Cura Cura', 'updated_by_human_at': 1647353214,
                    'url': 'https://genius.com/Psychonaut-4-sana-sana-sana-cura-cura-cura-lyrics',
                    'featured_artists': [], 'primary_artist': {'_type': 'artist', 'api_path': '/artists/1108956',
                                                               'header_image_url': 'https://images.genius.com/ff13efc74a043237cfca3fc0a6cb12dd.1000x563x1.jpg',
                                                               'id': 1108956,
                                                               'image_url': 'https://images.genius.com/25ff7cfdcb6d92a9f19ebe394a895736.640x640x1.jpg',
                                                               'index_character': 'p', 'is_meme_verified': False,
                                                               'is_verified': False, 'name': 'Psychonaut 4',
                                                               'slug': 'Psychonaut-4',
                                                               'url': 'https://genius.com/artists/Psychonaut-4'}}},
        {'artist': 'Psychonaut 4', 'track': 'Sana Sana Sana, Cura Cura Cura'}
    )
    print(song.fetch_lyrics())

    # NOTE(review): the script stops here, so everything below never runs.
    # Either drop the exit() or delete the dead code once debugging is done.
    exit()
    songs = search("Psychonaut 4", "Sana Sana Sana, Cura Cura Cura")
    for song in songs:
        print(song)
|
||||
|
85
src/lyrics_.py
Normal file
85
src/lyrics_.py
Normal file
@ -0,0 +1,85 @@
|
||||
from mutagen.id3 import ID3, USLT
|
||||
from metadata import database as db
|
||||
import logging
|
||||
import os
|
||||
|
||||
from lyrics import genius
|
||||
|
||||
"""
|
||||
This whole part is a bodge and I need to rewrite this little file urgently. genius.py is really clean though :3
|
||||
Just wanted to get it to work.
|
||||
- lyrics need to be put in the database and everything should continue from there then
|
||||
"""
|
||||
|
||||
"""
|
||||
https://cweiske.de/tagebuch/rhythmbox-lyrics.htm
|
||||
Rhythmbox, my music player, doesn't support ID3 lyrics (USLT) yet, so I have to find something else.
|
||||
Lyrics in MP3 ID3 tags (SYLT/USLT) is still missing, because GStreamer does not support that yet.
|
||||
|
||||
One possible solution would be to use ogg/vorbis files. Those lyrics are supported in Rhythmbox:
|
||||
'So, the next Rhythmbox release (3.5.0 or 3.4.2) will read lyrics directly from ogg/vorbis files, using the LYRICS and SYNCLYRICS tags.'
|
||||
Another possible solution (probably the better one, because I don't need to refactor the whole metadata AGAIN)
|
||||
would be to write a Rhythmbox plugin that fetches lyrics from ID3 USLT
|
||||
|
||||
I have written that Rhythmbox plugin: https://github.com/HeIIow2/rythmbox-id3-lyrics-support
|
||||
"""
|
||||
|
||||
|
||||
# https://www.programcreek.com/python/example/63462/mutagen.mp3.EasyMP3
|
||||
# https://code.activestate.com/recipes/577138-embed-lyrics-into-mp3-files-using-mutagen-uslt-tag/
|
||||
|
||||
|
||||
def add_lyrics(file_name, lyrics=""):
    """Embed ``lyrics`` into the ID3 tag of ``file_name`` as a USLT frame.

    The frame is written with UTF-8 encoding, language ``eng`` and the
    description ``desc``. An existing frame with the same description and
    language is replaced, so tagging the same file repeatedly does not pile
    up duplicate lyrics frames.

    :param file_name: path of the MP3 file to tag
    :param lyrics: plain-text lyrics to store
    """
    from mutagen.id3 import ID3NoHeaderError

    # Debug output goes through logging instead of dumping the whole lyrics
    # text to stdout for every tagged file.
    logging.debug("writing lyrics to %s:\n%s", file_name, lyrics)

    try:
        tags = ID3(file_name)
    except ID3NoHeaderError:
        # The file carries no ID3 header yet: start from an empty tag.
        tags = ID3()

    uslt_output = USLT(encoding=3, lang=u'eng', desc=u'desc', text=lyrics)
    # ``add`` stores the frame under its own hash key. The previous hand-written
    # key "USLT::'eng'" never matched the key mutagen assigns when loading the
    # file, so a second run saved the old and the new frame side by side.
    tags.add(uslt_output)

    tags.save(file_name)
|
||||
|
||||
|
||||
def get_lyrics(file_name):
    """Return all USLT (unsynchronised lyrics) frames found in ``file_name``.

    :param file_name: path of the MP3 file whose ID3 tag is read
    :return: whatever ``ID3.getall("USLT")`` yields for that file
    """
    return ID3(file_name).getall("USLT")
|
||||
|
||||
|
||||
def fetch_single_lyrics(row: dict):
    """Fetch lyrics for one track row and embed them into its audio file.

    Nothing happens (and ``None`` is returned) when the row has no file, the
    file does not exist below ``~/Music``, the row lists no artist, or the
    search yields no lyrics.

    :param row: track row with the keys ``file``, ``artists`` and ``title``;
        ``artists`` is expected to be a list of dicts with a ``name`` key
    """
    relative_path = row['file']
    if not relative_path:
        # Tracks that were not downloaded carry no file path; joining ``None``
        # onto the music directory would raise a TypeError.
        return

    file_ = os.path.join(os.path.expanduser('~/Music'), relative_path)
    if not os.path.exists(file_):
        return

    artists = row['artists']
    if not artists:
        logging.warning(f"no artist known for {file_}, skipping the lyrics search")
        return
    artist = artists[0]['name']
    track = row['title']

    logging.info(f"fetching lyrics for {file_} ({artist} - {track})")
    lyrics = genius.search(artist, track)
    if not lyrics:
        return
    # Only the first hit is written to the file.
    add_lyrics(file_, lyrics[0])
|
||||
|
||||
|
||||
def fetch_lyrics(database: db.Database):
    """Print each track row of ``database`` and fetch its lyrics.

    :param database: metadata database; its tracks are read through
        ``get_custom_track([])``, i.e. without any extra condition
    """
    track_rows = database.get_custom_track([])
    for track_row in track_rows:
        print(track_row)
        fetch_single_lyrics(track_row)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Stand-alone run: open the metadata database in the temp folder and fetch
    # the lyrics for every track in it.
    import tempfile

    temp_folder = "music-downloader"
    temp_dir = os.path.join(tempfile.gettempdir(), temp_folder)
    # ``exist_ok`` replaces the separate existence check, which could race
    # with another process creating the folder in between.
    os.makedirs(temp_dir, exist_ok=True)

    logging.basicConfig(level=logging.DEBUG)
    db_logger = logging.getLogger("database")
    db_logger.setLevel(logging.DEBUG)

    database = db.Database(os.path.join(temp_dir, "metadata.db"),
                           os.path.join(temp_dir, "database_structure.sql"),
                           "https://raw.githubusercontent.com/HeIIow2/music-downloader/new_metadata/assets/database_structure.sql",
                           db_logger,
                           reset_anyways=False)

    fetch_lyrics(database)
|
11
src/main.py
11
src/main.py
@ -6,11 +6,13 @@ import download_links
|
||||
import url_to_path
|
||||
import download
|
||||
|
||||
# NEEDS REFACTORING
|
||||
from lyrics_ import fetch_lyrics
|
||||
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
|
||||
TEMP_FOLDER = "music-downloader"
|
||||
LOG_FILE = "download_logs.log"
|
||||
DATABASE_FILE = "metadata.db"
|
||||
@ -42,8 +44,6 @@ database = Database(os.path.join(temp_dir, DATABASE_FILE),
|
||||
reset_anyways=True)
|
||||
|
||||
|
||||
|
||||
|
||||
def get_existing_genre():
|
||||
valid_directories = []
|
||||
for elem in os.listdir(MUSIC_DIR):
|
||||
@ -77,6 +77,7 @@ def search_for_metadata():
|
||||
|
||||
return search.current_option
|
||||
|
||||
|
||||
def get_genre():
|
||||
existing_genres = get_existing_genre()
|
||||
print("printing available genres:")
|
||||
@ -125,6 +126,10 @@ def cli(start_at: int = 0):
|
||||
logging.info("starting to download the mp3's")
|
||||
download.Download(database, DOWNLOAD_LOGGER, proxies=proxies, base_path=MUSIC_DIR)
|
||||
|
||||
if start_at <= 4:
|
||||
logging.info("starting to fetch the lyrics")
|
||||
fetch_lyrics(database)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
cli(start_at=0)
|
||||
|
@ -205,7 +205,9 @@ GROUP BY track.id;
|
||||
return self.get_custom_track(["(track.file IS NULL OR track.path IS NULL OR track.genre IS NULL)"])
|
||||
|
||||
def update_download_status(self, track_id: str):
    """Mark the track with the given id as downloaded and commit the change.

    :param track_id: value of ``track.id`` of the row to update
    """
    # The former statement had a stray comma before WHERE (a syntax error in
    # SQL) and pasted the id into the query text. A bound parameter fixes the
    # quoting and keeps ids containing quotes from breaking the statement.
    query = "UPDATE track SET downloaded = 1 WHERE id = ?;"
    self.cursor.execute(query, (track_id,))
    self.connection.commit()
|
||||
|
||||
def set_download_data(self, track_id: str, url: str, src: str):
|
||||
query = f"""
|
||||
|
@ -14,6 +14,7 @@ TIMEOUT = 10
|
||||
|
||||
session = requests.Session()
|
||||
session.headers = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:106.0) Gecko/20100101 Firefox/106.0",
|
||||
"Connection": "keep-alive",
|
||||
"Referer": "https://musify.club/"
|
||||
}
|
||||
@ -137,20 +138,4 @@ def get_musify_url_slow(row):
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual test run: resolve the musify URL for every row of a cached CSV.
    import pandas as pd
    import json

    # Route all requests through a local SOCKS5 proxy on port 9150
    # (presumably a Tor client - TODO confirm).
    TOR = True
    if TOR:
        set_proxy({
            'http': 'socks5h://127.0.0.1:9150',
            'https': 'socks5h://127.0.0.1:9150'
        })

    df = pd.read_csv("../temp/.cache1.csv")

    for idx, row in df.iterrows():
        # The artist column is turned into JSON by swapping single for double
        # quotes. NOTE(review): this breaks as soon as a value itself contains
        # an apostrophe - verify against the cache format.
        row['artist'] = json.loads(row['artist'].replace("'", '"'))
        print("-" * 200)
        print("slow")
        print(get_musify_url_slow(row))
        pass
|
||||
|
Loading…
Reference in New Issue
Block a user