diff --git a/requirements.txt b/requirements.txt
index 0516df7..2017b36 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,4 @@
requests~=2.28.1
-pandas~=1.5.0
mutagen~=1.46.0
musicbrainzngs~=0.7.1
jellyfish~=0.9.0
diff --git a/src/lyrics.py b/src/lyrics.py
deleted file mode 100644
index 2beb1d5..0000000
--- a/src/lyrics.py
+++ /dev/null
@@ -1,121 +0,0 @@
-from mutagen.id3 import ID3, USLT
-
-"""
-https://cweiske.de/tagebuch/rhythmbox-lyrics.htm
-Rythmbox, my music player doesn't support ID3 lyrics (USLT) yet, so I have to find something else
-Lyrics in MP3 ID3 tags (SYLT/USLT) is still missing, because GStreamer does not support that yet.
-
-One possible sollution would be to use ogg/vorbis files. Those lyrics are supported in rythmbox
-'So, the next Rhythmbox release (3.5.0 or 3.4.2) will read lyrics directly from ogg/vorbis files, using the LYRICS and SYNCLYRICS tags.'
-Another possible sollution (probaply the better one cuz I dont need to refactor whole metadata AGAIN)
-would be to write a Rhythmbox plugin that fetches lyrics from ID3 USLT
-"""
-
-# https://www.programcreek.com/python/example/63462/mutagen.mp3.EasyMP3
-# https://code.activestate.com/recipes/577138-embed-lyrics-into-mp3-files-using-mutagen-uslt-tag/
-
-MP3_PATH = "/home/lars/Music/deathcore/Brand of Sacrifice/The Interstice/Eclipse.mp3"
-LYRICS_BREAKING_DOWN = """
-[Chorus: Brian Burkheiser]
-I think, I think too much
-I'm a little bit paranoid, I think I’m breaking
-Maybe it's in my blood
-Got a pain that I can't avoid, I think I’m breaking down
-
-[Verse 1: Brian Burkheiser]
-Hate every single second, minute, hour every day
-Person in the mirror, they won't let me feel a thing
-Keep me focused on my problems, I'm addicted to the pain
-Everybody's out to get you
-[Pre-Chorus: Eric Vanlerberghe]
-I guess I never noticed how it came creeping in
-My enemy emotion, but I can't sink or swim
-I say I'm feeling hopeless, they give me medicine
-They give me medicine, they give me medicine
-
-[Chorus: Brian Burkheiser & Eric Vanlerberghe]
-I think I think too much (Too much)
-I'm a little bit paranoid, I think I'm breaking
-Maybe it’s in my blood (My blood)
-Got a pain that I can’t avoid, I think I'm breaking
-Down, I think I’m breaking
-Down, I think I'm breaking
-I think I think too much (Too much)
-I'm a little bit paranoid, I think I'm breaking down
-
-[Verse 2: Brian Burkheiser]
-Lies, every time they ask me, I just tell ’em that I'm fine
-Try to hide my demons, but they only multiply
-Keep me running from the voices on repeat inside my mind
-Everybody fucking hates you
-
-[Pre-Chorus: Eric Vanlerberghe]
-I guess I never noticed how it came creeping in
-My enemy emotion, but I can't sink or swim
-I say I'm feeling hopeless, but no one's listening
-But no one's listening, but no one's listening
-You might also like
-DOA
-I Prevail
-Rise Above It
-I Prevail
-Bow Down
-I Prevail
-[Chorus: Brian Burkheiser & Eric Vanlerberghe]
-I think I think too much (Too much)
-I'm a little bit paranoid, I think I'm breaking
-Maybe it's in my blood (My blood)
-Got a pain that I can't avoid, I think I'm breaking
-Down, I think I'm breaking
-Down, I think I'm breaking
-I think I think too much (Too much)
-I'm a little bit paranoid, I think I'm breaking down
-
-[Outro: Brian Burkheiser]
-I don't really like myself
-I don't really like myself
-I don't really like myself
-I don't really like myself
-I think I'm breaking down
-"""
-LYRICS_ECLIPSE = """
-Your offerings have consecrated
-They are marked by the brand
-The sun has seen it's fifth death
-For the red lake to flow again
-
-He will
-Feel their pain in order to
-Complete the final transformation
-A name new and old
-
-Your offerings have been consecrated by the laws of Causality
-Falcon of Darkness
-Send us into an age of abyss
-Blinded by beauty
-With stacks of bodies as high as the eye can see
-Feast, apostles, feast
-
-The one chosen by the hand of God
-The master of the sinful black sheep
-And the king of the faithful blind
-
-Welcome to the new age
-Welcome to the new age
-We are the branded ones"""
-
-
-def add_lyrics(file_name, lyrics=""):
- tags = ID3(file_name)
- uslt_output = USLT(encoding=3, lang=u'eng', desc=u'desc', text=lyrics)
- tags["USLT::'eng'"] = uslt_output
-
- tags.save(file_name)
-
-def get_lyrics(file_name):
- tags = ID3(file_name)
- return tags.getall("USLT")
-
-if __name__ == "__main__":
- add_lyrics(MP3_PATH, lyrics=LYRICS_ECLIPSE)
- print(get_lyrics(MP3_PATH))
diff --git a/src/lyrics/genius.py b/src/lyrics/genius.py
index e6e4a75..46900d1 100644
--- a/src/lyrics/genius.py
+++ b/src/lyrics/genius.py
@@ -3,10 +3,12 @@ import sys
import os
import logging
from typing import List
+from bs4 import BeautifulSoup
current = os.path.dirname(os.path.realpath(__file__))
parent = os.path.dirname(current)
sys.path.append(parent)
+# utils >:3
from tools import phonetic_compares
from tools.object_handeling import get_elem_from_obj
@@ -49,7 +51,8 @@ class Song:
self.lyricist: str
if get_elem_from_obj(song_data, ['lyrics_state']) != "complete":
- logger.warning(f"lyrics state of {self.title} by {self.artist} is not complete but {get_elem_from_obj(song_data, ['lyrics_state'])}")
+ logger.warning(
+ f"lyrics state of {self.title} by {self.artist} is not complete but {get_elem_from_obj(song_data, ['lyrics_state'])}")
self.valid = self.is_valid()
if self.valid:
@@ -61,33 +64,44 @@ class Song:
title_match, title_distance = phonetic_compares.match_titles(self.title, self.desired_data['track'])
artist_match, artist_distance = phonetic_compares.match_artists(self.artist, self.desired_data['artist'])
- return title_match and artist_match
+ return not title_match and not artist_match
def __repr__(self) -> str:
- return f"{self.title} by {self.artist}"
+ """Describe the song as title, artist and URL (this text is used in the log messages)."""
+ return f"{self.title} by {self.artist} ({self.url})"
-    def fetch_lyrics(self) -> str:
+    def fetch_lyrics(self) -> str | None:
+        """Download this song's page and extract the lyrics text from it.
+
+        The page at ``self.url`` is requested through ``session`` and parsed
+        with BeautifulSoup. The text of every
+        ``<div data-lyrics-container="true">`` element is collected, with the
+        text fragments separated by newlines, stored on ``self.lyrics`` and
+        returned.
+
+        Returns:
+            The lyrics as one newline-separated string, or ``None`` if the
+            request did not return HTTP 200 or the page has no lyrics
+            container.
+        """
         if not self.valid:
             logger.warning(f"{self.__repr__()} is invalid but the lyrics still get fetched. Something could be wrong.")
-        lyrics = ""
+
+        r = session.get(self.url)
+        if r.status_code != 200:
+            # Log through the module logger like the rest of this class,
+            # not through the root logger.
+            logger.warning(f"{r.url} returned {r.status_code}:\n{r.content}")
+            return None
+
+        soup = BeautifulSoup(r.content, "html.parser")
+        lyrics_soups = soup.find_all('div', {'data-lyrics-container': "true"})
+        if not lyrics_soups:
+            logger.warning(f"didn't found lyrics on {self.url}")
+            return None
+
+        # A page may spread the lyrics over more than one container; taking
+        # only the first one would silently drop the rest, so join them all
+        # in document order.
+        # NOTE(review): assumes every matching div holds lyrics text - confirm
+        # against a real page with several containers.
+        lyrics = "\n".join(
+            lyrics_soup.getText(separator="\n", strip=True) for lyrics_soup in lyrics_soups
+        )
+
         self.lyrics = lyrics
         return lyrics
-def build_search_query(artist: str, track: str) -> str:
- return f"{artist} - {track}"
-
-
def process_multiple_songs(song_datas: list, desired_data: dict) -> List[Song]:
+    """Wrap every raw search hit in a ``Song`` and return only the valid ones.
+
+    Args:
+        song_datas: raw hit objects from the search response.
+        desired_data: the artist/track that was searched for; ``Song`` compares
+            each hit against it to decide whether the hit is valid.
+
+    Returns:
+        The ``Song`` objects whose ``valid`` flag is set.
+    """
     all_songs = [Song(song_data, desired_data) for song_data in song_datas]
-    return [song for song in all_songs if not song.valid]
+    # Keep the songs that passed validation. ``fetch_lyrics`` logs a warning
+    # for every invalid song, so handing it only the invalid ones (the previous
+    # ``not song_.valid`` filter) selected exactly the wrong hits.
+    # NOTE(review): relies on ``Song.is_valid`` returning True for a matching
+    # title and artist - confirm against ``phonetic_compares``.
+    return [song_ for song_ in all_songs if song_.valid]
def search_song_list(artist: str, track: str) -> List[Song]:
endpoint = "https://genius.com/api/search/multi?q="
- url = endpoint + build_search_query(artist, track)
+ url = f"{endpoint}{artist} - {track}"
logging.info(f"requesting {url}")
desired_data = {
@@ -104,24 +118,56 @@ def search_song_list(artist: str, track: str) -> List[Song]:
logging.warning(f"{r.url} returned {get_elem_from_obj(content, ['meta', 'status'])}:\n{content}")
return []
- # print(r.status_code)
- # print(r.json())
-
sections = get_elem_from_obj(content, ['response', 'sections'])
for section in sections:
section_type = get_elem_from_obj(section, ['type'])
- print(section_type)
if section_type == "song":
return process_multiple_songs(get_elem_from_obj(section, ['hits'], return_if_none=[]), desired_data)
return []
+
def search(artist: str, track: str):
-    return search_song_list(artist, track)
+    """Return the lyrics of every song found for ``artist`` and ``track``.
+
+    Each song returned by ``search_song_list`` is asked for its lyrics; songs
+    whose lyrics could not be fetched (``None``) are left out. The order of the
+    search results is kept.
+    """
+    found_lyrics = []
+    for candidate in search_song_list(artist, track):
+        text = candidate.fetch_lyrics()
+        if text is not None:
+            found_lyrics.append(text)
+    return found_lyrics
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG)
- songs = search_song_list("Psychonaut 4", "Sana Sana Sana, Cura Cura Cura")
- print(songs)
+ # Manual check: build one Song from a hard-coded search hit (presumably copied
+ # from a Genius search response - verify) and print the lyrics fetched for it.
+ song = Song(
+ {'highlights': [], 'index': 'song', 'type': 'song',
+ 'result': {'_type': 'song', 'annotation_count': 0, 'api_path': '/songs/6142483',
+ 'artist_names': 'Psychonaut 4',
+ 'full_title': 'Sana Sana Sana, Cura Cura Cura by\xa0Psychonaut\xa04',
+ 'header_image_thumbnail_url': 'https://images.genius.com/f9f67a3f9c801f697fbaf68c7efd3599.300x300x1.jpg',
+ 'header_image_url': 'https://images.genius.com/f9f67a3f9c801f697fbaf68c7efd3599.651x651x1.jpg',
+ 'id': 6142483, 'instrumental': False, 'language': 'en', 'lyrics_owner_id': 4443216,
+ 'lyrics_state': 'complete', 'lyrics_updated_at': 1604698709,
+ 'path': '/Psychonaut-4-sana-sana-sana-cura-cura-cura-lyrics', 'pyongs_count': None,
+ 'relationships_index_url': 'https://genius.com/Psychonaut-4-sana-sana-sana-cura-cura-cura-sample',
+ 'release_date_components': {'year': 2020, 'month': 7, 'day': 1},
+ 'release_date_for_display': 'July 1, 2020',
+ 'release_date_with_abbreviated_month_for_display': 'Jul. 1, 2020',
+ 'song_art_image_thumbnail_url': 'https://images.genius.com/f9f67a3f9c801f697fbaf68c7efd3599.300x300x1.jpg',
+ 'song_art_image_url': 'https://images.genius.com/f9f67a3f9c801f697fbaf68c7efd3599.651x651x1.jpg',
+ 'stats': {'unreviewed_annotations': 0, 'hot': False}, 'title': 'Sana Sana Sana, Cura Cura Cura',
+ 'title_with_featured': 'Sana Sana Sana, Cura Cura Cura', 'updated_by_human_at': 1647353214,
+ 'url': 'https://genius.com/Psychonaut-4-sana-sana-sana-cura-cura-cura-lyrics',
+ 'featured_artists': [], 'primary_artist': {'_type': 'artist', 'api_path': '/artists/1108956',
+ 'header_image_url': 'https://images.genius.com/ff13efc74a043237cfca3fc0a6cb12dd.1000x563x1.jpg',
+ 'id': 1108956,
+ 'image_url': 'https://images.genius.com/25ff7cfdcb6d92a9f19ebe394a895736.640x640x1.jpg',
+ 'index_character': 'p', 'is_meme_verified': False,
+ 'is_verified': False, 'name': 'Psychonaut 4',
+ 'slug': 'Psychonaut-4',
+ 'url': 'https://genius.com/artists/Psychonaut-4'}}},
+ {'artist': 'Psychonaut 4', 'track': 'Sana Sana Sana, Cura Cura Cura'}
+ )
+ print(song.fetch_lyrics())
+
+ # NOTE(review): exit() ends the script here, so the search() run below is never reached.
+ exit()
+ songs = search("Psychonaut 4", "Sana Sana Sana, Cura Cura Cura")
+ for song in songs:
+ print(song)
diff --git a/src/lyrics_.py b/src/lyrics_.py
new file mode 100644
index 0000000..2a939ef
--- /dev/null
+++ b/src/lyrics_.py
@@ -0,0 +1,85 @@
+from mutagen.id3 import ID3, USLT
+from metadata import database as db
+import logging
+import os
+
+from lyrics import genius
+
+"""
+This whole Part is bodgy as hell and I need to rewrite this little file urgently. genius.py is really clean though :3
+Just wanted to get it to work.
+ - lyrics need to be put in the database and everything should continue from there then
+"""
+
+"""
+https://cweiske.de/tagebuch/rhythmbox-lyrics.htm
+Rhythmbox, my music player, doesn't support ID3 lyrics (USLT) yet, so I have to find something else.
+Lyrics in MP3 ID3 tags (SYLT/USLT) are still missing, because GStreamer does not support that yet.
+
+One possible solution would be to use ogg/vorbis files. Those lyrics are supported in Rhythmbox:
+'So, the next Rhythmbox release (3.5.0 or 3.4.2) will read lyrics directly from ogg/vorbis files, using the LYRICS and SYNCLYRICS tags.'
+Another possible solution (probably the better one, because I don't need to refactor the whole metadata AGAIN)
+would be to write a Rhythmbox plugin that fetches lyrics from ID3 USLT.
+
+I have written that Rhythmbox plugin: https://github.com/HeIIow2/rythmbox-id3-lyrics-support
+"""
+
+
+# https://www.programcreek.com/python/example/63462/mutagen.mp3.EasyMP3
+# https://code.activestate.com/recipes/577138-embed-lyrics-into-mp3-files-using-mutagen-uslt-tag/
+
+
+def add_lyrics(file_name, lyrics=""):
+    """Embed ``lyrics`` into the ID3 tag of ``file_name`` as a USLT frame.
+
+    The frame is created with ``encoding=3``, language ``eng`` and description
+    ``desc``, added to the tag and the tag is saved back to the same file.
+
+    Args:
+        file_name: path of the audio file whose ID3 tag is modified.
+        lyrics: the lyrics text to store.
+    """
+    # Log the target only; the full lyrics text does not belong on stdout.
+    logging.debug("writing lyrics to %s", file_name)
+    tags = ID3(file_name)
+    uslt_output = USLT(encoding=3, lang=u'eng', desc=u'desc', text=lyrics)
+    # Let mutagen file the frame under the frame's own key, so a repeated run
+    # replaces the previous lyrics frame instead of keeping a second one under
+    # the hand-written key "USLT::'eng'".
+    tags.add(uslt_output)
+
+    tags.save(file_name)
+
+
+def get_lyrics(file_name):
+    """Return all USLT frames found in the ID3 tag of ``file_name``."""
+    return ID3(file_name).getall("USLT")
+
+
+def fetch_single_lyrics(row: dict):
+    """Look up lyrics for one track row and embed them in its audio file.
+
+    Reads ``row['file']`` (a path below ``~/Music``),
+    ``row['artists'][0]['name']`` and ``row['title']``, asks ``genius.search``
+    for lyrics and writes the first result into the file with ``add_lyrics``.
+    Nothing is done when the row has no file or no artist, when the file does
+    not exist, or when no lyrics were found.
+
+    Args:
+        row: one track row as returned by the database.
+    """
+    # The database has a query for tracks whose file is NULL, so a row can
+    # arrive without a path; os.path.join would raise on None.
+    relative_path = row['file']
+    if not relative_path:
+        return
+    file_ = os.path.join(os.path.expanduser('~/Music'), relative_path)
+    if not os.path.exists(file_):
+        return
+
+    # A row without artists cannot be searched for.
+    artists = row['artists']
+    if not artists:
+        return
+    artist = artists[0]['name']
+    track = row['title']
+
+    print(file_, artist, track)
+    lyrics = genius.search(artist, track)
+    if not lyrics:
+        return
+    add_lyrics(file_, lyrics[0])
+
+
+def fetch_lyrics(database: db.Database):
+    """Print every row of ``database.get_custom_track([])`` and fetch its lyrics."""
+    rows = database.get_custom_track([])
+    for track_row in rows:
+        print(track_row)
+        fetch_single_lyrics(track_row)
+
+
+if __name__ == "__main__":
+    # Stand-alone run: open the metadata database kept in the system temp
+    # directory and fetch lyrics for the tracks it returns.
+    import os
+    import tempfile
+
+    logging.basicConfig(level=logging.DEBUG)
+    database_logger = logging.getLogger("database")
+    database_logger.setLevel(logging.DEBUG)
+
+    work_dir = os.path.join(tempfile.gettempdir(), "music-downloader")
+    if not os.path.exists(work_dir):
+        os.mkdir(work_dir)
+
+    database_file = os.path.join(work_dir, "metadata.db")
+    structure_file = os.path.join(work_dir, "database_structure.sql")
+    structure_url = "https://raw.githubusercontent.com/HeIIow2/music-downloader/new_metadata/assets/database_structure.sql"
+
+    fetch_lyrics(db.Database(database_file,
+                             structure_file,
+                             structure_url,
+                             database_logger,
+                             reset_anyways=False))
diff --git a/src/main.py b/src/main.py
index df97213..a56e0c4 100644
--- a/src/main.py
+++ b/src/main.py
@@ -6,11 +6,13 @@ import download_links
import url_to_path
import download
+# NEEDS REFACTORING
+from lyrics_ import fetch_lyrics
+
import logging
import os
import tempfile
-
TEMP_FOLDER = "music-downloader"
LOG_FILE = "download_logs.log"
DATABASE_FILE = "metadata.db"
@@ -37,13 +39,11 @@ logging.basicConfig(level=logging.INFO, filename=os.path.join(temp_dir, LOG_FILE
database = Database(os.path.join(temp_dir, DATABASE_FILE),
os.path.join(temp_dir, DATABASE_STRUCTURE_FILE),
- DATABASE_STRUCTURE_FALLBACK,
+ DATABASE_STRUCTURE_FALLBACK,
DATABASE_LOGGER,
reset_anyways=True)
-
-
def get_existing_genre():
valid_directories = []
for elem in os.listdir(MUSIC_DIR):
@@ -77,6 +77,7 @@ def search_for_metadata():
return search.current_option
+
def get_genre():
existing_genres = get_existing_genre()
print("printing available genres:")
@@ -125,6 +126,10 @@ def cli(start_at: int = 0):
logging.info("starting to download the mp3's")
download.Download(database, DOWNLOAD_LOGGER, proxies=proxies, base_path=MUSIC_DIR)
+ if start_at <= 4:
+ logging.info("starting to fetch the lyrics")
+ fetch_lyrics(database)
+
if __name__ == "__main__":
cli(start_at=0)
diff --git a/src/metadata/database.py b/src/metadata/database.py
index f10ef22..da78c7a 100644
--- a/src/metadata/database.py
+++ b/src/metadata/database.py
@@ -205,7 +205,9 @@ GROUP BY track.id;
return self.get_custom_track(["(track.file IS NULL OR track.path IS NULL OR track.genre IS NULL)"])
def update_download_status(self, track_id: str):
-        pass
+        """Mark the track with the given id as downloaded and commit the change.
+
+        Args:
+            track_id: value of the ``id`` column of the ``track`` row to update.
+        """
+        # No comma before WHERE (it made the statement a syntax error), and the
+        # id is bound as a parameter instead of being formatted into the SQL
+        # text, so quotes inside the id cannot break or alter the statement.
+        # NOTE(review): ``?`` is the sqlite3 placeholder style - confirm that
+        # self.cursor is a sqlite3 cursor.
+        query = "UPDATE track SET downloaded = 1 WHERE id = ?;"
+        self.cursor.execute(query, (track_id,))
+        self.connection.commit()
def set_download_data(self, track_id: str, url: str, src: str):
query = f"""
diff --git a/src/scraping/musify.py b/src/scraping/musify.py
index b4719bc..4afd889 100644
--- a/src/scraping/musify.py
+++ b/src/scraping/musify.py
@@ -14,6 +14,7 @@ TIMEOUT = 10
session = requests.Session()
session.headers = {
+ "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:106.0) Gecko/20100101 Firefox/106.0",
"Connection": "keep-alive",
"Referer": "https://musify.club/"
}
@@ -137,20 +138,4 @@ def get_musify_url_slow(row):
if __name__ == "__main__":
- import pandas as pd
- import json
-
- TOR = True
- if TOR:
- set_proxy({
- 'http': 'socks5h://127.0.0.1:9150',
- 'https': 'socks5h://127.0.0.1:9150'
- })
-
- df = pd.read_csv("../temp/.cache1.csv")
-
- for idx, row in df.iterrows():
- row['artist'] = json.loads(row['artist'].replace("'", '"'))
- print("-" * 200)
- print("slow")
- print(get_musify_url_slow(row))
+ pass