diff --git a/src/music_kraken/database/objects/formatted_text.py b/src/music_kraken/database/objects/formatted_text.py index dee1a67..c98ca3f 100644 --- a/src/music_kraken/database/objects/formatted_text.py +++ b/src/music_kraken/database/objects/formatted_text.py @@ -8,15 +8,16 @@ https://pandoc.org/installing.html !!!!!!!!!!!!!!!!!!IMPORTANT!!!!!!!!!!!!!!!!!! """ + class FormattedText: doc = None def __init__( - self, - plaintext: str = None, - markdown: str = None, - html: str = None - ) -> None: + self, + plaintext: str = None, + markdown: str = None, + html: str = None + ) -> None: self.set_plaintext(plaintext) self.set_markdown(markdown) self.set_html(html) @@ -46,7 +47,7 @@ class FormattedText: return None return pandoc.write(self.doc, format="html").strip() - def get_plaintext(self)-> str: + def get_plaintext(self) -> str: if self.doc is None: return None return pandoc.write(self.doc, format="plain").strip() @@ -60,6 +61,7 @@ class NotesAttributes: def __init__(self) -> None: pass + if __name__ == "__main__": _plaintext = """ World of Work @@ -118,4 +120,4 @@ According to the band, "Ghost Bath" refers to "the act of committing suicide by # print("-"*30) # print(notes.get_markdown()) - print(notes.get_markdown()) \ No newline at end of file + print(notes.get_markdown()) diff --git a/src/music_kraken/pages/encyclopaedia_metallum.py b/src/music_kraken/pages/encyclopaedia_metallum.py index 5308f34..721aca1 100644 --- a/src/music_kraken/pages/encyclopaedia_metallum.py +++ b/src/music_kraken/pages/encyclopaedia_metallum.py @@ -448,8 +448,37 @@ class EncyclopaediaMetallum(Page): soup = BeautifulSoup(r.text, 'html.parser') - tracklist_soup = soup.find("table", {"class": "table_lyrics"}) - print(tracklist_soup.prettify) + tracklist_soup = soup.find("table", {"class": "table_lyrics"}).find("tbody") + + for row in tracklist_soup.find_all("tr", {"class": ["even", "odd"]}): + """ + example of row: + + + 1. # id and tracksort + Convince Me to Bleed # name + 03:40 # length +   + Show lyrics + + + """ + row_list = row.find_all(recursive=False) + + track_sort_soup = row_list[0] + track_sort = int(track_sort_soup.text[:-1]) + track_id = track_sort_soup.find("a").get("name") + + title = row_list[1].text.strip() + + duration_stamp = row_list[2].text + minutes, seconds = duration_stamp.split(":") + duration_in_seconds = int(minutes) * 60 + int(seconds) + print(track_sort, track_id) + print(title) + print(duration_in_seconds) + print("-"*20) + # print(row) return album