Files
music-kraken/internal/plugin/musify.go
2025-10-09 13:40:40 +02:00

613 lines
16 KiB
Go

package plugin
import (
"errors"
"regexp"
"strings"
"gitea.elara.ws/Hazel/music-kraken/internal/common"
"gitea.elara.ws/Hazel/music-kraken/internal/data"
"gitea.elara.ws/Hazel/music-kraken/internal/scraper"
"github.com/PuerkitoBio/goquery"
)
// extractName derives a human-readable name from a musify URL or path.
//
// It takes the last path segment (ignoring any trailing slashes), drops the
// final hyphen-separated token (presumably the numeric id, as in
// "/release/ghost-bath-ghost-bath-2013-602489") and joins the remaining
// tokens with single spaces. A segment that contains no hyphen yields an
// empty string.
func extractName(s string) string {
	// Without this a trailing slash would make the last segment empty and
	// the function would return "" for an otherwise valid URL.
	s = strings.TrimRight(s, "/")

	slug := s[strings.LastIndex(s, "/")+1:]
	cut := strings.LastIndex(slug, "-")
	if cut < 0 {
		return ""
	}
	return strings.ReplaceAll(slug[:cut], "-", " ")
}
// musifyHost is the scheme and host that hrefs scraped from musify pages are
// appended to when building source URLs.
const musifyHost = "https://musify.club"
// Musify is the plugin for musify.club. Init must be called before any method
// that performs requests, because that is where the session is created.
type Musify struct {
// session performs the HTTP requests; it is assigned in Init.
session *scraper.Session
// sourceType holds the value passed to Init.
sourceType data.SourceType
}
// Name returns the identifier of this plugin, which is always "musify".
func (m Musify) Name() string {
	const pluginName = "musify"
	return pluginName
}
// musifyURLPattern matches any musify.club artist, release or track URL
// (case-insensitively). It is compiled once at package initialisation rather
// than on every call to Regex.
var musifyURLPattern = regexp.MustCompile(`(?i)https?://musify\.club/(artist|release|track)/[a-z\-0-9]+`)

// Regex returns the pattern that recognises every musify.club URL this plugin
// handles: artist, release and track pages.
//
// The same compiled expression is returned on every call, so callers must not
// reconfigure it (for example via Longest).
func (m Musify) Regex() *regexp.Regexp {
	return musifyURLPattern
}
// musifyArtistPattern matches musify.club artist URLs (case-insensitively).
// It is compiled once at package initialisation rather than on every call to
// RegexArtist.
var musifyArtistPattern = regexp.MustCompile(`(?i)https?://musify\.club/artist/[a-z\-0-9]+`)

// RegexArtist returns the pattern that recognises musify.club artist URLs.
//
// The same compiled expression is returned on every call, so callers must not
// reconfigure it (for example via Longest).
func (m Musify) RegexArtist() *regexp.Regexp {
	return musifyArtistPattern
}
// musifyAlbumPattern matches musify.club release URLs (case-insensitively).
// It is compiled once at package initialisation rather than on every call to
// RegexAlbum.
var musifyAlbumPattern = regexp.MustCompile(`(?i)https?://musify\.club/release/[a-z\-0-9]+`)

// RegexAlbum returns the pattern that recognises musify.club album ("release")
// URLs.
//
// The same compiled expression is returned on every call, so callers must not
// reconfigure it (for example via Longest).
func (m Musify) RegexAlbum() *regexp.Regexp {
	return musifyAlbumPattern
}
// Init prepares the plugin for use: it stores the given source type and
// creates a new scraper session for subsequent requests.
func (m *Musify) Init(sourceType data.SourceType) {
	m.sourceType = sourceType
	m.session = scraper.NewSession()
}
// musifySongPattern matches musify.club track URLs (case-insensitively).
// It is compiled once at package initialisation rather than on every call to
// RegexSong.
var musifySongPattern = regexp.MustCompile(`(?i)https?://musify\.club/track/[a-z\-0-9]+`)

// RegexSong returns the pattern that recognises musify.club song ("track")
// URLs.
//
// The same compiled expression is returned on every call, so callers must not
// reconfigure it (for example via Longest).
func (m Musify) RegexSong() *regexp.Regexp {
	return musifySongPattern
}
// parseArtistContact builds an artist from a single "contacts__item" element
// of a search result page.
//
// The anchor inside the element supplies the source URL (its href, prefixed
// with musifyHost) and the artist name (its title attribute); either one is
// skipped when the attribute is missing. If the element contains no anchor at
// all, an empty artist and an error are returned.
func parseArtistContact(contact *goquery.Selection) (data.Artist, error) {
	var artist data.Artist

	link := contact.Find("a")
	if link.Length() == 0 {
		return artist, errors.New("no anchor found")
	}

	if href, ok := link.Attr("href"); ok {
		artist.Sources = append(artist.Sources, data.Source{
			Url:        musifyHost + href,
			ObjectType: data.ArtistSource,
		})
	}
	if title, ok := link.Attr("title"); ok {
		artist.Name = title
	}

	// TODO: the artist image is not extracted yet. The <img> inside the
	// contact could provide a thumbnail (src) and a fallback name (alt).
	return artist, nil
}
// parseAlbumContact builds an album from a single "contacts__item" element of
// a search result page.
//
// Expected markup:
//
//	<div class="contacts__item">
//	  <a href="/release/ghost-bath-ghost-bath-2013-602489" title="Ghost Bath - 2013">
//	    <div class="contacts__img release">...</div>
//	    <div class="contacts__info">
//	      <strong>Ghost Bath - 2013</strong>
//	      <small>Ghost Bath</small>
//	      <small>Треков: 4</small>  (track count)
//	      <small>... 9,04</small>   (rating)
//	    </div>
//	  </a>
//	</div>
//
// The anchor's href becomes the album source and its title the album name,
// with the last " - "-separated segment removed. A title without that
// delimiter is used unchanged as the name. Artists are read from the first
// <small> element, but only when exactly three <small> elements are present.
//
// When the element has no anchor an error is returned; the album may still
// carry artists parsed from the info block in that case.
func parseAlbumContact(contact *goquery.Selection) (data.Album, error) {
	var (
		album data.Album
		err   error
	)

	anchor := contact.Find("a")
	if anchor.Length() > 0 {
		if href, ok := anchor.Attr("href"); ok {
			album.Sources = append(album.Sources, data.Source{
				Url:        musifyHost + href,
				ObjectType: data.AlbumSource,
			})
		}
		if title, ok := anchor.Attr("title"); ok {
			const delimiter = " - "
			title = strings.TrimSpace(title)
			parts := strings.Split(title, delimiter)
			if len(parts) > 1 {
				// Drop the trailing segment ("2013" in "Ghost Bath - 2013").
				album.Name = strings.Join(parts[:len(parts)-1], delimiter)
			} else {
				// No delimiter at all: keep the whole title instead of
				// ending up with an empty name.
				album.Name = title
			}
		}
	} else {
		err = errors.New("no anchor found")
	}

	info := contact.Find("div.contacts__info")
	if info.Length() == 0 {
		return album, err
	}

	// Only the layout with three <small> elements (artist, track count,
	// rating) is understood; track count and rating are not parsed.
	smalls := info.Find("small")
	if smalls.Length() != 3 {
		return album, err
	}

	for _, name := range strings.Split(smalls.First().Text(), "&") {
		// Strip separators, ellipsis dots and line breaks from the end.
		name = strings.TrimSpace(strings.TrimRight(name, "& ...\r\n"))

		// Cut off a trailing bracketed suffix: "Name [xyz]" -> "Name".
		if strings.HasSuffix(name, "]") {
			if open := strings.Index(name, "["); open >= 0 {
				name = strings.TrimSpace(name[:open])
			}
		}

		// Fragments that are empty after cleaning (e.g. from a dangling
		// "&") would otherwise be added as nameless artists.
		if name == "" {
			continue
		}
		album.Artists = append(album.Artists, data.Artist{Name: name})
	}

	return album, err
}
// parseContactContainer turns every "div.contacts__item" inside the given
// container into an artist or an album.
//
// The kind is decided by the href of the item's anchor: one containing
// "artist" is parsed as an artist, otherwise one containing "release" is
// parsed as an album. Items without an href, of any other kind, or whose
// parsing fails are skipped. The result is never nil.
func parseContactContainer(contactContainer *goquery.Selection) []data.MusicObject {
	found := make([]data.MusicObject, 0)

	contactContainer.Find("div.contacts__item").Each(func(_ int, item *goquery.Selection) {
		// Attr reports false for an empty selection, so a missing anchor
		// is handled by the same check as a missing href.
		href, ok := item.Find("a").Attr("href")
		if !ok {
			return
		}

		switch {
		case strings.Contains(href, "artist"):
			artist, err := parseArtistContact(item)
			if err == nil {
				found = append(found, artist)
			}
		case strings.Contains(href, "release"):
			album, err := parseAlbumContact(item)
			if err == nil {
				found = append(found, album)
			}
		}
	})

	return found
}
// parsePlaylistItem builds a song from a single playlist item of a search
// result page.
//
// The song name is read from the item's "data-name" attribute. Inside
// "div.playlist__heading" the last anchor is treated as the track link and
// every anchor before it as an artist link; anchors without an href are
// ignored. If the heading is missing the song is returned with only its name
// and no error. If the heading holds fewer than two anchors an error is
// returned.
func parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
	var song data.Song
	song.Name, _ = playlistItem.Attr("data-name")

	heading := playlistItem.Find("div.playlist__heading")
	if heading.Length() == 0 {
		return song, nil
	}

	links := heading.Find("a")
	total := links.Length()
	if total < 2 {
		return song, errors.New("there are not enough anchors (2) for artist and track")
	}

	trackIdx := total - 1
	links.Each(func(idx int, link *goquery.Selection) {
		href, ok := link.Attr("href")
		if !ok {
			return
		}

		if idx == trackIdx {
			// The final anchor points at the track itself.
			song.Sources = append(song.Sources, data.Source{
				Url:        musifyHost + href,
				ObjectType: data.SongSource,
			})
			return
		}

		song.Artists = append(song.Artists, data.Artist{
			Name: strings.TrimSpace(link.Text()),
			Sources: []data.Source{
				{Url: musifyHost + href, ObjectType: data.ArtistSource},
			},
		})
	})

	return song, nil
}
// parsePlaylist collects the songs of all "div.playlist__item" elements found
// inside the given playlist. Items that cannot be parsed are skipped. The
// result is never nil.
func parsePlaylist(playlist *goquery.Selection) []data.MusicObject {
	songs := make([]data.MusicObject, 0)

	playlist.Find("div.playlist__item").Each(func(_ int, item *goquery.Selection) {
		song, err := parsePlaylistItem(item)
		if err != nil {
			return
		}
		songs = append(songs, song)
	})

	return songs
}
// Search submits query.Search to the musify search form and returns every
// artist, album and song found on the result page.
//
// Artists and albums come from the "div.contacts" containers and are listed
// first; songs come from the "div.playlist" containers. When the request or
// the HTML parsing fails, an empty (non-nil) slice is returned together with
// the error.
func (m *Musify) Search(query common.Query) ([]data.MusicObject, error) {
	results := make([]data.MusicObject, 0)

	// Only the free-text field is sent; year and genre could be added too.
	form := map[string]string{
		"SearchText": query.Search,
	}

	resp, err := m.session.PostMultipartForm("https://musify.club/search", form)
	if err != nil {
		return results, err
	}

	page, err := scraper.GetHtml(resp)
	if err != nil {
		return results, err
	}

	page.Find("div.contacts").Each(func(_ int, container *goquery.Selection) {
		results = append(results, parseContactContainer(container)...)
	})
	page.Find("div.playlist").Each(func(_ int, list *goquery.Selection) {
		results = append(results, parsePlaylist(list)...)
	})

	return results, nil
}
// FetchSong downloads the track page at source.Url and extracts the song's
// name, artists and album from it.
//
// The returned song always carries the given source. Artists are taken from
// the anchors in the first list item of "ul.album-info" (the last such list
// if there are several) and from the third breadcrumb. The album comes from
// the fourth breadcrumb and the song name from the fifth.
//
// An error is returned when the request or the HTML parsing fails, or when a
// breadcrumb list is present but does not hold exactly five items. In the
// latter case the song still contains whatever was parsed before the
// breadcrumbs.
//
// NOTE(review): unlike the search result parsers, the sources created here
// carry no ObjectType — confirm whether that is intended.
func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
	song := data.Song{
		Sources: []data.Source{source},
	}

	resp, err := m.session.Get(source.Url)
	if err != nil {
		return song, err
	}

	doc, err := scraper.GetHtml(resp)
	if err != nil {
		return song, err
	}

	// TODO: the download URL (a[itemprop='audio']) is not extracted yet.

	// Artists listed in the first entry of the album info block.
	firstInfoEntry := doc.Find("ul.album-info").Last().Find("li").First()
	firstInfoEntry.Find("a").Each(func(_ int, artistAnchor *goquery.Selection) {
		href, ok := artistAnchor.Attr("href")
		if !ok {
			return
		}
		song.Artists = append(song.Artists, data.Artist{
			Name: strings.TrimSpace(artistAnchor.Text()),
			Sources: []data.Source{
				{Url: musifyHost + href},
			},
		})
	})

	// Breadcrumbs: the items at index 2, 3 and 4 are artist, album and song.
	breadcrumbList := doc.Find("ol.breadcrumb")
	if breadcrumbList.Length() == 0 {
		return song, nil
	}

	listPoints := breadcrumbList.Find("li.breadcrumb-item")
	if listPoints.Length() != 5 {
		// Covers both too few and too many items.
		return song, errors.New("unexpected number of breadcrumbs on page, expected 5")
	}

	if artistAnchor := listPoints.Eq(2).Find("a"); artistAnchor.Length() > 0 {
		href, hasHref := artistAnchor.Attr("href")
		nameElem := artistAnchor.Find("span[itemprop='name']")

		// The breadcrumb counts as an artist only if its link looks like
		// ".../artist/<slug>" and it has a name element.
		hrefParts := strings.Split(href, "/")
		isArtistLink := hasHref && len(hrefParts) > 1 && hrefParts[len(hrefParts)-2] == "artist"

		if isArtistLink && nameElem.Length() > 0 {
			song.Artists = append(song.Artists, data.Artist{
				Name:    strings.TrimSpace(nameElem.Text()),
				Sources: []data.Source{{Url: musifyHost + href}},
			})
		}
	}

	if albumAnchor := listPoints.Eq(3).Find("a"); albumAnchor.Length() > 0 {
		if href, ok := albumAnchor.Attr("href"); ok {
			song.Album.Sources = append(song.Album.Sources, data.Source{
				Url: musifyHost + href,
			})
		}
		if nameElem := albumAnchor.Find("span[itemprop='name']"); nameElem.Length() > 0 {
			song.Album.Name = strings.TrimSpace(nameElem.Text())
		}
	}

	song.Name = strings.TrimSpace(listPoints.Eq(4).Text())

	return song, nil
}
// parseSongCard builds a song from a single playlist item of an album page.
//
// The name initially comes from the card's "data-name" attribute. When
// "div.playlist__details" contains more than one anchor, the last anchor
// provides the song source and its text replaces the name. Artists are read
// from the url/name <meta> tags inside each span[itemprop='byArtist']; an
// artist is kept if it has a name or at least one source.
//
// Artists and Sources of the returned song are never nil.
func parseSongCard(songCard *goquery.Selection) data.Song {
	song := data.Song{
		Artists: []data.Artist{},
		Sources: []data.Source{},
	}
	song.Name, _ = songCard.Attr("data-name")

	// TODO: the track position (div.playlist__position) is not parsed yet.

	details := songCard.Find("div.playlist__details")
	if details.Length() == 0 {
		return song
	}

	// Track link
	if links := details.Find("a"); links.Length() > 1 {
		track := links.Last()
		if href, ok := track.Attr("href"); ok {
			song.Sources = append(song.Sources, data.Source{Url: musifyHost + href})
		}
		song.Name = strings.TrimSpace(track.Text())
	}

	// Artists
	details.Find("span[itemprop='byArtist']").Each(func(_ int, span *goquery.Selection) {
		artist := data.Artist{
			Sources: []data.Source{},
		}

		// Attr yields "" for a missing attribute or element, so one
		// emptiness check covers "absent" as well as "blank".
		if link, _ := span.Find("meta[itemprop='url']").Attr("content"); link != "" {
			artist.Sources = append(artist.Sources, data.Source{
				Url: musifyHost + link,
			})
		}
		if name, _ := span.Find("meta[itemprop='name']").Attr("content"); name != "" {
			artist.Name = name
		}

		if artist.Name == "" && len(artist.Sources) == 0 {
			return
		}
		song.Artists = append(song.Artists, artist)
	})

	// TODO: the download link (div.playlist__actions a[itemprop='audio']) is
	// not extracted yet.
	return song
}
// parseAlbum extracts album metadata from an album page.
//
// Sources of information, in the order they are applied:
//   - breadcrumbs (only when there are exactly four items): the fourth item
//     gives the album name, the third one an artist if its link looks like
//     ".../artist/<slug>";
//   - the document's first meta[itemprop='url'] / meta[itemprop='name'] tags:
//     album source and album name (overriding the breadcrumb name);
//   - "ul.album-info": further artists from a[itemprop='byArtist'], kept only
//     when they have a name.
//
// Songs are not parsed here. Artists, Sources and Songs of the returned album
// are never nil.
func parseAlbum(doc *goquery.Document) data.Album {
	album := data.Album{
		Artists: []data.Artist{},
		Sources: []data.Source{},
		Songs:   []data.Song{},
	}

	// Breadcrumbs. Any count other than four is silently ignored.
	crumbs := doc.Find("ol.breadcrumb").Find("li.breadcrumb-item")
	if crumbs.Length() == 4 {
		album.Name = strings.TrimSpace(crumbs.Eq(3).Text())

		link := crumbs.Eq(2).Find("a")
		// Attr reports false when there is no anchor at all.
		if href, ok := link.Attr("href"); ok {
			segments := strings.Split(href, "/")
			if len(segments) > 1 && segments[len(segments)-2] == "artist" {
				// Prefer the text of the inner span, fall back to the
				// anchor's own text.
				label := link.Find("span")
				if label.Length() == 0 {
					label = link
				}
				album.Artists = append(album.Artists, data.Artist{
					Name: strings.TrimSpace(label.Text()),
					Sources: []data.Source{
						{Url: musifyHost + strings.TrimSpace(href)},
					},
				})
			}
		}
	}

	// Meta tags
	if link, ok := doc.Find("meta[itemprop='url']").Attr("content"); ok {
		album.Sources = append(album.Sources, data.Source{
			Url: musifyHost + link,
		})
	}
	if name, ok := doc.Find("meta[itemprop='name']").Attr("content"); ok {
		album.Name = name
	}

	// Album info: artists
	doc.Find("ul.album-info").Find("a[itemprop='byArtist']").Each(func(_ int, anchor *goquery.Selection) {
		artist := data.Artist{
			Sources: []data.Source{},
		}

		if link, ok := anchor.Find("meta[itemprop='url']").Attr("content"); ok {
			artist.Sources = append(artist.Sources, data.Source{
				Url: musifyHost + link,
			})
		}
		if name, ok := anchor.Find("meta[itemprop='name']").Attr("content"); ok {
			artist.Name = name
		}

		if artist.Name == "" {
			return
		}
		album.Artists = append(album.Artists, artist)
	})

	// TODO: the release date (time[itemprop='datePublished']) and the album
	// artwork are not parsed yet.
	return album
}
// FetchAlbum downloads the album page at source.Url and returns the album
// with its metadata and songs.
//
// Name and artists are taken from parseAlbum. The sources consist of the
// given source followed by the ones parseAlbum found. Songs are parsed from
// every "div.playlist__item" inside "div.card-body". When the request or the
// HTML parsing fails, an album holding only the given source is returned
// together with the error.
func (m Musify) FetchAlbum(source data.Source) (data.Album, error) {
	album := data.Album{
		Sources: []data.Source{source},
		Artists: []data.Artist{},
		Songs:   []data.Song{},
	}

	resp, err := m.session.Get(source.Url)
	if err != nil {
		return album, err
	}

	page, err := scraper.GetHtml(resp)
	if err != nil {
		return album, err
	}

	// Metadata
	meta := parseAlbum(page)
	album.Name, album.Artists = meta.Name, meta.Artists
	album.Sources = append(album.Sources, meta.Sources...)

	// Songs. Find on an empty selection matches nothing, so a missing card
	// body simply produces no songs.
	page.Find("div.card-body").Find("div.playlist__item").Each(func(_ int, card *goquery.Selection) {
		album.Songs = append(album.Songs, parseSongCard(card))
	})

	// TODO: track order is not assigned yet.
	return album, nil
}
// FetchArtist returns an artist whose name is derived from source.Url via
// extractName. No request is made and the error is always nil.
func (m Musify) FetchArtist(source data.Source) (data.Artist, error) {
	var artist data.Artist
	artist.Name = extractName(source.Url)
	return artist, nil
}