944 lines
25 KiB
Go
944 lines
25 KiB
Go
package plugin
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"net/url"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"gitea.elara.ws/Hazel/music-kraken/internal/common"
|
|
"gitea.elara.ws/Hazel/music-kraken/internal/data"
|
|
"gitea.elara.ws/Hazel/music-kraken/internal/scraper"
|
|
"github.com/PuerkitoBio/goquery"
|
|
)
|
|
|
|
// extractName derives a display name from the last path segment of s.
//
// The segment after the final "/" is cut at its last "-" (which drops the
// trailing token, e.g. the numeric id of a musify URL) and every remaining
// hyphen is turned into a space. A segment that contains no hyphen at all
// yields the empty string.
func extractName(s string) string {
	segment := s[strings.LastIndex(s, "/")+1:]

	cut := strings.LastIndex(segment, "-")
	if cut < 0 {
		return ""
	}

	return strings.ReplaceAll(segment[:cut], "-", " ")
}
|
|
|
|
// musifyHost is the scheme-and-host prefix that is prepended to the
// site-relative hrefs scraped from musify.club pages.
const musifyHost = "https://musify.club"
|
|
|
|
type Musify struct {
|
|
session *scraper.Session
|
|
sourceType data.SourceType
|
|
}
|
|
|
|
func (m Musify) Name() string {
|
|
return "musify"
|
|
}
|
|
|
|
func (m Musify) Regex() *regexp.Regexp {
|
|
return regexp.MustCompile(`(?i)https?://musify\.club/(artist|release|track)/[a-z\-0-9]+`)
|
|
}
|
|
|
|
func (m Musify) RegexArtist() *regexp.Regexp {
|
|
return regexp.MustCompile(`(?i)https?://musify\.club/artist/[a-z\-0-9]+`)
|
|
}
|
|
|
|
func (m Musify) RegexAlbum() *regexp.Regexp {
|
|
return regexp.MustCompile(`(?i)https?://musify\.club/release/[a-z\-0-9]+`)
|
|
}
|
|
|
|
func (m *Musify) Init(sourceType data.SourceType) {
|
|
m.session = scraper.NewSession()
|
|
m.sourceType = sourceType
|
|
}
|
|
|
|
func (m Musify) RegexSong() *regexp.Regexp {
|
|
return regexp.MustCompile(`(?i)https?://musify\.club/track/[a-z\-0-9]+`)
|
|
}
|
|
|
|
func (m Musify) parseArtistContact(contact *goquery.Selection) (data.Artist, error) {
|
|
artist := data.Artist{}
|
|
var err error
|
|
|
|
anchor := contact.Find("a")
|
|
if anchor.Length() > 0 {
|
|
url, urlExists := anchor.Attr("href")
|
|
|
|
if urlExists {
|
|
artist.Sources = append(artist.Sources, data.Source{
|
|
Url: musifyHost + url,
|
|
ObjectType: data.ArtistSource,
|
|
SourceType: &m.sourceType,
|
|
})
|
|
}
|
|
|
|
if name, nameExists := anchor.Attr("title"); nameExists {
|
|
artist.Name = name
|
|
}
|
|
} else {
|
|
err = errors.New("no anchor found")
|
|
}
|
|
|
|
/*
|
|
# artist image
|
|
image_soup = contact.find("img")
|
|
if image_soup is not None:
|
|
alt = image_soup.get("alt")
|
|
if alt is not None:
|
|
name = alt
|
|
|
|
artist_thumbnail = image_soup.get("src")
|
|
*/
|
|
|
|
return artist, err
|
|
}
|
|
|
|
func (m Musify) parseAlbumContact(contact *goquery.Selection) (data.Album, error) {
|
|
album := data.Album{}
|
|
var err error
|
|
|
|
/*
|
|
<div class="contacts__item">
|
|
<a href="/release/ghost-bath-ghost-bath-2013-602489" title="Ghost Bath - 2013">
|
|
|
|
<div class="contacts__img release">
|
|
<img alt="Ghost Bath" class="lozad" data-src="https://37s.musify.club/img/69/9060265/24178833.jpg"/>
|
|
<noscript><img alt="Ghost Bath" src="https://37s.musify.club/img/69/9060265/24178833.jpg"/></noscript>
|
|
</div>
|
|
|
|
<div class="contacts__info">
|
|
<strong>Ghost Bath - 2013</strong>
|
|
<small>Ghost Bath</small>
|
|
<small>Треков: 4</small> <!--tracks-->
|
|
<small><i class="zmdi zmdi-star zmdi-hc-fw"></i> 9,04</small>
|
|
</div>
|
|
</a>
|
|
</div>
|
|
*/
|
|
|
|
anchor := contact.Find("a")
|
|
if anchor.Length() > 0 {
|
|
if url, urlExists := anchor.Attr("href"); urlExists {
|
|
album.Sources = append(album.Sources, data.Source{
|
|
Url: musifyHost + url,
|
|
ObjectType: data.AlbumSource,
|
|
SourceType: &m.sourceType,
|
|
})
|
|
}
|
|
|
|
if titleDate, titleExists := anchor.Attr("title"); titleExists {
|
|
delimiter := " - "
|
|
|
|
splitAttr := strings.Split(strings.TrimSpace(titleDate), delimiter)
|
|
album.Name = strings.Join(splitAttr[:len(splitAttr)-1], delimiter)
|
|
}
|
|
} else {
|
|
err = errors.New("no anchor found")
|
|
}
|
|
|
|
contactInfo := contact.Find("div.contacts__info")
|
|
if contactInfo.Length() > 0 {
|
|
/*
|
|
<strong>Ghost Bath - 2013</strong>
|
|
<small>Ghost Bath</small>
|
|
<small>Треков: 4</small> <!--tracks-->
|
|
<small><i class="zmdi zmdi-star zmdi-hc-fw"></i> 9,04</small>
|
|
*/
|
|
|
|
// titleSoup := contactInfo.Find("strong")
|
|
|
|
smallList := contactInfo.Find("small")
|
|
if smallList.Length() == 3 {
|
|
// artist
|
|
rawArtistStr := smallList.First().Text()
|
|
|
|
for _, artistStr := range strings.Split(rawArtistStr, "&") {
|
|
artistStr = strings.TrimRight(artistStr, "& ...\r\n")
|
|
artistStr = strings.TrimSpace(artistStr)
|
|
|
|
if strings.HasSuffix(artistStr, "]") && strings.Contains(artistStr, "[") {
|
|
parts := strings.Split(artistStr, "[")
|
|
if len(parts) > 1 {
|
|
artistStr = strings.TrimSpace(parts[0])
|
|
}
|
|
}
|
|
|
|
album.Artists = append(album.Artists, data.Artist{
|
|
Name: artistStr,
|
|
})
|
|
}
|
|
|
|
// trackCountSoup := smallList[1]
|
|
// ratingSoup := smallList[2]
|
|
}
|
|
}
|
|
|
|
return album, err
|
|
}
|
|
|
|
func (m Musify) parseContactContainer(contactContainer *goquery.Selection) []data.MusicObject {
|
|
res := []data.MusicObject{}
|
|
|
|
contactContainer.Find("div.contacts__item").Each(func(i int, contact *goquery.Selection) {
|
|
anchor := contact.Find("a")
|
|
|
|
if anchor.Length() > 0 {
|
|
url, exists := anchor.Attr("href")
|
|
|
|
if exists {
|
|
if strings.Contains(url, "artist") {
|
|
if artist, err := m.parseArtistContact(contact); err == nil {
|
|
res = append(res, artist)
|
|
}
|
|
} else if strings.Contains(url, "release") {
|
|
if album, err := m.parseAlbumContact(contact); err == nil {
|
|
res = append(res, album)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
})
|
|
|
|
return res
|
|
}
|
|
|
|
func (m Musify) parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
|
|
song := data.Song{}
|
|
var err error
|
|
|
|
song.Name, _ = playlistItem.Attr("data-name")
|
|
|
|
playlistDetails := playlistItem.Find("div.playlist__heading")
|
|
if playlistDetails.Length() > 0 {
|
|
anchorList := playlistDetails.Find("a")
|
|
|
|
if anchorList.Length() >= 2 {
|
|
// artists
|
|
anchorList.Each(func(i int, artistAnchor *goquery.Selection) {
|
|
if i < anchorList.Length()-1 { // all except the last one
|
|
if url, exists := artistAnchor.Attr("href"); exists {
|
|
song.Artists = append(song.Artists, data.Artist{
|
|
Name: strings.TrimSpace(artistAnchor.Text()),
|
|
Sources: []data.Source{
|
|
{Url: musifyHost + url, ObjectType: data.ArtistSource, SourceType: &m.sourceType},
|
|
},
|
|
})
|
|
}
|
|
}
|
|
})
|
|
|
|
// track
|
|
trackAnchor := anchorList.Last()
|
|
if href, exists := trackAnchor.Attr("href"); exists {
|
|
song.Sources = append(song.Sources, data.Source{
|
|
Url: musifyHost + href,
|
|
ObjectType: data.SongSource,
|
|
SourceType: &m.sourceType,
|
|
})
|
|
}
|
|
|
|
} else {
|
|
err = errors.New("there are not enough anchors (2) for artist and track")
|
|
}
|
|
}
|
|
|
|
return song, err
|
|
}
|
|
|
|
func (m Musify) parsePlaylist(playlist *goquery.Selection) []data.MusicObject {
|
|
res := []data.MusicObject{}
|
|
|
|
playlist.Find("div.playlist__item").Each(func(i int, playlistItem *goquery.Selection) {
|
|
if song, err := m.parsePlaylistItem(playlistItem); err == nil {
|
|
res = append(res, song)
|
|
}
|
|
})
|
|
|
|
return res
|
|
}
|
|
|
|
func (m *Musify) Search(query common.Query) ([]data.MusicObject, error) {
|
|
musicObjects := []data.MusicObject{}
|
|
|
|
resp, err := m.session.PostMultipartForm("https://musify.club/search", map[string]string{
|
|
"SearchText": query.Search, // alternatively I could also add year and genre
|
|
})
|
|
if err != nil {
|
|
return musicObjects, err
|
|
}
|
|
|
|
doc, err := scraper.GetHtml(resp)
|
|
if err != nil {
|
|
return musicObjects, err
|
|
}
|
|
|
|
doc.Find("div.contacts").Each(func(i int, contactContainer *goquery.Selection) {
|
|
musicObjects = append(musicObjects, m.parseContactContainer(contactContainer)...)
|
|
})
|
|
|
|
doc.Find("div.playlist").Each(func(i int, playlist *goquery.Selection) {
|
|
musicObjects = append(musicObjects, m.parsePlaylist(playlist)...)
|
|
})
|
|
|
|
return musicObjects, nil
|
|
}
|
|
|
|
// parsedSongUrl is a track URL split into its parts by newParsedSongUrl:
// id is the token after the last "-" of the final path segment, name is
// everything before that "-", and url is the raw URL it was built from.
type parsedSongUrl struct {
	id, name, url string
}
|
|
|
|
func newParsedSongUrl(rawUrl string) (parsedSongUrl, error) {
|
|
res := parsedSongUrl{
|
|
url: rawUrl,
|
|
}
|
|
|
|
parsed, err := url.Parse(rawUrl)
|
|
if err != nil {
|
|
return res, err
|
|
}
|
|
|
|
dirs := strings.Split(parsed.Path, "/")
|
|
correctPart := dirs[len(dirs)-1]
|
|
split := strings.Split(correctPart, "-")
|
|
if len(split) < 2 {
|
|
return res, errors.New("last part of path has to consist of at least one - " + correctPart)
|
|
}
|
|
|
|
res.id = strings.TrimSpace(split[len(split)-1])
|
|
res.name = strings.Join(split[:len(split)-1], "-")
|
|
|
|
if !common.IsNumeric(res.id) {
|
|
return res, errors.New("last elem (id) has to be numeric " + res.id)
|
|
}
|
|
|
|
return res, nil
|
|
}
|
|
|
|
func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
|
|
song := data.Song{
|
|
Sources: []data.Source{
|
|
source,
|
|
},
|
|
}
|
|
|
|
resp, err := m.session.Get(source.Url)
|
|
if err != nil {
|
|
return song, err
|
|
}
|
|
|
|
doc, err := scraper.GetHtml(resp)
|
|
if err != nil {
|
|
return song, err
|
|
}
|
|
|
|
// Download URL
|
|
doc.Find("a[itemprop='audio']").Each(func(i int, anchor *goquery.Selection) {
|
|
if href, _ := anchor.Attr("href"); true {
|
|
// will be the source first added at the begining
|
|
song.Sources[0].AudioUrl = musifyHost + href
|
|
} else {
|
|
// http://musify.club/track/dl/7141298/crystal-f-sekundenschlaf.mp3
|
|
parsed, err := newParsedSongUrl(song.Sources[0].Url)
|
|
if err != nil {
|
|
return
|
|
}
|
|
song.Sources[0].AudioUrl = "http://musify.club/track/dl/" + parsed.id + "/" + parsed.name + ".mp3"
|
|
}
|
|
})
|
|
|
|
// Song detail
|
|
var listElement *goquery.Selection
|
|
doc.Find("ul.album-info").Each(func(i int, albumInfo *goquery.Selection) {
|
|
listElement = albumInfo.Find("li").First()
|
|
})
|
|
|
|
if listElement != nil {
|
|
listElement.Find("a").Each(func(i int, artistAnchor *goquery.Selection) {
|
|
if href, exists := artistAnchor.Attr("href"); exists {
|
|
song.Artists = append(song.Artists, data.Artist{
|
|
Name: strings.TrimSpace(artistAnchor.Text()),
|
|
Sources: []data.Source{
|
|
{Url: musifyHost + href, ObjectType: data.ArtistSource, SourceType: &m.sourceType},
|
|
},
|
|
})
|
|
}
|
|
|
|
})
|
|
}
|
|
|
|
// Breadcrumbs
|
|
if breadcrumbList := doc.Find("ol.breadcrumb"); breadcrumbList.Length() > 0 {
|
|
listPoints := breadcrumbList.Find("li.breadcrumb-item")
|
|
if listPoints.Length() != 5 {
|
|
return song, errors.New("too many breadcrumbs on page")
|
|
}
|
|
|
|
if artistAnchor := listPoints.Eq(2).Find("a"); artistAnchor != nil && artistAnchor.Length() > 0 {
|
|
artist := data.Artist{}
|
|
useArtist := true
|
|
|
|
if href, exists := artistAnchor.Attr("href"); exists {
|
|
hrefParts := strings.Split(href, "/")
|
|
if len(hrefParts) <= 1 || hrefParts[len(hrefParts)-2] != "artist" {
|
|
useArtist = false
|
|
}
|
|
|
|
artist.Sources = append(artist.Sources, data.Source{Url: musifyHost + href, ObjectType: data.ArtistSource, SourceType: &m.sourceType})
|
|
} else {
|
|
useArtist = false
|
|
}
|
|
|
|
if nameElem := artistAnchor.Find("span[itemprop='name']"); nameElem.Length() > 0 {
|
|
artist.Name = strings.TrimSpace(nameElem.Text())
|
|
} else {
|
|
useArtist = false
|
|
}
|
|
|
|
if useArtist {
|
|
song.Artists = append(song.Artists, artist)
|
|
}
|
|
}
|
|
|
|
if albumAnchor := listPoints.Eq(3).Find("a"); albumAnchor != nil && albumAnchor.Length() > 0 {
|
|
if href, exists := albumAnchor.Attr("href"); exists {
|
|
song.Album.Sources = append(song.Album.Sources, data.Source{
|
|
Url: musifyHost + href,
|
|
ObjectType: data.AlbumSource,
|
|
SourceType: &m.sourceType,
|
|
})
|
|
}
|
|
|
|
if nameElem := albumAnchor.Find("span[itemprop='name']"); nameElem.Length() > 0 {
|
|
song.Album.Name = strings.TrimSpace(nameElem.Text())
|
|
}
|
|
}
|
|
|
|
song.Name = strings.TrimSpace(listPoints.Eq(4).Text())
|
|
}
|
|
|
|
return song, nil
|
|
}
|
|
|
|
func (m Musify) parseSongCard(songCard *goquery.Selection) data.Song {
|
|
song := data.Song{
|
|
Artists: []data.Artist{},
|
|
Sources: []data.Source{},
|
|
}
|
|
|
|
// Get song name from data attribute
|
|
songName, _ := songCard.Attr("data-name")
|
|
song.Name = songName
|
|
|
|
/*
|
|
// Get tracksort
|
|
tracksortSelection := songCard.Find("div.playlist__position")
|
|
if tracksortSelection.Length() > 0 {
|
|
rawTracksort := strings.TrimSpace(tracksortSelection.Text())
|
|
if parsedTracksort, err := strconv.Atoi(rawTracksort); err == nil {
|
|
tracksort = parsedTracksort
|
|
}
|
|
}
|
|
*/
|
|
|
|
// Playlist details
|
|
playlistDetails := songCard.Find("div.playlist__details")
|
|
if playlistDetails.Length() > 0 {
|
|
// Track anchor
|
|
anchorList := playlistDetails.Find("a")
|
|
if anchorList.Length() > 1 {
|
|
trackAnchor := anchorList.Last()
|
|
if href, exists := trackAnchor.Attr("href"); exists {
|
|
song.Sources = append(song.Sources, data.Source{Url: musifyHost + href, ObjectType: data.SongSource, SourceType: &m.sourceType})
|
|
}
|
|
song.Name = strings.TrimSpace(trackAnchor.Text())
|
|
}
|
|
|
|
// Artist spans
|
|
playlistDetails.Find("span[itemprop='byArtist']").Each(func(i int, artistSpan *goquery.Selection) {
|
|
artist := data.Artist{
|
|
Sources: []data.Source{},
|
|
}
|
|
|
|
// Artist URL
|
|
metaArtistSrc := artistSpan.Find("meta[itemprop='url']")
|
|
if metaArtistSrc.Length() > 0 {
|
|
if content, exists := metaArtistSrc.Attr("content"); exists && content != "" {
|
|
artist.Sources = append(artist.Sources, data.Source{
|
|
Url: musifyHost + content,
|
|
ObjectType: data.ArtistSource,
|
|
SourceType: &m.sourceType,
|
|
})
|
|
}
|
|
}
|
|
|
|
// Artist name
|
|
metaArtistName := artistSpan.Find("meta[itemprop='name']")
|
|
if metaArtistName.Length() > 0 {
|
|
if content, exists := metaArtistName.Attr("content"); exists && content != "" {
|
|
artist.Name = content
|
|
}
|
|
}
|
|
|
|
if artist.Name != "" || len(artist.Sources) > 0 {
|
|
song.Artists = append(song.Artists, artist)
|
|
}
|
|
})
|
|
}
|
|
|
|
/*
|
|
// Playlist actions - download link
|
|
playlistActions := songCard.Find("div.playlist__actions")
|
|
if playlistActions.Length() > 0 {
|
|
downloadAnchor := playlistActions.Find("a[itemprop='audio']")
|
|
if downloadAnchor.Length() > 0 {
|
|
if href, exists := downloadAnchor.Attr("href"); exists && currentURL != "" {
|
|
// Add source with audio URL
|
|
song.Sources = append(song.Sources, data.Source{
|
|
Url: currentURL,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
*/
|
|
|
|
return song
|
|
}
|
|
|
|
func (m Musify) parseAlbum(doc *goquery.Document) data.Album {
|
|
album := data.Album{
|
|
Artists: []data.Artist{},
|
|
Sources: []data.Source{},
|
|
Songs: []data.Song{},
|
|
}
|
|
|
|
// Breadcrumb
|
|
breadcrumb := doc.Find("ol.breadcrumb")
|
|
breadcrumbElements := breadcrumb.Find("li.breadcrumb-item")
|
|
if breadcrumbElements.Length() == 4 {
|
|
// Album name from last breadcrumb
|
|
albumCrumb := breadcrumbElements.Eq(3)
|
|
album.Name = strings.TrimSpace(albumCrumb.Text())
|
|
|
|
// Artist from second last breadcrumb
|
|
artistCrumb := breadcrumbElements.Eq(2)
|
|
artistAnchor := artistCrumb.Find("a")
|
|
if artistAnchor.Length() > 0 {
|
|
if href, exists := artistAnchor.Attr("href"); exists {
|
|
hrefParts := strings.Split(href, "/")
|
|
if len(hrefParts) > 1 && hrefParts[len(hrefParts)-2] == "artist" {
|
|
artist := data.Artist{}
|
|
|
|
artist.Sources = append(artist.Sources, data.Source{
|
|
Url: musifyHost + strings.TrimSpace(href),
|
|
ObjectType: data.ArtistSource,
|
|
SourceType: &m.sourceType,
|
|
})
|
|
|
|
// Artist name from span
|
|
if span := artistAnchor.Find("span"); span.Length() > 0 {
|
|
artist.Name = strings.TrimSpace(span.Text())
|
|
} else {
|
|
artist.Name = strings.TrimSpace(artistAnchor.Text())
|
|
}
|
|
|
|
album.Artists = append(album.Artists, artist)
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
// m.logger.Debug("there are not 4 breadcrumb items, which shouldn't be the case")
|
|
}
|
|
|
|
// Meta tags
|
|
metaURL := doc.Find("meta[itemprop='url']")
|
|
if metaURL.Length() > 0 {
|
|
if content, exists := metaURL.Attr("content"); exists {
|
|
album.Sources = append(album.Sources, data.Source{
|
|
Url: musifyHost + content,
|
|
ObjectType: data.AlbumSource,
|
|
SourceType: &m.sourceType,
|
|
})
|
|
}
|
|
}
|
|
|
|
metaName := doc.Find("meta[itemprop='name']")
|
|
if metaName.Length() > 0 {
|
|
if content, exists := metaName.Attr("content"); exists {
|
|
album.Name = content
|
|
}
|
|
}
|
|
|
|
// Album info
|
|
albumInfo := doc.Find("ul.album-info")
|
|
if albumInfo.Length() > 0 {
|
|
// Artists
|
|
albumInfo.Find("a[itemprop='byArtist']").Each(func(i int, artistAnchor *goquery.Selection) {
|
|
artist := data.Artist{
|
|
Sources: []data.Source{},
|
|
}
|
|
|
|
// Artist URL
|
|
artistURLMeta := artistAnchor.Find("meta[itemprop='url']")
|
|
if artistURLMeta.Length() > 0 {
|
|
if content, exists := artistURLMeta.Attr("content"); exists {
|
|
artist.Sources = append(artist.Sources, data.Source{
|
|
Url: musifyHost + content,
|
|
ObjectType: data.ArtistSource,
|
|
SourceType: &m.sourceType,
|
|
})
|
|
}
|
|
}
|
|
|
|
// Artist name
|
|
artistNameMeta := artistAnchor.Find("meta[itemprop='name']")
|
|
if artistNameMeta.Length() > 0 {
|
|
if content, exists := artistNameMeta.Attr("content"); exists {
|
|
artist.Name = content
|
|
}
|
|
}
|
|
|
|
if artist.Name != "" {
|
|
album.Artists = append(album.Artists, artist)
|
|
}
|
|
})
|
|
|
|
/*
|
|
// Date published
|
|
timeSelection := albumInfo.Find("time[itemprop='datePublished']")
|
|
if timeSelection.Length() > 0 {
|
|
if datetime, exists := timeSelection.Attr("datetime"); exists {
|
|
// Note: You'll need to parse the datetime according to your needs
|
|
// For now, we'll store it as a string or you can parse it to time.Time
|
|
// album.Date = parsedDate
|
|
}
|
|
}
|
|
*/
|
|
}
|
|
|
|
// Album artwork would be handled here based on your ArtworkCollection implementation
|
|
|
|
return album
|
|
}
|
|
|
|
func (m Musify) FetchAlbum(source data.Source) (data.Album, error) {
|
|
album := data.Album{
|
|
Sources: []data.Source{source},
|
|
Artists: []data.Artist{},
|
|
Songs: []data.Song{},
|
|
}
|
|
|
|
resp, err := m.session.Get(source.Url)
|
|
if err != nil {
|
|
return album, err
|
|
}
|
|
|
|
doc, err := scraper.GetHtml(resp)
|
|
if err != nil {
|
|
return album, err
|
|
}
|
|
|
|
// Parse album metadata
|
|
parsedAlbum := m.parseAlbum(doc)
|
|
album.Name = parsedAlbum.Name
|
|
album.Artists = parsedAlbum.Artists
|
|
album.Sources = append(album.Sources, parsedAlbum.Sources...)
|
|
|
|
// Parse songs from cards
|
|
cardBody := doc.Find("div.card-body")
|
|
if cardBody.Length() > 0 {
|
|
cardBody.Find("div.playlist__item").Each(func(i int, songCard *goquery.Selection) {
|
|
song := m.parseSongCard(songCard)
|
|
album.Songs = append(album.Songs, song)
|
|
})
|
|
}
|
|
|
|
// Update tracksort would be handled here based on your implementation
|
|
|
|
return album, nil
|
|
}
|
|
|
|
// parsedArtistUrl is an artist URL split into its parts by newParsedArtistUrl:
// id is the token after the last "-" of the final path segment, name is
// everything before that "-", and url is the raw URL it was built from.
type parsedArtistUrl struct {
	id, name, url string
}
|
|
|
|
func newParsedArtistUrl(rawUrl string) (parsedArtistUrl, error) {
|
|
res := parsedArtistUrl{
|
|
url: rawUrl,
|
|
}
|
|
|
|
parsed, err := url.Parse(rawUrl)
|
|
if err != nil {
|
|
return res, err
|
|
}
|
|
|
|
dirs := strings.Split(parsed.Path, "/")
|
|
correctPart := dirs[len(dirs)-1]
|
|
split := strings.Split(correctPart, "-")
|
|
if len(split) < 2 {
|
|
return res, errors.New("last part of path has to consist of at least one - " + correctPart)
|
|
}
|
|
|
|
res.id = split[len(split)-1]
|
|
res.name = strings.Join(split[:len(split)-1], "-")
|
|
|
|
if !common.IsNumeric(res.id) {
|
|
return res, errors.New("last elem (id) has to be numeric " + res.id)
|
|
}
|
|
|
|
return res, nil
|
|
}
|
|
|
|
func (m *Musify) fetchInitialArtist(parsed parsedArtistUrl, artist data.Artist) (data.Artist, error) {
|
|
endpoint := fmt.Sprintf("https://musify.club/artist/%s?_pjax=#bodyContent", parsed.name)
|
|
resp, err := m.session.Get(endpoint)
|
|
if err != nil {
|
|
return artist, err
|
|
}
|
|
|
|
doc, err := scraper.GetHtml(resp)
|
|
if err != nil {
|
|
return artist, err
|
|
}
|
|
|
|
// Breadcrumbs
|
|
breadcrumbs := doc.Find("ol.breadcrumb")
|
|
if breadcrumbs.Length() > 0 {
|
|
breadcrumbList := breadcrumbs.Find("li.breadcrumb-item")
|
|
if breadcrumbList.Length() == 3 {
|
|
artist.Name = strings.TrimSpace(breadcrumbList.Eq(2).Text())
|
|
} else {
|
|
return artist, errors.New("breadcrumb layout on artist page changed")
|
|
}
|
|
}
|
|
|
|
// Nav tabs for songs link
|
|
navTabs := doc.Find("ul.nav-tabs")
|
|
if navTabs.Length() > 0 {
|
|
navTabs.Find("li.nav-item").Each(func(i int, listItem *goquery.Selection) {
|
|
if strings.HasPrefix(strings.TrimSpace(listItem.Text()), "песни") {
|
|
// "песни" translates to "songs"
|
|
anchor := listItem.Find("a")
|
|
if href, exists := anchor.Attr("href"); exists {
|
|
artist.Sources = append(artist.Sources, data.Source{
|
|
Url: musifyHost + href,
|
|
})
|
|
}
|
|
}
|
|
|
|
})
|
|
}
|
|
|
|
// Content title
|
|
contentTitle := doc.Find("header.content__title")
|
|
if contentTitle.Length() > 0 {
|
|
h1Name := contentTitle.Find("h1")
|
|
if h1Name.Length() > 0 {
|
|
artist.Name = strings.TrimSpace(h1Name.Text())
|
|
}
|
|
}
|
|
|
|
// Country and additional sources from icon list
|
|
iconList := doc.Find("ul.icon-list")
|
|
if iconList.Length() > 0 {
|
|
// Country flag - simplified version
|
|
countryFlag := iconList.Find("i.flag-icon")
|
|
if countryFlag.Length() > 0 {
|
|
// Extract country code from class names
|
|
classes, _ := countryFlag.Attr("class")
|
|
classList := strings.Fields(classes)
|
|
for _, class := range classList {
|
|
if class != "flag-icon" && class != "shadow" && len(class) == 2 {
|
|
// This would be where you'd use a country lookup library
|
|
// artist.Country = getCountryFromCode(class)
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
// Additional sources
|
|
iconList.Find("a.link").Each(func(i int, additionalSource *goquery.Selection) {
|
|
if href, exists := additionalSource.Attr("href"); exists {
|
|
// Simplified source matching - you'd implement your Source.matchUrl equivalent
|
|
artist.Sources = append(artist.Sources, data.Source{Url: href})
|
|
}
|
|
})
|
|
}
|
|
|
|
/*
|
|
// Notes
|
|
noteSection := doc.Find("#text-main")
|
|
if noteSection.Length() > 0 {
|
|
html, _ := noteSection.Html()
|
|
// artist.Notes = FormattedText{HTML: html} - if you have this field
|
|
}
|
|
|
|
// Artist artwork
|
|
doc.Find("img.artist-img").Each(func(i int, img *goquery.Selection) {
|
|
src, exists := img.Attr("data-src")
|
|
if !exists {
|
|
src, _ = img.Attr("src")
|
|
}
|
|
if src != "" {
|
|
// artist.Artwork = append(artist.Artwork, Artwork{Url: src})
|
|
}
|
|
})
|
|
*/
|
|
|
|
return artist, nil
|
|
}
|
|
|
|
func (m Musify) parseAlbumCard(albumCard *goquery.Selection, artistName string) (data.Album, error) {
|
|
album := data.Album{
|
|
Sources: []data.Source{},
|
|
Artists: []data.Artist{},
|
|
Songs: []data.Song{},
|
|
}
|
|
|
|
/*
|
|
// Album type from data attribute
|
|
if albumTypeID, exists := albumCard.Attr("data-type"); exists {
|
|
if parsedType, err := strconv.Atoi(albumTypeID); err == nil {
|
|
// album.Type = getAlbumTypeFromID(parsedType) - implement your mapping
|
|
if parsedType == 5 {
|
|
// album.Status = AlbumStatusBootleg
|
|
}
|
|
}
|
|
}
|
|
*/
|
|
|
|
// Parse release anchor
|
|
parseReleaseAnchor := func(anchor *goquery.Selection, textIsName bool) {
|
|
if anchor == nil {
|
|
return
|
|
}
|
|
|
|
if href, exists := anchor.Attr("href"); exists {
|
|
album.Sources = append(album.Sources, data.Source{
|
|
Url: musifyHost + href,
|
|
ObjectType: data.AlbumSource,
|
|
SourceType: &m.sourceType,
|
|
})
|
|
}
|
|
|
|
if textIsName {
|
|
album.Name = common.CleanSongTitle(strings.TrimSpace(anchor.Text()), artistName)
|
|
}
|
|
}
|
|
|
|
// Main thumbnail anchor
|
|
anchorList := albumCard.Find("a")
|
|
if anchorList.Length() > 0 {
|
|
mainAnchor := anchorList.First()
|
|
parseReleaseAnchor(mainAnchor, false)
|
|
|
|
/*
|
|
// Thumbnail image
|
|
thumbnail := mainAnchor.Find("img")
|
|
if thumbnail.Length() > 0 {
|
|
if alt, exists := thumbnail.Attr("alt"); exists {
|
|
album.Name = common.CleanSongTitle(alt, artistName)
|
|
}
|
|
|
|
// Image URL could be stored if needed
|
|
// if src, exists := thumbnail.Attr("src"); exists { ... }
|
|
}
|
|
*/
|
|
} else {
|
|
return album, errors.New("the card has no thumbnail or url")
|
|
}
|
|
|
|
// Card body
|
|
cardBody := albumCard.Find("div.card-body")
|
|
if cardBody.Length() > 0 {
|
|
parseReleaseAnchor(cardBody.Find("a"), true)
|
|
}
|
|
|
|
/*
|
|
// Parse date from card footer
|
|
parseSmallDate := func(small *goquery.Selection) {
|
|
italic := small.Find("i")
|
|
if italic.Length() == 0 {
|
|
return
|
|
}
|
|
|
|
if title, exists := italic.Attr("title"); exists && title == "Добавлено" {
|
|
rawTime := strings.TrimSpace(small.Text())
|
|
// Parse date from "13.11.2021" format
|
|
// album.Date = parseDate(rawTime, "02.01.2006")
|
|
}
|
|
}
|
|
|
|
// Card footers
|
|
cardFooters := albumCard.Find("div.card-footer")
|
|
if cardFooters.Length() == 3 {
|
|
lastFooter := cardFooters.Last()
|
|
lastFooter.Find("small").Each(func(i int, small *goquery.Selection) {
|
|
parseSmallDate(small)
|
|
})
|
|
} else {
|
|
m.logger.Debug(fmt.Sprintf("expected 3 card footers, got %d", cardFooters.Length()))
|
|
}
|
|
*/
|
|
|
|
return album, nil
|
|
}
|
|
|
|
func (m *Musify) fetchArtistDiscography(url parsedArtistUrl, artistName string, albumTypeBlacklist []string) ([]data.Album, error) {
|
|
albumList := []data.Album{}
|
|
|
|
endpoint := "https://musify.club/discography/filteralbums"
|
|
|
|
// POST request with form data
|
|
formData := map[string]string{
|
|
"ArtistID": url.id,
|
|
"SortOrder.Property": "dateCreated",
|
|
"SortOrder.IsAscending": "false",
|
|
"X-Requested-With": "XMLHttpRequest",
|
|
}
|
|
resp, err := m.session.PostForm(endpoint, formData, map[string]string{"X-Requested-With": "XMLHttpRequest"})
|
|
if err != nil {
|
|
return albumList, err
|
|
}
|
|
|
|
doc, err := scraper.GetHtml(resp)
|
|
if err != nil {
|
|
return albumList, err
|
|
}
|
|
|
|
doc.Find("div.card").Each(func(i int, card *goquery.Selection) {
|
|
album, err := m.parseAlbumCard(card, artistName)
|
|
|
|
if err != nil {
|
|
fmt.Println(err)
|
|
}
|
|
|
|
albumList = append(albumList, album)
|
|
})
|
|
|
|
return albumList, nil
|
|
}
|
|
|
|
func (m *Musify) FetchArtist(source data.Source) (data.Artist, error) {
|
|
res := data.Artist{
|
|
Name: extractName(source.Url),
|
|
}
|
|
parsed, err := newParsedArtistUrl(source.Url)
|
|
if err != nil {
|
|
return res, err
|
|
}
|
|
|
|
res, err = m.fetchInitialArtist(parsed, res)
|
|
if err != nil {
|
|
return res, err
|
|
}
|
|
|
|
albumList, err := m.fetchArtistDiscography(parsed, res.Name, []string{})
|
|
if err != nil {
|
|
return res, err
|
|
}
|
|
res.Albums = append(res.Albums, albumList...)
|
|
|
|
return res, nil
|
|
}
|