Compare commits

...

2 Commits

Author SHA1 Message Date
Hazel Noack
3565f14181 partial fetching of artist 2025-10-09 14:37:13 +02:00
Hazel Noack
1ae859b9b9 parsing artist url 2025-10-09 14:05:10 +02:00
3 changed files with 329 additions and 4 deletions

View File

@@ -1,6 +1,7 @@
package common package common
import ( import (
"regexp"
"strconv" "strconv"
"strings" "strings"
) )
@@ -21,3 +22,13 @@ func ZeroPad(num int, length int) string {
} }
return strings.Repeat("0", length-len(str)) + str return strings.Repeat("0", length-len(str)) + str
} }
// numericRegex matches a non-empty string that consists only of decimal digits.
var numericRegex = regexp.MustCompile(`^[\d]+$`)

// IsNumeric reports whether num is made up of one or more digits and nothing
// else. The empty string is not considered numeric.
func IsNumeric(num string) bool {
	onlyDigits := numericRegex.MatchString(num)
	return onlyDigits
}
// CleanSongTitle is the hook for normalising a song or album title.
// It is currently a placeholder: title is returned exactly as given and
// artistName is not used.
func CleanSongTitle(title string, artistName string) string {
	cleaned := title
	return cleaned
}

View File

@@ -2,6 +2,8 @@ package plugin
import ( import (
"errors" "errors"
"fmt"
"net/url"
"regexp" "regexp"
"strings" "strings"
@@ -616,8 +618,292 @@ func (m Musify) FetchAlbum(source data.Source) (data.Album, error) {
return album, nil return album, nil
} }
// parsedArtistUrl holds the pieces of an artist URL as produced by
// newParsedArtistUrl.
type parsedArtistUrl struct {
	// id is the numeric suffix that follows the last "-" of the final path segment.
	id string
	// name is the final path segment without the "-<id>" suffix.
	name string
	// url is the raw URL the value was parsed from.
	url string
}
// newParsedArtistUrl splits an artist URL into its name and numeric id.
//
// The last path segment must have the form "<name>-<id>": everything before
// the final "-" becomes name, the part after it becomes id and has to be
// numeric. Trailing slashes on the path are ignored, so ".../some-band-42/"
// is parsed the same way as ".../some-band-42".
//
// An error is returned when rawUrl cannot be parsed, when the last segment
// contains no "-", or when the id is not numeric. On error the returned
// struct is the zero value.
func newParsedArtistUrl(rawUrl string) (parsedArtistUrl, error) {
	parsed, err := url.Parse(rawUrl)
	if err != nil {
		return parsedArtistUrl{}, err
	}

	// Without trimming, a trailing "/" would make the last segment empty and
	// a perfectly valid artist URL would be rejected.
	trimmedPath := strings.TrimRight(parsed.Path, "/")
	segment := trimmedPath[strings.LastIndex(trimmedPath, "/")+1:]

	// Only the last "-" separates name and id; the name may contain dashes.
	dash := strings.LastIndex(segment, "-")
	if dash < 0 {
		return parsedArtistUrl{}, errors.New("last part of path has to consist of at least one - " + segment)
	}

	id := segment[dash+1:]
	if !common.IsNumeric(id) {
		return parsedArtistUrl{}, errors.New("last elem (id) has to be numeric " + id)
	}

	return parsedArtistUrl{
		id:   id,
		name: segment[:dash],
		url:  rawUrl,
	}, nil
}
// fetchInitialArtist requests the artist page for parsed.name and fills artist
// from the returned HTML:
//
//   - the name, first from the third breadcrumb entry and then, if present,
//     from the <h1> inside the content title header (which wins),
//   - a source for the tab whose label starts with "песни" ("songs"),
//   - one source per link found in the icon list.
//
// It returns the updated artist. An error is returned when the request or the
// HTML parsing fails, or when a breadcrumb exists but does not have exactly
// three entries.
//
// NOTE(review): the endpoint is built from parsed.name only; parsed.id is not
// part of the requested path — confirm the site resolves artists that way.
func (m *Musify) fetchInitialArtist(parsed parsedArtistUrl, artist data.Artist) (data.Artist, error) {
	resp, err := m.session.Get(fmt.Sprintf("https://musify.club/artist/%s?_pjax=#bodyContent", parsed.name))
	if err != nil {
		return artist, err
	}

	doc, err := scraper.GetHtml(resp)
	if err != nil {
		return artist, err
	}

	// Breadcrumb: when present it must have exactly three entries, the last
	// one being the artist name. Anything else means the page layout changed.
	if crumbs := doc.Find("ol.breadcrumb"); crumbs.Length() > 0 {
		items := crumbs.Find("li.breadcrumb-item")
		if items.Length() != 3 {
			return artist, errors.New("breadcrumb layout on artist page changed")
		}
		artist.Name = strings.TrimSpace(items.Eq(2).Text())
	}

	// Tab bar: the tab labelled "песни" ("songs") links to the song list.
	doc.Find("ul.nav-tabs").Find("li.nav-item").Each(func(_ int, tab *goquery.Selection) {
		label := strings.TrimSpace(tab.Text())
		if !strings.HasPrefix(label, "песни") {
			return
		}
		href, ok := tab.Find("a").Attr("href")
		if !ok {
			return
		}
		artist.Sources = append(artist.Sources, data.Source{
			Url: musifyHost + href,
		})
	})

	// Page header: the <h1> overrides whatever the breadcrumb provided.
	if heading := doc.Find("header.content__title").Find("h1"); heading.Length() > 0 {
		artist.Name = strings.TrimSpace(heading.Text())
	}

	// Icon list: country flag and links to further sources.
	icons := doc.Find("ul.icon-list")
	if icons.Length() == 0 {
		return artist, nil
	}

	if flag := icons.Find("i.flag-icon"); flag.Length() > 0 {
		classAttr, _ := flag.Attr("class")
		for _, className := range strings.Fields(classAttr) {
			if className == "flag-icon" || className == "shadow" || len(className) != 2 {
				continue
			}
			// TODO: className is the two-letter country code; map it to a
			// country once a lookup is available. Nothing is stored yet.
			break
		}
	}

	icons.Find("a.link").Each(func(_ int, link *goquery.Selection) {
		href, ok := link.Attr("href")
		if !ok {
			return
		}
		// TODO: match href against the known source types instead of
		// storing the bare URL.
		artist.Sources = append(artist.Sources, data.Source{Url: href})
	})

	// TODO: notes ("#text-main") and artwork ("img.artist-img", preferring
	// "data-src" over "src") are not extracted yet.

	return artist, nil
}
// parseAlbumCard turns one album card of the discography listing into an album.
//
// The href of the first anchor in the card becomes an album source; if the
// card has a "div.card-body", the anchor inside it contributes another source
// and its text (trimmed and passed through common.CleanSongTitle together
// with artistName) becomes the album name.
//
// An error is returned when the card contains no anchor at all; the album
// returned alongside it has empty Sources, Artists and Songs.
func (m Musify) parseAlbumCard(albumCard *goquery.Selection, artistName string) (data.Album, error) {
	album := data.Album{
		Sources: []data.Source{},
		Artists: []data.Artist{},
		Songs:   []data.Song{},
	}

	// TODO: the card's "data-type" attribute carries the album type id
	// (5 appears to mean bootleg); it is not evaluated yet.

	// collect records the link target of a release anchor as an album source
	// and, when useTextAsName is set, takes the anchor text as album name.
	collect := func(link *goquery.Selection, useTextAsName bool) {
		if link == nil {
			return
		}

		href, ok := link.Attr("href")
		if ok {
			album.Sources = append(album.Sources, data.Source{
				Url:        musifyHost + href,
				ObjectType: data.AlbumSource,
				SourceType: &m.sourceType,
			})
		}

		if !useTextAsName {
			return
		}
		album.Name = common.CleanSongTitle(strings.TrimSpace(link.Text()), artistName)
	}

	// The first anchor wraps the thumbnail and links to the album page.
	anchors := albumCard.Find("a")
	if anchors.Length() == 0 {
		return album, errors.New("the card has no thumbnail or url")
	}
	collect(anchors.First(), false)

	// TODO: the thumbnail's "alt" text could serve as a fallback name.

	// The anchor in the card body carries the readable album title.
	if body := albumCard.Find("div.card-body"); body.Length() > 0 {
		collect(body.Find("a"), true)
	}

	// TODO: the last of the three "div.card-footer" elements holds the date
	// added (format "02.01.2006") in a <small> whose <i> has the title
	// "Добавлено"; it is not parsed yet.

	return album, nil
}
// fetchArtistDiscography requests the album listing for the artist identified
// by url.id and parses every "div.card" of the response into an album.
//
// artistName is handed to parseAlbumCard for title clean-up.
// albumTypeBlacklist is accepted but not applied yet.
//
// Cards that cannot be parsed are reported on stdout and skipped, so the
// result only contains albums that parsed without error. An error is returned
// when the request itself or the HTML parsing of the response fails.
func (m *Musify) fetchArtistDiscography(url parsedArtistUrl, artistName string, albumTypeBlacklist []string) ([]data.Album, error) {
	albumList := []data.Album{}

	// The listing is served in response to a form POST.
	formData := map[string]string{
		"ArtistID":              url.id,
		"SortOrder.Property":    "dateCreated",
		"SortOrder.IsAscending": "false",
		"X-Requested-With":      "XMLHttpRequest",
	}

	resp, err := m.session.PostForm(musifyHost+"/artist/filteralbums", formData)
	if err != nil {
		return albumList, err
	}

	// The response is parsed directly. A leftover debug dump of the body and
	// an unconditional early return used to sit here, which made everything
	// below unreachable and the result always empty.
	doc, err := scraper.GetHtml(resp)
	if err != nil {
		return albumList, err
	}

	doc.Find("div.card").Each(func(_ int, card *goquery.Selection) {
		album, err := m.parseAlbumCard(card, artistName)
		if err != nil {
			// A broken card must not abort the whole discography, but a
			// half-filled album must not end up in the result either.
			fmt.Println(err)
			return
		}
		albumList = append(albumList, album)
	})

	return albumList, nil
}
func (m *Musify) FetchArtist(source data.Source) (data.Artist, error) {
res := data.Artist{
Name: extractName(source.Url),
}
parsed, err := newParsedArtistUrl(source.Url)
if err != nil {
return res, err
}
res, err = m.fetchInitialArtist(parsed, res)
if err != nil {
return res, err
}
albumList, err := m.fetchArtistDiscography(parsed, res.Name, []string{})
if err != nil {
return res, err
}
res.Albums = append(res.Albums, albumList...)
return res, nil
} }

View File

@@ -8,6 +8,7 @@ import (
"net/http" "net/http"
"net/http/cookiejar" "net/http/cookiejar"
"net/url" "net/url"
"strings"
"time" "time"
"golang.org/x/net/publicsuffix" "golang.org/x/net/publicsuffix"
@@ -118,6 +119,33 @@ func (s *Session) PostMultipartForm(url string, data map[string]string, headers
return s.client.Do(req) return s.client.Do(req)
} }
// PostForm performs a POST request whose body is data encoded as
// application/x-www-form-urlencoded.
//
// rawUrl is passed through s.buildURL before use. The session's default
// headers are applied first, then the form Content-Type, and finally the
// entries of the first map in headers, if one is given; further maps are
// ignored.
func (s *Session) PostForm(rawUrl string, data map[string]string, headers ...map[string]string) (*http.Response, error) {
	target := s.buildURL(rawUrl)

	// Encode the key/value pairs as the request body.
	values := url.Values{}
	for field, content := range data {
		values.Add(field, content)
	}

	req, err := http.NewRequest("POST", target, strings.NewReader(values.Encode()))
	if err != nil {
		return nil, err
	}

	s.setDefaultHeaders(req)
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")

	// Caller-supplied headers come last so they can override the ones above.
	if len(headers) > 0 {
		extra := headers[0]
		for name, content := range extra {
			req.Header.Set(name, content)
		}
	}

	return s.client.Do(req)
}
// PostJSON performs a POST request with JSON data // PostJSON performs a POST request with JSON data
func (s *Session) PostJSON(url string, data interface{}, headers ...map[string]string) (*http.Response, error) { func (s *Session) PostJSON(url string, data interface{}, headers ...map[string]string) (*http.Response, error) {
fullURL := s.buildURL(url) fullURL := s.buildURL(url)