partial fetching of artist
This commit is contained in:
@@ -28,3 +28,7 @@ var numericRegex = regexp.MustCompile(`^[\d]+$`)
|
||||
func IsNumeric(num string) bool {
|
||||
return numericRegex.MatchString(num)
|
||||
}
|
||||
|
||||
// CleanSongTitle currently hands title back unchanged. artistName is accepted
// but not consulted yet.
func CleanSongTitle(title string, artistName string) string {
	_ = artistName // placeholder: no artist-based cleaning is performed yet
	cleaned := title
	return cleaned
}
|
||||
|
||||
@@ -621,10 +621,13 @@ func (m Musify) FetchAlbum(source data.Source) (data.Album, error) {
|
||||
// parsedArtistUrl carries the components of a Musify artist URL.
//
//   - id is sent as the "ArtistID" form value when the discography is requested.
//   - name is the path segment inserted after /artist/ when the artist page is
//     requested.
//   - url is the raw URL the value was created from.
type parsedArtistUrl struct {
	id, name, url string
}
|
||||
|
||||
func newParsedArtistUrl(rawUrl string) (parsedArtistUrl, error) {
|
||||
res := parsedArtistUrl{}
|
||||
res := parsedArtistUrl{
|
||||
url: rawUrl,
|
||||
}
|
||||
|
||||
parsed, err := url.Parse(rawUrl)
|
||||
if err != nil {
|
||||
@@ -648,7 +651,241 @@ func newParsedArtistUrl(rawUrl string) (parsedArtistUrl, error) {
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (m Musify) FetchArtist(source data.Source) (data.Artist, error) {
|
||||
func (m *Musify) fetchInitialArtist(parsed parsedArtistUrl, artist data.Artist) (data.Artist, error) {
|
||||
endpoint := fmt.Sprintf("https://musify.club/artist/%s?_pjax=#bodyContent", parsed.name)
|
||||
resp, err := m.session.Get(endpoint)
|
||||
if err != nil {
|
||||
return artist, err
|
||||
}
|
||||
|
||||
doc, err := scraper.GetHtml(resp)
|
||||
if err != nil {
|
||||
return artist, err
|
||||
}
|
||||
|
||||
// Breadcrumbs
|
||||
breadcrumbs := doc.Find("ol.breadcrumb")
|
||||
if breadcrumbs.Length() > 0 {
|
||||
breadcrumbList := breadcrumbs.Find("li.breadcrumb-item")
|
||||
if breadcrumbList.Length() == 3 {
|
||||
artist.Name = strings.TrimSpace(breadcrumbList.Eq(2).Text())
|
||||
} else {
|
||||
return artist, errors.New("breadcrumb layout on artist page changed")
|
||||
}
|
||||
}
|
||||
|
||||
// Nav tabs for songs link
|
||||
navTabs := doc.Find("ul.nav-tabs")
|
||||
if navTabs.Length() > 0 {
|
||||
navTabs.Find("li.nav-item").Each(func(i int, listItem *goquery.Selection) {
|
||||
if strings.HasPrefix(strings.TrimSpace(listItem.Text()), "песни") {
|
||||
// "песни" translates to "songs"
|
||||
anchor := listItem.Find("a")
|
||||
if href, exists := anchor.Attr("href"); exists {
|
||||
artist.Sources = append(artist.Sources, data.Source{
|
||||
Url: musifyHost + href,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
})
|
||||
}
|
||||
|
||||
// Content title
|
||||
contentTitle := doc.Find("header.content__title")
|
||||
if contentTitle.Length() > 0 {
|
||||
h1Name := contentTitle.Find("h1")
|
||||
if h1Name.Length() > 0 {
|
||||
artist.Name = strings.TrimSpace(h1Name.Text())
|
||||
}
|
||||
}
|
||||
|
||||
// Country and additional sources from icon list
|
||||
iconList := doc.Find("ul.icon-list")
|
||||
if iconList.Length() > 0 {
|
||||
// Country flag - simplified version
|
||||
countryFlag := iconList.Find("i.flag-icon")
|
||||
if countryFlag.Length() > 0 {
|
||||
// Extract country code from class names
|
||||
classes, _ := countryFlag.Attr("class")
|
||||
classList := strings.Fields(classes)
|
||||
for _, class := range classList {
|
||||
if class != "flag-icon" && class != "shadow" && len(class) == 2 {
|
||||
// This would be where you'd use a country lookup library
|
||||
// artist.Country = getCountryFromCode(class)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Additional sources
|
||||
iconList.Find("a.link").Each(func(i int, additionalSource *goquery.Selection) {
|
||||
if href, exists := additionalSource.Attr("href"); exists {
|
||||
// Simplified source matching - you'd implement your Source.matchUrl equivalent
|
||||
artist.Sources = append(artist.Sources, data.Source{Url: href})
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/*
|
||||
// Notes
|
||||
noteSection := doc.Find("#text-main")
|
||||
if noteSection.Length() > 0 {
|
||||
html, _ := noteSection.Html()
|
||||
// artist.Notes = FormattedText{HTML: html} - if you have this field
|
||||
}
|
||||
|
||||
// Artist artwork
|
||||
doc.Find("img.artist-img").Each(func(i int, img *goquery.Selection) {
|
||||
src, exists := img.Attr("data-src")
|
||||
if !exists {
|
||||
src, _ = img.Attr("src")
|
||||
}
|
||||
if src != "" {
|
||||
// artist.Artwork = append(artist.Artwork, Artwork{Url: src})
|
||||
}
|
||||
})
|
||||
*/
|
||||
|
||||
return artist, nil
|
||||
}
|
||||
|
||||
func (m Musify) parseAlbumCard(albumCard *goquery.Selection, artistName string) (data.Album, error) {
|
||||
album := data.Album{
|
||||
Sources: []data.Source{},
|
||||
Artists: []data.Artist{},
|
||||
Songs: []data.Song{},
|
||||
}
|
||||
|
||||
/*
|
||||
// Album type from data attribute
|
||||
if albumTypeID, exists := albumCard.Attr("data-type"); exists {
|
||||
if parsedType, err := strconv.Atoi(albumTypeID); err == nil {
|
||||
// album.Type = getAlbumTypeFromID(parsedType) - implement your mapping
|
||||
if parsedType == 5 {
|
||||
// album.Status = AlbumStatusBootleg
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// Parse release anchor
|
||||
parseReleaseAnchor := func(anchor *goquery.Selection, textIsName bool) {
|
||||
if anchor == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if href, exists := anchor.Attr("href"); exists {
|
||||
album.Sources = append(album.Sources, data.Source{
|
||||
Url: musifyHost + href,
|
||||
ObjectType: data.AlbumSource,
|
||||
SourceType: &m.sourceType,
|
||||
})
|
||||
}
|
||||
|
||||
if textIsName {
|
||||
album.Name = common.CleanSongTitle(strings.TrimSpace(anchor.Text()), artistName)
|
||||
}
|
||||
}
|
||||
|
||||
// Main thumbnail anchor
|
||||
anchorList := albumCard.Find("a")
|
||||
if anchorList.Length() > 0 {
|
||||
mainAnchor := anchorList.First()
|
||||
parseReleaseAnchor(mainAnchor, false)
|
||||
|
||||
/*
|
||||
// Thumbnail image
|
||||
thumbnail := mainAnchor.Find("img")
|
||||
if thumbnail.Length() > 0 {
|
||||
if alt, exists := thumbnail.Attr("alt"); exists {
|
||||
album.Name = common.CleanSongTitle(alt, artistName)
|
||||
}
|
||||
|
||||
// Image URL could be stored if needed
|
||||
// if src, exists := thumbnail.Attr("src"); exists { ... }
|
||||
}
|
||||
*/
|
||||
} else {
|
||||
return album, errors.New("the card has no thumbnail or url")
|
||||
}
|
||||
|
||||
// Card body
|
||||
cardBody := albumCard.Find("div.card-body")
|
||||
if cardBody.Length() > 0 {
|
||||
parseReleaseAnchor(cardBody.Find("a"), true)
|
||||
}
|
||||
|
||||
/*
|
||||
// Parse date from card footer
|
||||
parseSmallDate := func(small *goquery.Selection) {
|
||||
italic := small.Find("i")
|
||||
if italic.Length() == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
if title, exists := italic.Attr("title"); exists && title == "Добавлено" {
|
||||
rawTime := strings.TrimSpace(small.Text())
|
||||
// Parse date from "13.11.2021" format
|
||||
// album.Date = parseDate(rawTime, "02.01.2006")
|
||||
}
|
||||
}
|
||||
|
||||
// Card footers
|
||||
cardFooters := albumCard.Find("div.card-footer")
|
||||
if cardFooters.Length() == 3 {
|
||||
lastFooter := cardFooters.Last()
|
||||
lastFooter.Find("small").Each(func(i int, small *goquery.Selection) {
|
||||
parseSmallDate(small)
|
||||
})
|
||||
} else {
|
||||
m.logger.Debug(fmt.Sprintf("expected 3 card footers, got %d", cardFooters.Length()))
|
||||
}
|
||||
*/
|
||||
|
||||
return album, nil
|
||||
}
|
||||
|
||||
func (m *Musify) fetchArtistDiscography(url parsedArtistUrl, artistName string, albumTypeBlacklist []string) ([]data.Album, error) {
|
||||
albumList := []data.Album{}
|
||||
|
||||
endpoint := musifyHost + "/artist/filteralbums"
|
||||
|
||||
// POST request with form data
|
||||
formData := map[string]string{
|
||||
"ArtistID": url.id,
|
||||
"SortOrder.Property": "dateCreated",
|
||||
"SortOrder.IsAscending": "false",
|
||||
"X-Requested-With": "XMLHttpRequest",
|
||||
}
|
||||
|
||||
resp, err := m.session.PostForm(endpoint, formData)
|
||||
if err != nil {
|
||||
return albumList, err
|
||||
}
|
||||
|
||||
fmt.Println(scraper.GetText(resp))
|
||||
return albumList, nil
|
||||
|
||||
doc, err := scraper.GetHtml(resp)
|
||||
if err != nil {
|
||||
return albumList, err
|
||||
}
|
||||
|
||||
doc.Find("div.card").Each(func(i int, card *goquery.Selection) {
|
||||
album, err := m.parseAlbumCard(card, artistName)
|
||||
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
}
|
||||
|
||||
albumList = append(albumList, album)
|
||||
})
|
||||
|
||||
return albumList, nil
|
||||
}
|
||||
|
||||
func (m *Musify) FetchArtist(source data.Source) (data.Artist, error) {
|
||||
res := data.Artist{
|
||||
Name: extractName(source.Url),
|
||||
}
|
||||
@@ -657,7 +894,16 @@ func (m Musify) FetchArtist(source data.Source) (data.Artist, error) {
|
||||
return res, err
|
||||
}
|
||||
|
||||
fmt.Println(parsed)
|
||||
res, err = m.fetchInitialArtist(parsed, res)
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
|
||||
albumList, err := m.fetchArtistDiscography(parsed, res.Name, []string{})
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
res.Albums = append(res.Albums, albumList...)
|
||||
|
||||
return res, nil
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"net/http"
|
||||
"net/http/cookiejar"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"golang.org/x/net/publicsuffix"
|
||||
@@ -118,6 +119,33 @@ func (s *Session) PostMultipartForm(url string, data map[string]string, headers
|
||||
return s.client.Do(req)
|
||||
}
|
||||
|
||||
func (s *Session) PostForm(rawUrl string, data map[string]string, headers ...map[string]string) (*http.Response, error) {
|
||||
fullURL := s.buildURL(rawUrl)
|
||||
|
||||
// Prepare form data
|
||||
formData := url.Values{}
|
||||
for k, v := range data {
|
||||
formData.Add(k, v)
|
||||
}
|
||||
body := strings.NewReader(formData.Encode())
|
||||
req, err := http.NewRequest("POST", fullURL, body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
s.setDefaultHeaders(req)
|
||||
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||
|
||||
// Add any additional headers provided
|
||||
if len(headers) > 0 {
|
||||
for key, value := range headers[0] {
|
||||
req.Header.Set(key, value)
|
||||
}
|
||||
}
|
||||
|
||||
return s.client.Do(req)
|
||||
}
|
||||
|
||||
// PostJSON performs a POST request with JSON data
|
||||
func (s *Session) PostJSON(url string, data interface{}, headers ...map[string]string) (*http.Response, error) {
|
||||
fullURL := s.buildURL(url)
|
||||
|
||||
Reference in New Issue
Block a user