diff --git a/internal/common/strings.go b/internal/common/strings.go
index 1237a7f..a16b780 100644
--- a/internal/common/strings.go
+++ b/internal/common/strings.go
@@ -28,3 +28,10 @@ var numericRegex = regexp.MustCompile(`^[\d]+$`)
 func IsNumeric(num string) bool {
 	return numericRegex.MatchString(num)
 }
+
+// CleanSongTitle currently returns title unchanged and ignores artistName.
+// NOTE(review): looks like a placeholder for stripping the artist name from
+// scraped titles; confirm the intended cleaning rules.
+func CleanSongTitle(title string, artistName string) string {
+	return title
+}
diff --git a/internal/plugin/musify.go b/internal/plugin/musify.go
index 2e89d42..b3b0f2e 100644
--- a/internal/plugin/musify.go
+++ b/internal/plugin/musify.go
@@ -621,10 +621,13 @@ func (m Musify) FetchAlbum(source data.Source) (data.Album, error) {
 type parsedArtistUrl struct {
 	id   string
 	name string
+	url  string
 }
 
 func newParsedArtistUrl(rawUrl string) (parsedArtistUrl, error) {
-	res := parsedArtistUrl{}
+	res := parsedArtistUrl{
+		url: rawUrl,
+	}
 
 	parsed, err := url.Parse(rawUrl)
 	if err != nil {
@@ -648,7 +651,253 @@ func newParsedArtistUrl(rawUrl string) (parsedArtistUrl, error) {
 	return res, nil
 }
 
-func (m Musify) FetchArtist(source data.Source) (data.Artist, error) {
+// fetchInitialArtist requests the artist page for parsed.name and fills artist
+// from it: the name (third breadcrumb item, then overridden by the <h1> title
+// when present), the link of the nav tab whose text starts with "песни", and
+// every a.link href found in ul.icon-list. It returns an error if the request
+// or HTML parsing fails, or if a breadcrumb exists without exactly three items.
+func (m *Musify) fetchInitialArtist(parsed parsedArtistUrl, artist data.Artist) (data.Artist, error) {
+	endpoint := fmt.Sprintf("https://musify.club/artist/%s?_pjax=#bodyContent", parsed.name)
+	resp, err := m.session.Get(endpoint)
+	if err != nil {
+		return artist, err
+	}
+
+	doc, err := scraper.GetHtml(resp)
+	if err != nil {
+		return artist, err
+	}
+
+	// Breadcrumbs
+	breadcrumbs := doc.Find("ol.breadcrumb")
+	if breadcrumbs.Length() > 0 {
+		breadcrumbList := breadcrumbs.Find("li.breadcrumb-item")
+		if breadcrumbList.Length() == 3 {
+			artist.Name = strings.TrimSpace(breadcrumbList.Eq(2).Text())
+		} else {
+			return artist, errors.New("breadcrumb layout on artist page changed")
+		}
+	}
+
+	// Nav tabs for songs link
+	navTabs := doc.Find("ul.nav-tabs")
+	if navTabs.Length() > 0 {
+		navTabs.Find("li.nav-item").Each(func(i int, listItem *goquery.Selection) {
+			if strings.HasPrefix(strings.TrimSpace(listItem.Text()), "песни") {
+				// "песни" translates to "songs"
+				anchor := listItem.Find("a")
+				if href, exists := anchor.Attr("href"); exists {
+					artist.Sources = append(artist.Sources, data.Source{
+						Url: musifyHost + href,
+					})
+				}
+			}
+
+		})
+	}
+
+	// Content title
+	contentTitle := doc.Find("header.content__title")
+	if contentTitle.Length() > 0 {
+		h1Name := contentTitle.Find("h1")
+		if h1Name.Length() > 0 {
+			artist.Name = strings.TrimSpace(h1Name.Text())
+		}
+	}
+
+	// Country and additional sources from icon list
+	iconList := doc.Find("ul.icon-list")
+	if iconList.Length() > 0 {
+		// Country flag - simplified version
+		countryFlag := iconList.Find("i.flag-icon")
+		if countryFlag.Length() > 0 {
+			// Extract country code from class names
+			classes, _ := countryFlag.Attr("class")
+			classList := strings.Fields(classes)
+			for _, class := range classList {
+				if class != "flag-icon" && class != "shadow" && len(class) == 2 {
+					// This would be where you'd use a country lookup library
+					// artist.Country = getCountryFromCode(class)
+					break
+				}
+			}
+		}
+
+		// Additional sources
+		iconList.Find("a.link").Each(func(i int, additionalSource *goquery.Selection) {
+			if href, exists := additionalSource.Attr("href"); exists {
+				// Simplified source matching - you'd implement your Source.matchUrl equivalent
+				artist.Sources = append(artist.Sources, data.Source{Url: href})
+			}
+		})
+	}
+
+	/*
+		// Notes
+		noteSection := doc.Find("#text-main")
+		if noteSection.Length() > 0 {
+			html, _ := noteSection.Html()
+			// artist.Notes = FormattedText{HTML: html} - if you have this field
+		}
+
+		// Artist artwork
+		doc.Find("img.artist-img").Each(func(i int, img *goquery.Selection) {
+			src, exists := img.Attr("data-src")
+			if !exists {
+				src, _ = img.Attr("src")
+			}
+			if src != "" {
+				// artist.Artwork = append(artist.Artwork, Artwork{Url: src})
+			}
+		})
+	*/
+
+	return artist, nil
+}
+
+// parseAlbumCard builds a data.Album from one album card. The href of the
+// card's first anchor and of the div.card-body anchor are added as album
+// sources; the card-body anchor text, passed through common.CleanSongTitle,
+// becomes the album name. It returns an error if the card contains no anchor.
+func (m Musify) parseAlbumCard(albumCard *goquery.Selection, artistName string) (data.Album, error) {
+	album := data.Album{
+		Sources: []data.Source{},
+		Artists: []data.Artist{},
+		Songs:   []data.Song{},
+	}
+
+	/*
+		// Album type from data attribute
+		if albumTypeID, exists := albumCard.Attr("data-type"); exists {
+			if parsedType, err := strconv.Atoi(albumTypeID); err == nil {
+				// album.Type = getAlbumTypeFromID(parsedType) - implement your mapping
+				if parsedType == 5 {
+					// album.Status = AlbumStatusBootleg
+				}
+			}
+		}
+	*/
+
+	// Parse release anchor
+	parseReleaseAnchor := func(anchor *goquery.Selection, textIsName bool) {
+		if anchor == nil {
+			return
+		}
+
+		if href, exists := anchor.Attr("href"); exists {
+			album.Sources = append(album.Sources, data.Source{
+				Url:        musifyHost + href,
+				ObjectType: data.AlbumSource,
+				SourceType: &m.sourceType,
+			})
+		}
+
+		if textIsName {
+			album.Name = common.CleanSongTitle(strings.TrimSpace(anchor.Text()), artistName)
+		}
+	}
+
+	// Main thumbnail anchor
+	anchorList := albumCard.Find("a")
+	if anchorList.Length() > 0 {
+		mainAnchor := anchorList.First()
+		parseReleaseAnchor(mainAnchor, false)
+
+		/*
+			// Thumbnail image
+			thumbnail := mainAnchor.Find("img")
+			if thumbnail.Length() > 0 {
+				if alt, exists := thumbnail.Attr("alt"); exists {
+					album.Name = common.CleanSongTitle(alt, artistName)
+				}
+
+				// Image URL could be stored if needed
+				// if src, exists := thumbnail.Attr("src"); exists { ... }
+			}
+		*/
+	} else {
+		return album, errors.New("the card has no thumbnail or url")
+	}
+
+	// Card body
+	cardBody := albumCard.Find("div.card-body")
+	if cardBody.Length() > 0 {
+		parseReleaseAnchor(cardBody.Find("a"), true)
+	}
+
+	/*
+		// Parse date from card footer
+		parseSmallDate := func(small *goquery.Selection) {
+			italic := small.Find("i")
+			if italic.Length() == 0 {
+				return
+			}
+
+			if title, exists := italic.Attr("title"); exists && title == "Добавлено" {
+				rawTime := strings.TrimSpace(small.Text())
+				// Parse date from "13.11.2021" format
+				// album.Date = parseDate(rawTime, "02.01.2006")
+			}
+		}
+
+		// Card footers
+		cardFooters := albumCard.Find("div.card-footer")
+		if cardFooters.Length() == 3 {
+			lastFooter := cardFooters.Last()
+			lastFooter.Find("small").Each(func(i int, small *goquery.Selection) {
+				parseSmallDate(small)
+			})
+		} else {
+			m.logger.Debug(fmt.Sprintf("expected 3 card footers, got %d", cardFooters.Length()))
+		}
+	*/
+
+	return album, nil
+}
+
+// fetchArtistDiscography posts the artist id to /artist/filteralbums and
+// parses every div.card of the response into an album. Cards that fail to
+// parse are printed and skipped. albumTypeBlacklist is currently unused.
+func (m *Musify) fetchArtistDiscography(parsed parsedArtistUrl, artistName string, albumTypeBlacklist []string) ([]data.Album, error) {
+	albumList := []data.Album{}
+
+	endpoint := musifyHost + "/artist/filteralbums"
+
+	// POST request with form data
+	formData := map[string]string{
+		"ArtistID":              parsed.id,
+		"SortOrder.Property":    "dateCreated",
+		"SortOrder.IsAscending": "false",
+		"X-Requested-With":      "XMLHttpRequest",
+	}
+
+	resp, err := m.session.PostForm(endpoint, formData)
+	if err != nil {
+		return albumList, err
+	}
+
+	doc, err := scraper.GetHtml(resp)
+	if err != nil {
+		return albumList, err
+	}
+
+	doc.Find("div.card").Each(func(i int, card *goquery.Selection) {
+		album, err := m.parseAlbumCard(card, artistName)
+		if err != nil {
+			// Best effort: report the broken card and keep the rest of the discography.
+			fmt.Println(err)
+			return
+		}
+
+		albumList = append(albumList, album)
+	})
+
+	return albumList, nil
+}
+
+// FetchArtist resolves source.Url into an artist by scraping the artist page
+// and then appending the albums of its discography.
+func (m *Musify) FetchArtist(source data.Source) (data.Artist, error) {
 	res := data.Artist{
 		Name: extractName(source.Url),
 	}
@@ -657,7 +906,16 @@ func (m Musify) FetchArtist(source data.Source) (data.Artist, error) {
 		return res, err
 	}
 
-	fmt.Println(parsed)
+	res, err = m.fetchInitialArtist(parsed, res)
+	if err != nil {
+		return res, err
+	}
+
+	albumList, err := m.fetchArtistDiscography(parsed, res.Name, []string{})
+	if err != nil {
+		return res, err
+	}
+	res.Albums = append(res.Albums, albumList...)
 
 	return res, nil
 }
diff --git a/internal/scraper/session.go b/internal/scraper/session.go
index c782cc4..9bcf255 100644
--- a/internal/scraper/session.go
+++ b/internal/scraper/session.go
@@ -8,6 +8,7 @@ import (
 	"net/http"
 	"net/http/cookiejar"
 	"net/url"
+	"strings"
 	"time"
 
 	"golang.org/x/net/publicsuffix"
@@ -118,6 +119,35 @@ func (s *Session) PostMultipartForm(url string, data map[string]string, headers
 	return s.client.Do(req)
 }
 
+// PostForm performs a POST request with URL-encoded form data.
+// Only the first headers map, if given, is applied after the default headers.
+func (s *Session) PostForm(rawUrl string, data map[string]string, headers ...map[string]string) (*http.Response, error) {
+	fullURL := s.buildURL(rawUrl)
+
+	// Prepare form data
+	formData := url.Values{}
+	for k, v := range data {
+		formData.Add(k, v)
+	}
+	body := strings.NewReader(formData.Encode())
+	req, err := http.NewRequest("POST", fullURL, body)
+	if err != nil {
+		return nil, err
+	}
+
+	s.setDefaultHeaders(req)
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+
+	// Add any additional headers provided
+	if len(headers) > 0 {
+		for key, value := range headers[0] {
+			req.Header.Set(key, value)
+		}
+	}
+
+	return s.client.Do(req)
+}
+
 // PostJSON performs a POST request with JSON data
 func (s *Session) PostJSON(url string, data interface{}, headers ...map[string]string) (*http.Response, error) {
 	fullURL := s.buildURL(url)