package plugin

import (
	"errors"
	"fmt"
	"net/url"
	"regexp"
	"strings"

	"gitea.elara.ws/Hazel/music-kraken/internal/common"
	"gitea.elara.ws/Hazel/music-kraken/internal/data"
	"gitea.elara.ws/Hazel/music-kraken/internal/scraper"
	"github.com/PuerkitoBio/goquery"
)

// extractName derives a display name from a URL: it takes the last
// slash-separated segment, drops the final hyphen-separated token
// (newParsedArtistUrl treats that token as the numeric id) and joins the
// remaining tokens with spaces. A segment without any hyphen yields "".
func extractName(s string) string {
	parts := strings.Split(s, "/")
	lastPart := parts[len(parts)-1]
	hyphenParts := strings.Split(lastPart, "-")
	return strings.Join(hyphenParts[:len(hyphenParts)-1], " ")
}

// musifyHost is prepended to every relative link found on the scraped pages.
const musifyHost = "https://musify.club"

// The URL patterns are compiled once at package scope instead of on every
// call of the Regex* methods.
var (
	musifyURLRegex    = regexp.MustCompile(`(?i)https?://musify\.club/(artist|release|track)/[a-z\-0-9]+`)
	musifyArtistRegex = regexp.MustCompile(`(?i)https?://musify\.club/artist/[a-z\-0-9]+`)
	musifyAlbumRegex  = regexp.MustCompile(`(?i)https?://musify\.club/release/[a-z\-0-9]+`)
	musifySongRegex   = regexp.MustCompile(`(?i)https?://musify\.club/track/[a-z\-0-9]+`)
)

// Musify is the plugin that searches and fetches music metadata from
// musify.club. Init has to be called before any method that performs a
// request, because it creates the session.
type Musify struct {
	session    *scraper.Session
	sourceType data.SourceType
}

// Name returns the identifier of this plugin.
func (m Musify) Name() string {
	return "musify"
}

// Regex returns the pattern matching any musify artist, release or track URL.
func (m Musify) Regex() *regexp.Regexp {
	return musifyURLRegex
}

// RegexArtist returns the pattern matching musify artist URLs.
func (m Musify) RegexArtist() *regexp.Regexp {
	return musifyArtistRegex
}

// RegexAlbum returns the pattern matching musify release URLs.
func (m Musify) RegexAlbum() *regexp.Regexp {
	return musifyAlbumRegex
}

// Init creates a fresh scraper session and stores the source type that is
// attached to every source produced by this plugin.
func (m *Musify) Init(sourceType data.SourceType) {
	m.session = scraper.NewSession()
	m.sourceType = sourceType
}

// RegexSong returns the pattern matching musify track URLs.
func (m Musify) RegexSong() *regexp.Regexp {
	return musifySongRegex
}

// musifyIsArtistHref reports whether the second to last slash-separated part
// of href is "artist", i.e. whether the link has the form ".../artist/<slug>".
func musifyIsArtistHref(href string) bool {
	parts := strings.Split(href, "/")
	return len(parts) > 1 && parts[len(parts)-2] == "artist"
}

// parseArtistContact builds an artist from one search result item. The
// source URL comes from the anchor's href and the name from its title
// attribute. It returns an error if the item contains no anchor.
func (m Musify) parseArtistContact(contact *goquery.Selection) (data.Artist, error) {
	artist := data.Artist{}

	anchor := contact.Find("a")
	if anchor.Length() == 0 {
		return artist, errors.New("no anchor found")
	}

	if href, ok := anchor.Attr("href"); ok {
		artist.Sources = append(artist.Sources, data.Source{
			Url:        musifyHost + href,
			ObjectType: data.ArtistSource,
			SourceType: &m.sourceType,
		})
	}
	if name, ok := anchor.Attr("title"); ok {
		artist.Name = name
	}

	// TODO: artist image; the <img> inside the contact carries the name in
	// its alt attribute and the thumbnail URL in its src attribute.

	return artist, nil
}

// parseAlbumContact builds an album from one search result item.
//
// The contact info of such an item looks like this:
//
//	Ghost Bath - 2013
//	Ghost Bath
//	Tracks: 4
//	9,04
//
// The source URL comes from the anchor's href, the name from the anchor's
// title attribute with the trailing " - <suffix>" removed, and the artists
// from the first of the three <small> elements. It returns an error if the
// item contains no anchor.
func (m Musify) parseAlbumContact(contact *goquery.Selection) (data.Album, error) {
	album := data.Album{}

	anchor := contact.Find("a")
	if anchor.Length() == 0 {
		return album, errors.New("no anchor found")
	}

	if href, ok := anchor.Attr("href"); ok {
		album.Sources = append(album.Sources, data.Source{
			Url:        musifyHost + href,
			ObjectType: data.AlbumSource,
			SourceType: &m.sourceType,
		})
	}

	if titleDate, ok := anchor.Attr("title"); ok {
		const delimiter = " - "
		title := strings.TrimSpace(titleDate)
		parts := strings.Split(title, delimiter)
		if len(parts) > 1 {
			album.Name = strings.Join(parts[:len(parts)-1], delimiter)
		} else {
			// Without a delimiter there is no suffix to strip; dropping the
			// only part would leave the album without a name.
			album.Name = title
		}
	}

	// The three <small> elements are artist, track count and rating; only
	// the artist is used so far.
	smallList := contact.Find("div.contacts__info").Find("small")
	if smallList.Length() == 3 {
		for _, artistStr := range strings.Split(smallList.First().Text(), "&") {
			artistStr = strings.TrimRight(artistStr, "& ...\r\n")
			artistStr = strings.TrimSpace(artistStr)

			// Cut off a trailing "[...]" annotation.
			if idx := strings.Index(artistStr, "["); idx >= 0 && strings.HasSuffix(artistStr, "]") {
				artistStr = strings.TrimSpace(artistStr[:idx])
			}

			// Fragments such as a truncation marker "..." trim down to
			// nothing and must not become nameless artists.
			if artistStr == "" {
				continue
			}

			album.Artists = append(album.Artists, data.Artist{
				Name: artistStr,
			})
		}
	}

	return album, nil
}

// parseContactContainer parses every div.contacts__item of a search result
// container. Items whose link contains "artist" become artists, items whose
// link contains "release" become albums; items that fail to parse or match
// neither are skipped.
func (m Musify) parseContactContainer(contactContainer *goquery.Selection) []data.MusicObject {
	res := []data.MusicObject{}

	contactContainer.Find("div.contacts__item").Each(func(_ int, contact *goquery.Selection) {
		href, ok := contact.Find("a").Attr("href")
		if !ok {
			return
		}

		switch {
		case strings.Contains(href, "artist"):
			if artist, err := m.parseArtistContact(contact); err == nil {
				res = append(res, artist)
			}
		case strings.Contains(href, "release"):
			if album, err := m.parseAlbumContact(contact); err == nil {
				res = append(res, album)
			}
		}
	})

	return res
}

// parsePlaylistItem builds a song from one playlist item of the search
// results. The name comes from the data-name attribute. Inside
// div.playlist__heading the last anchor links to the track and all anchors
// before it link to artists. It returns an error if the heading exists but
// holds fewer than two anchors; an item without heading yields a song with
// only the name set.
func (m Musify) parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
	song := data.Song{}
	// A missing attribute simply leaves the name empty.
	song.Name, _ = playlistItem.Attr("data-name")

	heading := playlistItem.Find("div.playlist__heading")
	if heading.Length() == 0 {
		return song, nil
	}

	anchors := heading.Find("a")
	count := anchors.Length()
	if count < 2 {
		return song, errors.New("there are not enough anchors (2) for artist and track")
	}

	anchors.Each(func(i int, artistAnchor *goquery.Selection) {
		// The last anchor is the track itself.
		if i == count-1 {
			return
		}
		if href, ok := artistAnchor.Attr("href"); ok {
			song.Artists = append(song.Artists, data.Artist{
				Name: strings.TrimSpace(artistAnchor.Text()),
				Sources: []data.Source{
					{Url: musifyHost + href, ObjectType: data.ArtistSource, SourceType: &m.sourceType},
				},
			})
		}
	})

	if href, ok := anchors.Last().Attr("href"); ok {
		song.Sources = append(song.Sources, data.Source{
			Url:        musifyHost + href,
			ObjectType: data.SongSource,
			SourceType: &m.sourceType,
		})
	}

	return song, nil
}

// parsePlaylist parses every div.playlist__item of a playlist into a song and
// skips the items that fail to parse.
func (m Musify) parsePlaylist(playlist *goquery.Selection) []data.MusicObject {
	res := []data.MusicObject{}

	playlist.Find("div.playlist__item").Each(func(_ int, playlistItem *goquery.Selection) {
		if song, err := m.parsePlaylistItem(playlistItem); err == nil {
			res = append(res, song)
		}
	})

	return res
}

// Search posts query.Search to the search page of musify and returns the
// artists and albums found in the div.contacts containers followed by the
// songs found in the div.playlist containers. On a request or parsing error
// it returns that error together with an empty, non-nil slice.
func (m *Musify) Search(query common.Query) ([]data.MusicObject, error) {
	musicObjects := []data.MusicObject{}

	resp, err := m.session.PostMultipartForm(musifyHost+"/search", map[string]string{
		"SearchText": query.Search, // alternatively year and genre could be added
	})
	if err != nil {
		return musicObjects, err
	}

	doc, err := scraper.GetHtml(resp)
	if err != nil {
		return musicObjects, err
	}

	doc.Find("div.contacts").Each(func(_ int, contactContainer *goquery.Selection) {
		musicObjects = append(musicObjects, m.parseContactContainer(contactContainer)...)
	})

	doc.Find("div.playlist").Each(func(_ int, playlist *goquery.Selection) {
		musicObjects = append(musicObjects, m.parsePlaylist(playlist)...)
	})

	return musicObjects, nil
}

// FetchSong requests the page at source.Url and extracts the song name, its
// artists and its album. The returned song always carries source in its
// Sources. If the page has a breadcrumb list, it must consist of exactly five
// items, otherwise an error is returned together with what was parsed so far.
func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
	song := data.Song{
		Sources: []data.Source{source},
	}

	resp, err := m.session.Get(source.Url)
	if err != nil {
		return song, err
	}

	doc, err := scraper.GetHtml(resp)
	if err != nil {
		return song, err
	}

	// TODO: download URL, found in the href of a[itemprop='audio'].

	// Song detail: the artists are the anchors inside the first list item of
	// the last ul.album-info on the page.
	doc.Find("ul.album-info").Last().Find("li").First().Find("a").Each(func(_ int, artistAnchor *goquery.Selection) {
		if href, ok := artistAnchor.Attr("href"); ok {
			song.Artists = append(song.Artists, data.Artist{
				Name: strings.TrimSpace(artistAnchor.Text()),
				Sources: []data.Source{
					{Url: musifyHost + href, ObjectType: data.ArtistSource, SourceType: &m.sourceType},
				},
			})
		}
	})

	// Breadcrumbs: item 2 is the artist, item 3 the album, item 4 the song.
	breadcrumbs := doc.Find("ol.breadcrumb")
	if breadcrumbs.Length() == 0 {
		return song, nil
	}

	items := breadcrumbs.Find("li.breadcrumb-item")
	if n := items.Length(); n != 5 {
		return song, fmt.Errorf("expected 5 breadcrumbs on page, got %d", n)
	}

	if artistAnchor := items.Eq(2).Find("a"); artistAnchor.Length() > 0 {
		artist := data.Artist{}

		href, hasHref := artistAnchor.Attr("href")
		if hasHref {
			artist.Sources = append(artist.Sources, data.Source{Url: musifyHost + href, ObjectType: data.ArtistSource, SourceType: &m.sourceType})
		}

		nameElem := artistAnchor.Find("span[itemprop='name']")
		hasName := nameElem.Length() > 0
		if hasName {
			artist.Name = strings.TrimSpace(nameElem.Text())
		}

		// The breadcrumb only counts as artist if it links to an artist page
		// and carries a name element.
		if hasHref && hasName && musifyIsArtistHref(href) {
			song.Artists = append(song.Artists, artist)
		}
	}

	if albumAnchor := items.Eq(3).Find("a"); albumAnchor.Length() > 0 {
		if href, ok := albumAnchor.Attr("href"); ok {
			song.Album.Sources = append(song.Album.Sources, data.Source{
				Url:        musifyHost + href,
				ObjectType: data.AlbumSource,
				SourceType: &m.sourceType,
			})
		}
		if nameElem := albumAnchor.Find("span[itemprop='name']"); nameElem.Length() > 0 {
			song.Album.Name = strings.TrimSpace(nameElem.Text())
		}
	}

	song.Name = strings.TrimSpace(items.Eq(4).Text())

	return song, nil
}

// parseSongCard builds a song from one playlist item of an album page. The
// name defaults to the data-name attribute and is replaced by the text of the
// last anchor in div.playlist__details if that element holds more than one
// anchor. Artists are read from the span[itemprop='byArtist'] elements.
func (m Musify) parseSongCard(songCard *goquery.Selection) data.Song {
	song := data.Song{
		Artists: []data.Artist{},
		Sources: []data.Source{},
	}

	// A missing attribute simply leaves the name empty.
	song.Name, _ = songCard.Attr("data-name")

	// TODO: tracksort, the integer text of div.playlist__position.

	details := songCard.Find("div.playlist__details")
	if details.Length() == 0 {
		return song
	}

	// The last anchor links to the track.
	if anchors := details.Find("a"); anchors.Length() > 1 {
		trackAnchor := anchors.Last()
		if href, ok := trackAnchor.Attr("href"); ok {
			song.Sources = append(song.Sources, data.Source{Url: musifyHost + href, ObjectType: data.SongSource, SourceType: &m.sourceType})
		}
		song.Name = strings.TrimSpace(trackAnchor.Text())
	}

	details.Find("span[itemprop='byArtist']").Each(func(_ int, artistSpan *goquery.Selection) {
		artist := data.Artist{
			Sources: []data.Source{},
		}

		if content, ok := artistSpan.Find("meta[itemprop='url']").Attr("content"); ok && content != "" {
			artist.Sources = append(artist.Sources, data.Source{
				Url:        musifyHost + content,
				ObjectType: data.ArtistSource,
				SourceType: &m.sourceType,
			})
		}

		if content, ok := artistSpan.Find("meta[itemprop='name']").Attr("content"); ok && content != "" {
			artist.Name = content
		}

		// Spans that provide neither a name nor a URL are dropped.
		if artist.Name != "" || len(artist.Sources) > 0 {
			song.Artists = append(song.Artists, artist)
		}
	})

	// TODO: download link, found in div.playlist__actions a[itemprop='audio'].

	return song
}

// parseAlbum extracts name, sources and artists of an album from its page.
// The songs are not parsed here.
//
// NOTE(review): the artist from the breadcrumb and the artists from
// ul.album-info are both appended, so the same artist may show up twice.
func (m Musify) parseAlbum(doc *goquery.Document) data.Album {
	album := data.Album{
		Artists: []data.Artist{},
		Sources: []data.Source{},
		Songs:   []data.Song{},
	}

	// Breadcrumbs: with exactly four items, item 3 is the album and item 2
	// the artist. Any other layout is ignored.
	breadcrumbs := doc.Find("ol.breadcrumb").Find("li.breadcrumb-item")
	if breadcrumbs.Length() == 4 {
		album.Name = strings.TrimSpace(breadcrumbs.Eq(3).Text())

		artistAnchor := breadcrumbs.Eq(2).Find("a")
		if href, ok := artistAnchor.Attr("href"); ok && musifyIsArtistHref(href) {
			artist := data.Artist{}
			artist.Sources = append(artist.Sources, data.Source{
				Url:        musifyHost + strings.TrimSpace(href),
				ObjectType: data.ArtistSource,
				SourceType: &m.sourceType,
			})

			// Prefer the text of the inner span over the whole anchor text.
			if span := artistAnchor.Find("span"); span.Length() > 0 {
				artist.Name = strings.TrimSpace(span.Text())
			} else {
				artist.Name = strings.TrimSpace(artistAnchor.Text())
			}

			album.Artists = append(album.Artists, artist)
		}
	}

	// Meta tags: the first url/name meta element of the document. The name
	// found here overrides the one from the breadcrumb.
	if content, ok := doc.Find("meta[itemprop='url']").Attr("content"); ok {
		album.Sources = append(album.Sources, data.Source{
			Url:        musifyHost + content,
			ObjectType: data.AlbumSource,
			SourceType: &m.sourceType,
		})
	}
	if content, ok := doc.Find("meta[itemprop='name']").Attr("content"); ok {
		album.Name = content
	}

	// Album info: artists without a name are dropped.
	doc.Find("ul.album-info").Find("a[itemprop='byArtist']").Each(func(_ int, artistAnchor *goquery.Selection) {
		artist := data.Artist{
			Sources: []data.Source{},
		}

		if content, ok := artistAnchor.Find("meta[itemprop='url']").Attr("content"); ok {
			artist.Sources = append(artist.Sources, data.Source{
				Url:        musifyHost + content,
				ObjectType: data.ArtistSource,
				SourceType: &m.sourceType,
			})
		}

		if content, ok := artistAnchor.Find("meta[itemprop='name']").Attr("content"); ok {
			artist.Name = content
		}

		if artist.Name != "" {
			album.Artists = append(album.Artists, artist)
		}
	})

	// TODO: release date, found in the datetime attribute of
	// time[itemprop='datePublished'] inside ul.album-info.
	// TODO: album artwork.

	return album
}

// FetchAlbum requests the page at source.Url and returns the album with its
// metadata and the songs listed in div.card-body. The returned album always
// carries source as its first source.
func (m Musify) FetchAlbum(source data.Source) (data.Album, error) {
	album := data.Album{
		Sources: []data.Source{source},
		Artists: []data.Artist{},
		Songs:   []data.Song{},
	}

	resp, err := m.session.Get(source.Url)
	if err != nil {
		return album, err
	}

	doc, err := scraper.GetHtml(resp)
	if err != nil {
		return album, err
	}

	parsedAlbum := m.parseAlbum(doc)
	album.Name = parsedAlbum.Name
	album.Artists = parsedAlbum.Artists
	album.Sources = append(album.Sources, parsedAlbum.Sources...)

	doc.Find("div.card-body").Find("div.playlist__item").Each(func(_ int, songCard *goquery.Selection) {
		album.Songs = append(album.Songs, m.parseSongCard(songCard))
	})

	// TODO: update tracksort.

	return album, nil
}

// parsedArtistUrl holds the components of an artist URL whose last path
// segment has the form "<name>-<id>".
type parsedArtistUrl struct {
	id   string // numeric token after the last hyphen
	name string // everything before the last hyphen
	url  string // the raw URL as passed in
}

// newParsedArtistUrl splits the last path segment of rawUrl at its last
// hyphen into name and id. Trailing slashes of the path are ignored. It
// returns an error if the URL cannot be parsed, if the segment contains no
// hyphen, or if the id is not numeric; the url field of the result is set in
// every case.
func newParsedArtistUrl(rawUrl string) (parsedArtistUrl, error) {
	res := parsedArtistUrl{
		url: rawUrl,
	}

	parsed, err := url.Parse(rawUrl)
	if err != nil {
		return res, err
	}

	// Without the trim a trailing slash would make the last segment empty
	// and an otherwise valid URL would be rejected.
	dirs := strings.Split(strings.TrimRight(parsed.Path, "/"), "/")
	lastSegment := dirs[len(dirs)-1]

	tokens := strings.Split(lastSegment, "-")
	if len(tokens) < 2 {
		return res, errors.New("last part of path has to consist of at least one - " + lastSegment)
	}

	res.id = tokens[len(tokens)-1]
	res.name = strings.Join(tokens[:len(tokens)-1], "-")

	if !common.IsNumeric(res.id) {
		return res, errors.New("last elem (id) has to be numeric " + res.id)
	}

	return res, nil
}

// fetchInitialArtist requests the artist page and fills artist with the name
// and the additional sources found there. If the page has a breadcrumb list,
// it must consist of exactly three items, otherwise an error is returned.
func (m *Musify) fetchInitialArtist(parsed parsedArtistUrl, artist data.Artist) (data.Artist, error) {
	// NOTE(review): only the name part of the slug (without the numeric id)
	// is requested, and everything after '#' is a URL fragment, which HTTP
	// clients normally do not send to the server. Confirm both are intended.
	endpoint := fmt.Sprintf("%s/artist/%s?_pjax=#bodyContent", musifyHost, parsed.name)

	resp, err := m.session.Get(endpoint)
	if err != nil {
		return artist, err
	}

	doc, err := scraper.GetHtml(resp)
	if err != nil {
		return artist, err
	}

	// Breadcrumbs: the third item is the artist name.
	if breadcrumbs := doc.Find("ol.breadcrumb"); breadcrumbs.Length() > 0 {
		items := breadcrumbs.Find("li.breadcrumb-item")
		if items.Length() != 3 {
			return artist, errors.New("breadcrumb layout on artist page changed")
		}
		artist.Name = strings.TrimSpace(items.Eq(2).Text())
	}

	// Nav tabs: the tab whose label starts with "песни" ("songs") links to
	// the song list of the artist.
	doc.Find("ul.nav-tabs").Find("li.nav-item").Each(func(_ int, listItem *goquery.Selection) {
		if !strings.HasPrefix(strings.TrimSpace(listItem.Text()), "песни") {
			return
		}
		if href, ok := listItem.Find("a").Attr("href"); ok {
			artist.Sources = append(artist.Sources, data.Source{
				Url: musifyHost + href,
			})
		}
	})

	// Content title: the heading overrides the name from the breadcrumbs.
	if h1Name := doc.Find("header.content__title").Find("h1"); h1Name.Length() > 0 {
		artist.Name = strings.TrimSpace(h1Name.Text())
	}

	// TODO: country; the i.flag-icon element inside ul.icon-list carries the
	// two letter country code as one of its classes (besides "flag-icon" and
	// "shadow").

	// Additional sources: external links of the icon list, stored as they
	// are without source or object type.
	doc.Find("ul.icon-list").Find("a.link").Each(func(_ int, additionalSource *goquery.Selection) {
		if href, ok := additionalSource.Attr("href"); ok {
			artist.Sources = append(artist.Sources, data.Source{Url: href})
		}
	})

	// TODO: notes, found in the HTML of #text-main.
	// TODO: artwork, found in the data-src (fallback src) attribute of
	// img.artist-img.

	return artist, nil
}

// parseAlbumCard builds an album from one card of the discography listing.
// The first anchor of the card provides a source; the first anchor inside
// div.card-body provides another source and the album name, which is cleaned
// with common.CleanSongTitle and artistName. It returns an error if the card
// contains no anchor at all.
func (m Musify) parseAlbumCard(albumCard *goquery.Selection, artistName string) (data.Album, error) {
	album := data.Album{
		Sources: []data.Source{},
		Artists: []data.Artist{},
		Songs:   []data.Song{},
	}

	// TODO: album type from the integer data-type attribute of the card
	// (type 5 marks a bootleg).

	// parseReleaseAnchor adds the link of anchor as album source and, if
	// textIsName is set, uses the anchor text as album name.
	parseReleaseAnchor := func(anchor *goquery.Selection, textIsName bool) {
		if href, ok := anchor.Attr("href"); ok {
			album.Sources = append(album.Sources, data.Source{
				Url:        musifyHost + href,
				ObjectType: data.AlbumSource,
				SourceType: &m.sourceType,
			})
		}
		if textIsName {
			album.Name = common.CleanSongTitle(strings.TrimSpace(anchor.Text()), artistName)
		}
	}

	// Main thumbnail anchor.
	anchors := albumCard.Find("a")
	if anchors.Length() == 0 {
		return album, errors.New("the card has no thumbnail or url")
	}
	parseReleaseAnchor(anchors.First(), false)

	// TODO: thumbnail; the <img> inside the main anchor carries the album
	// name in its alt attribute and the image URL in its src attribute.

	// Card body: only the first anchor is used, because Text on a selection
	// of several anchors would concatenate all their texts into the name.
	if cardBody := albumCard.Find("div.card-body"); cardBody.Length() > 0 {
		parseReleaseAnchor(cardBody.Find("a").First(), true)
	}

	// TODO: date; the last of the three div.card-footer elements holds a
	// <small> whose <i> has the title "Добавлено" ("added") and whose text
	// is a date in the format 02.01.2006.

	return album, nil
}

// fetchArtistDiscography requests the albums of the artist with the id of
// parsed, newest first, and parses every div.card of the response. Cards that
// cannot be parsed are skipped. albumTypeBlacklist is currently unused.
func (m *Musify) fetchArtistDiscography(parsed parsedArtistUrl, artistName string, albumTypeBlacklist []string) ([]data.Album, error) {
	albumList := []data.Album{}

	endpoint := musifyHost + "/discography/filteralbums"

	formData := map[string]string{
		"ArtistID":              parsed.id,
		"SortOrder.Property":    "dateCreated",
		"SortOrder.IsAscending": "false",
		"X-Requested-With":      "XMLHttpRequest",
	}

	resp, err := m.session.PostForm(endpoint, formData, map[string]string{"X-Requested-With": "XMLHttpRequest"})
	if err != nil {
		return albumList, err
	}

	doc, err := scraper.GetHtml(resp)
	if err != nil {
		return albumList, err
	}

	doc.Find("div.card").Each(func(_ int, card *goquery.Selection) {
		album, err := m.parseAlbumCard(card, artistName)
		if err != nil {
			// A card without any link is no usable album; it is dropped the
			// same way unparsable search results are dropped.
			return
		}
		albumList = append(albumList, album)
	})

	return albumList, nil
}

// FetchArtist fetches the artist page and the discography belonging to
// source.Url. The URL has to end in "<name>-<id>" with a numeric id.
//
// NOTE(review): unlike FetchSong and FetchAlbum, source itself is not added
// to the Sources of the result. Confirm whether that is intended.
func (m *Musify) FetchArtist(source data.Source) (data.Artist, error) {
	res := data.Artist{
		// Preliminary name; it is replaced by the one found on the page.
		Name: extractName(source.Url),
	}

	parsed, err := newParsedArtistUrl(source.Url)
	if err != nil {
		return res, err
	}

	res, err = m.fetchInitialArtist(parsed, res)
	if err != nil {
		return res, err
	}

	albumList, err := m.fetchArtistDiscography(parsed, res.Name, []string{})
	if err != nil {
		return res, err
	}
	res.Albums = append(res.Albums, albumList...)

	return res, nil
}