adding source types in musify scrapers

This commit is contained in:
Hazel Noack
2025-10-09 13:48:05 +02:00
parent d74a324999
commit 6a1baa9eed

View File

@@ -51,7 +51,7 @@ func (m Musify) RegexSong() *regexp.Regexp {
return regexp.MustCompile(`(?i)https?://musify\.club/track/[a-z\-0-9]+`) return regexp.MustCompile(`(?i)https?://musify\.club/track/[a-z\-0-9]+`)
} }
func parseArtistContact(contact *goquery.Selection) (data.Artist, error) { func (m Musify) parseArtistContact(contact *goquery.Selection) (data.Artist, error) {
artist := data.Artist{} artist := data.Artist{}
var err error var err error
@@ -63,6 +63,7 @@ func parseArtistContact(contact *goquery.Selection) (data.Artist, error) {
artist.Sources = append(artist.Sources, data.Source{ artist.Sources = append(artist.Sources, data.Source{
Url: musifyHost + url, Url: musifyHost + url,
ObjectType: data.ArtistSource, ObjectType: data.ArtistSource,
SourceType: &m.sourceType,
}) })
} }
@@ -87,7 +88,7 @@ func parseArtistContact(contact *goquery.Selection) (data.Artist, error) {
return artist, err return artist, err
} }
func parseAlbumContact(contact *goquery.Selection) (data.Album, error) { func (m Musify) parseAlbumContact(contact *goquery.Selection) (data.Album, error) {
album := data.Album{} album := data.Album{}
var err error var err error
@@ -116,6 +117,7 @@ func parseAlbumContact(contact *goquery.Selection) (data.Album, error) {
album.Sources = append(album.Sources, data.Source{ album.Sources = append(album.Sources, data.Source{
Url: musifyHost + url, Url: musifyHost + url,
ObjectType: data.AlbumSource, ObjectType: data.AlbumSource,
SourceType: &m.sourceType,
}) })
} }
@@ -169,7 +171,7 @@ func parseAlbumContact(contact *goquery.Selection) (data.Album, error) {
return album, err return album, err
} }
func parseContactContainer(contactContainer *goquery.Selection) []data.MusicObject { func (m Musify) parseContactContainer(contactContainer *goquery.Selection) []data.MusicObject {
res := []data.MusicObject{} res := []data.MusicObject{}
contactContainer.Find("div.contacts__item").Each(func(i int, contact *goquery.Selection) { contactContainer.Find("div.contacts__item").Each(func(i int, contact *goquery.Selection) {
@@ -180,11 +182,11 @@ func parseContactContainer(contactContainer *goquery.Selection) []data.MusicObje
if exists { if exists {
if strings.Contains(url, "artist") { if strings.Contains(url, "artist") {
if artist, err := parseArtistContact(contact); err == nil { if artist, err := m.parseArtistContact(contact); err == nil {
res = append(res, artist) res = append(res, artist)
} }
} else if strings.Contains(url, "release") { } else if strings.Contains(url, "release") {
if album, err := parseAlbumContact(contact); err == nil { if album, err := m.parseAlbumContact(contact); err == nil {
res = append(res, album) res = append(res, album)
} }
} }
@@ -195,7 +197,7 @@ func parseContactContainer(contactContainer *goquery.Selection) []data.MusicObje
return res return res
} }
func parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) { func (m Musify) parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
song := data.Song{} song := data.Song{}
var err error var err error
@@ -213,7 +215,7 @@ func parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
song.Artists = append(song.Artists, data.Artist{ song.Artists = append(song.Artists, data.Artist{
Name: strings.TrimSpace(artistAnchor.Text()), Name: strings.TrimSpace(artistAnchor.Text()),
Sources: []data.Source{ Sources: []data.Source{
{Url: musifyHost + url, ObjectType: data.ArtistSource}, {Url: musifyHost + url, ObjectType: data.ArtistSource, SourceType: &m.sourceType},
}, },
}) })
} }
@@ -226,6 +228,7 @@ func parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
song.Sources = append(song.Sources, data.Source{ song.Sources = append(song.Sources, data.Source{
Url: musifyHost + href, Url: musifyHost + href,
ObjectType: data.SongSource, ObjectType: data.SongSource,
SourceType: &m.sourceType,
}) })
} }
@@ -237,11 +240,11 @@ func parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
return song, err return song, err
} }
func parsePlaylist(playlist *goquery.Selection) []data.MusicObject { func (m Musify) parsePlaylist(playlist *goquery.Selection) []data.MusicObject {
res := []data.MusicObject{} res := []data.MusicObject{}
playlist.Find("div.playlist__item").Each(func(i int, playlistItem *goquery.Selection) { playlist.Find("div.playlist__item").Each(func(i int, playlistItem *goquery.Selection) {
if song, err := parsePlaylistItem(playlistItem); err == nil { if song, err := m.parsePlaylistItem(playlistItem); err == nil {
res = append(res, song) res = append(res, song)
} }
}) })
@@ -265,11 +268,11 @@ func (m *Musify) Search(query common.Query) ([]data.MusicObject, error) {
} }
doc.Find("div.contacts").Each(func(i int, contactContainer *goquery.Selection) { doc.Find("div.contacts").Each(func(i int, contactContainer *goquery.Selection) {
musicObjects = append(musicObjects, parseContactContainer(contactContainer)...) musicObjects = append(musicObjects, m.parseContactContainer(contactContainer)...)
}) })
doc.Find("div.playlist").Each(func(i int, playlist *goquery.Selection) { doc.Find("div.playlist").Each(func(i int, playlist *goquery.Selection) {
musicObjects = append(musicObjects, parsePlaylist(playlist)...) musicObjects = append(musicObjects, m.parsePlaylist(playlist)...)
}) })
return musicObjects, nil return musicObjects, nil
@@ -314,7 +317,7 @@ func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
song.Artists = append(song.Artists, data.Artist{ song.Artists = append(song.Artists, data.Artist{
Name: strings.TrimSpace(artistAnchor.Text()), Name: strings.TrimSpace(artistAnchor.Text()),
Sources: []data.Source{ Sources: []data.Source{
{Url: musifyHost + href}, {Url: musifyHost + href, ObjectType: data.ArtistSource, SourceType: &m.sourceType},
}, },
}) })
} }
@@ -339,7 +342,7 @@ func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
useArtist = false useArtist = false
} }
artist.Sources = append(artist.Sources, data.Source{Url: musifyHost + href}) artist.Sources = append(artist.Sources, data.Source{Url: musifyHost + href, ObjectType: data.ArtistSource, SourceType: &m.sourceType})
} else { } else {
useArtist = false useArtist = false
} }
@@ -359,6 +362,8 @@ func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
if href, exists := albumAnchor.Attr("href"); exists { if href, exists := albumAnchor.Attr("href"); exists {
song.Album.Sources = append(song.Album.Sources, data.Source{ song.Album.Sources = append(song.Album.Sources, data.Source{
Url: musifyHost + href, Url: musifyHost + href,
ObjectType: data.AlbumSource,
SourceType: &m.sourceType,
}) })
} }
@@ -373,7 +378,7 @@ func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
return song, nil return song, nil
} }
func parseSongCard(songCard *goquery.Selection) data.Song { func (m Musify) parseSongCard(songCard *goquery.Selection) data.Song {
song := data.Song{ song := data.Song{
Artists: []data.Artist{}, Artists: []data.Artist{},
Sources: []data.Source{}, Sources: []data.Source{},
@@ -402,7 +407,7 @@ func parseSongCard(songCard *goquery.Selection) data.Song {
if anchorList.Length() > 1 { if anchorList.Length() > 1 {
trackAnchor := anchorList.Last() trackAnchor := anchorList.Last()
if href, exists := trackAnchor.Attr("href"); exists { if href, exists := trackAnchor.Attr("href"); exists {
song.Sources = append(song.Sources, data.Source{Url: musifyHost + href}) song.Sources = append(song.Sources, data.Source{Url: musifyHost + href, ObjectType: data.SongSource, SourceType: &m.sourceType})
} }
song.Name = strings.TrimSpace(trackAnchor.Text()) song.Name = strings.TrimSpace(trackAnchor.Text())
} }
@@ -419,6 +424,8 @@ func parseSongCard(songCard *goquery.Selection) data.Song {
if content, exists := metaArtistSrc.Attr("content"); exists && content != "" { if content, exists := metaArtistSrc.Attr("content"); exists && content != "" {
artist.Sources = append(artist.Sources, data.Source{ artist.Sources = append(artist.Sources, data.Source{
Url: musifyHost + content, Url: musifyHost + content,
ObjectType: data.ArtistSource,
SourceType: &m.sourceType,
}) })
} }
} }
@@ -456,7 +463,7 @@ func parseSongCard(songCard *goquery.Selection) data.Song {
return song return song
} }
func parseAlbum(doc *goquery.Document) data.Album { func (m Musify) parseAlbum(doc *goquery.Document) data.Album {
album := data.Album{ album := data.Album{
Artists: []data.Artist{}, Artists: []data.Artist{},
Sources: []data.Source{}, Sources: []data.Source{},
@@ -478,12 +485,12 @@ func parseAlbum(doc *goquery.Document) data.Album {
if href, exists := artistAnchor.Attr("href"); exists { if href, exists := artistAnchor.Attr("href"); exists {
hrefParts := strings.Split(href, "/") hrefParts := strings.Split(href, "/")
if len(hrefParts) > 1 && hrefParts[len(hrefParts)-2] == "artist" { if len(hrefParts) > 1 && hrefParts[len(hrefParts)-2] == "artist" {
artist := data.Artist{ artist := data.Artist{}
Sources: []data.Source{},
}
artist.Sources = append(artist.Sources, data.Source{ artist.Sources = append(artist.Sources, data.Source{
Url: musifyHost + strings.TrimSpace(href), Url: musifyHost + strings.TrimSpace(href),
ObjectType: data.ArtistSource,
SourceType: &m.sourceType,
}) })
// Artist name from span // Artist name from span
@@ -507,6 +514,8 @@ func parseAlbum(doc *goquery.Document) data.Album {
if content, exists := metaURL.Attr("content"); exists { if content, exists := metaURL.Attr("content"); exists {
album.Sources = append(album.Sources, data.Source{ album.Sources = append(album.Sources, data.Source{
Url: musifyHost + content, Url: musifyHost + content,
ObjectType: data.AlbumSource,
SourceType: &m.sourceType,
}) })
} }
} }
@@ -533,6 +542,8 @@ func parseAlbum(doc *goquery.Document) data.Album {
if content, exists := artistURLMeta.Attr("content"); exists { if content, exists := artistURLMeta.Attr("content"); exists {
artist.Sources = append(artist.Sources, data.Source{ artist.Sources = append(artist.Sources, data.Source{
Url: musifyHost + content, Url: musifyHost + content,
ObjectType: data.ArtistSource,
SourceType: &m.sourceType,
}) })
} }
} }
@@ -586,7 +597,7 @@ func (m Musify) FetchAlbum(source data.Source) (data.Album, error) {
} }
// Parse album metadata // Parse album metadata
parsedAlbum := parseAlbum(doc) parsedAlbum := m.parseAlbum(doc)
album.Name = parsedAlbum.Name album.Name = parsedAlbum.Name
album.Artists = parsedAlbum.Artists album.Artists = parsedAlbum.Artists
album.Sources = append(album.Sources, parsedAlbum.Sources...) album.Sources = append(album.Sources, parsedAlbum.Sources...)
@@ -595,7 +606,7 @@ func (m Musify) FetchAlbum(source data.Source) (data.Album, error) {
cardBody := doc.Find("div.card-body") cardBody := doc.Find("div.card-body")
if cardBody.Length() > 0 { if cardBody.Length() > 0 {
cardBody.Find("div.playlist__item").Each(func(i int, songCard *goquery.Selection) { cardBody.Find("div.playlist__item").Each(func(i int, songCard *goquery.Selection) {
song := parseSongCard(songCard) song := m.parseSongCard(songCard)
album.Songs = append(album.Songs, song) album.Songs = append(album.Songs, song)
}) })
} }