Add source types to the Musify scrapers

This commit is contained in:
Hazel Noack
2025-10-09 13:48:05 +02:00
parent d74a324999
commit 6a1baa9eed

View File

@@ -51,7 +51,7 @@ func (m Musify) RegexSong() *regexp.Regexp {
return regexp.MustCompile(`(?i)https?://musify\.club/track/[a-z\-0-9]+`)
}
func parseArtistContact(contact *goquery.Selection) (data.Artist, error) {
func (m Musify) parseArtistContact(contact *goquery.Selection) (data.Artist, error) {
artist := data.Artist{}
var err error
@@ -63,6 +63,7 @@ func parseArtistContact(contact *goquery.Selection) (data.Artist, error) {
artist.Sources = append(artist.Sources, data.Source{
Url: musifyHost + url,
ObjectType: data.ArtistSource,
SourceType: &m.sourceType,
})
}
@@ -87,7 +88,7 @@ func parseArtistContact(contact *goquery.Selection) (data.Artist, error) {
return artist, err
}
func parseAlbumContact(contact *goquery.Selection) (data.Album, error) {
func (m Musify) parseAlbumContact(contact *goquery.Selection) (data.Album, error) {
album := data.Album{}
var err error
@@ -116,6 +117,7 @@ func parseAlbumContact(contact *goquery.Selection) (data.Album, error) {
album.Sources = append(album.Sources, data.Source{
Url: musifyHost + url,
ObjectType: data.AlbumSource,
SourceType: &m.sourceType,
})
}
@@ -169,7 +171,7 @@ func parseAlbumContact(contact *goquery.Selection) (data.Album, error) {
return album, err
}
func parseContactContainer(contactContainer *goquery.Selection) []data.MusicObject {
func (m Musify) parseContactContainer(contactContainer *goquery.Selection) []data.MusicObject {
res := []data.MusicObject{}
contactContainer.Find("div.contacts__item").Each(func(i int, contact *goquery.Selection) {
@@ -180,11 +182,11 @@ func parseContactContainer(contactContainer *goquery.Selection) []data.MusicObje
if exists {
if strings.Contains(url, "artist") {
if artist, err := parseArtistContact(contact); err == nil {
if artist, err := m.parseArtistContact(contact); err == nil {
res = append(res, artist)
}
} else if strings.Contains(url, "release") {
if album, err := parseAlbumContact(contact); err == nil {
if album, err := m.parseAlbumContact(contact); err == nil {
res = append(res, album)
}
}
@@ -195,7 +197,7 @@ func parseContactContainer(contactContainer *goquery.Selection) []data.MusicObje
return res
}
func parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
func (m Musify) parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
song := data.Song{}
var err error
@@ -213,7 +215,7 @@ func parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
song.Artists = append(song.Artists, data.Artist{
Name: strings.TrimSpace(artistAnchor.Text()),
Sources: []data.Source{
{Url: musifyHost + url, ObjectType: data.ArtistSource},
{Url: musifyHost + url, ObjectType: data.ArtistSource, SourceType: &m.sourceType},
},
})
}
@@ -226,6 +228,7 @@ func parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
song.Sources = append(song.Sources, data.Source{
Url: musifyHost + href,
ObjectType: data.SongSource,
SourceType: &m.sourceType,
})
}
@@ -237,11 +240,11 @@ func parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
return song, err
}
func parsePlaylist(playlist *goquery.Selection) []data.MusicObject {
func (m Musify) parsePlaylist(playlist *goquery.Selection) []data.MusicObject {
res := []data.MusicObject{}
playlist.Find("div.playlist__item").Each(func(i int, playlistItem *goquery.Selection) {
if song, err := parsePlaylistItem(playlistItem); err == nil {
if song, err := m.parsePlaylistItem(playlistItem); err == nil {
res = append(res, song)
}
})
@@ -265,11 +268,11 @@ func (m *Musify) Search(query common.Query) ([]data.MusicObject, error) {
}
doc.Find("div.contacts").Each(func(i int, contactContainer *goquery.Selection) {
musicObjects = append(musicObjects, parseContactContainer(contactContainer)...)
musicObjects = append(musicObjects, m.parseContactContainer(contactContainer)...)
})
doc.Find("div.playlist").Each(func(i int, playlist *goquery.Selection) {
musicObjects = append(musicObjects, parsePlaylist(playlist)...)
musicObjects = append(musicObjects, m.parsePlaylist(playlist)...)
})
return musicObjects, nil
@@ -314,7 +317,7 @@ func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
song.Artists = append(song.Artists, data.Artist{
Name: strings.TrimSpace(artistAnchor.Text()),
Sources: []data.Source{
{Url: musifyHost + href},
{Url: musifyHost + href, ObjectType: data.ArtistSource, SourceType: &m.sourceType},
},
})
}
@@ -339,7 +342,7 @@ func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
useArtist = false
}
artist.Sources = append(artist.Sources, data.Source{Url: musifyHost + href})
artist.Sources = append(artist.Sources, data.Source{Url: musifyHost + href, ObjectType: data.ArtistSource, SourceType: &m.sourceType})
} else {
useArtist = false
}
@@ -359,6 +362,8 @@ func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
if href, exists := albumAnchor.Attr("href"); exists {
song.Album.Sources = append(song.Album.Sources, data.Source{
Url: musifyHost + href,
ObjectType: data.AlbumSource,
SourceType: &m.sourceType,
})
}
@@ -373,7 +378,7 @@ func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
return song, nil
}
func parseSongCard(songCard *goquery.Selection) data.Song {
func (m Musify) parseSongCard(songCard *goquery.Selection) data.Song {
song := data.Song{
Artists: []data.Artist{},
Sources: []data.Source{},
@@ -402,7 +407,7 @@ func parseSongCard(songCard *goquery.Selection) data.Song {
if anchorList.Length() > 1 {
trackAnchor := anchorList.Last()
if href, exists := trackAnchor.Attr("href"); exists {
song.Sources = append(song.Sources, data.Source{Url: musifyHost + href})
song.Sources = append(song.Sources, data.Source{Url: musifyHost + href, ObjectType: data.SongSource, SourceType: &m.sourceType})
}
song.Name = strings.TrimSpace(trackAnchor.Text())
}
@@ -419,6 +424,8 @@ func parseSongCard(songCard *goquery.Selection) data.Song {
if content, exists := metaArtistSrc.Attr("content"); exists && content != "" {
artist.Sources = append(artist.Sources, data.Source{
Url: musifyHost + content,
ObjectType: data.ArtistSource,
SourceType: &m.sourceType,
})
}
}
@@ -456,7 +463,7 @@ func parseSongCard(songCard *goquery.Selection) data.Song {
return song
}
func parseAlbum(doc *goquery.Document) data.Album {
func (m Musify) parseAlbum(doc *goquery.Document) data.Album {
album := data.Album{
Artists: []data.Artist{},
Sources: []data.Source{},
@@ -478,12 +485,12 @@ func parseAlbum(doc *goquery.Document) data.Album {
if href, exists := artistAnchor.Attr("href"); exists {
hrefParts := strings.Split(href, "/")
if len(hrefParts) > 1 && hrefParts[len(hrefParts)-2] == "artist" {
artist := data.Artist{
Sources: []data.Source{},
}
artist := data.Artist{}
artist.Sources = append(artist.Sources, data.Source{
Url: musifyHost + strings.TrimSpace(href),
ObjectType: data.ArtistSource,
SourceType: &m.sourceType,
})
// Artist name from span
@@ -507,6 +514,8 @@ func parseAlbum(doc *goquery.Document) data.Album {
if content, exists := metaURL.Attr("content"); exists {
album.Sources = append(album.Sources, data.Source{
Url: musifyHost + content,
ObjectType: data.AlbumSource,
SourceType: &m.sourceType,
})
}
}
@@ -533,6 +542,8 @@ func parseAlbum(doc *goquery.Document) data.Album {
if content, exists := artistURLMeta.Attr("content"); exists {
artist.Sources = append(artist.Sources, data.Source{
Url: musifyHost + content,
ObjectType: data.ArtistSource,
SourceType: &m.sourceType,
})
}
}
@@ -586,7 +597,7 @@ func (m Musify) FetchAlbum(source data.Source) (data.Album, error) {
}
// Parse album metadata
parsedAlbum := parseAlbum(doc)
parsedAlbum := m.parseAlbum(doc)
album.Name = parsedAlbum.Name
album.Artists = parsedAlbum.Artists
album.Sources = append(album.Sources, parsedAlbum.Sources...)
@@ -595,7 +606,7 @@ func (m Musify) FetchAlbum(source data.Source) (data.Album, error) {
cardBody := doc.Find("div.card-body")
if cardBody.Length() > 0 {
cardBody.Find("div.playlist__item").Each(func(i int, songCard *goquery.Selection) {
song := parseSongCard(songCard)
song := m.parseSongCard(songCard)
album.Songs = append(album.Songs, song)
})
}