adding source types in musify scrapers
This commit is contained in:
@@ -51,7 +51,7 @@ func (m Musify) RegexSong() *regexp.Regexp {
|
||||
return regexp.MustCompile(`(?i)https?://musify\.club/track/[a-z\-0-9]+`)
|
||||
}
|
||||
|
||||
func parseArtistContact(contact *goquery.Selection) (data.Artist, error) {
|
||||
func (m Musify) parseArtistContact(contact *goquery.Selection) (data.Artist, error) {
|
||||
artist := data.Artist{}
|
||||
var err error
|
||||
|
||||
@@ -63,6 +63,7 @@ func parseArtistContact(contact *goquery.Selection) (data.Artist, error) {
|
||||
artist.Sources = append(artist.Sources, data.Source{
|
||||
Url: musifyHost + url,
|
||||
ObjectType: data.ArtistSource,
|
||||
SourceType: &m.sourceType,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -87,7 +88,7 @@ func parseArtistContact(contact *goquery.Selection) (data.Artist, error) {
|
||||
return artist, err
|
||||
}
|
||||
|
||||
func parseAlbumContact(contact *goquery.Selection) (data.Album, error) {
|
||||
func (m Musify) parseAlbumContact(contact *goquery.Selection) (data.Album, error) {
|
||||
album := data.Album{}
|
||||
var err error
|
||||
|
||||
@@ -116,6 +117,7 @@ func parseAlbumContact(contact *goquery.Selection) (data.Album, error) {
|
||||
album.Sources = append(album.Sources, data.Source{
|
||||
Url: musifyHost + url,
|
||||
ObjectType: data.AlbumSource,
|
||||
SourceType: &m.sourceType,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -169,7 +171,7 @@ func parseAlbumContact(contact *goquery.Selection) (data.Album, error) {
|
||||
return album, err
|
||||
}
|
||||
|
||||
func parseContactContainer(contactContainer *goquery.Selection) []data.MusicObject {
|
||||
func (m Musify) parseContactContainer(contactContainer *goquery.Selection) []data.MusicObject {
|
||||
res := []data.MusicObject{}
|
||||
|
||||
contactContainer.Find("div.contacts__item").Each(func(i int, contact *goquery.Selection) {
|
||||
@@ -180,11 +182,11 @@ func parseContactContainer(contactContainer *goquery.Selection) []data.MusicObje
|
||||
|
||||
if exists {
|
||||
if strings.Contains(url, "artist") {
|
||||
if artist, err := parseArtistContact(contact); err == nil {
|
||||
if artist, err := m.parseArtistContact(contact); err == nil {
|
||||
res = append(res, artist)
|
||||
}
|
||||
} else if strings.Contains(url, "release") {
|
||||
if album, err := parseAlbumContact(contact); err == nil {
|
||||
if album, err := m.parseAlbumContact(contact); err == nil {
|
||||
res = append(res, album)
|
||||
}
|
||||
}
|
||||
@@ -195,7 +197,7 @@ func parseContactContainer(contactContainer *goquery.Selection) []data.MusicObje
|
||||
return res
|
||||
}
|
||||
|
||||
func parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
|
||||
func (m Musify) parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
|
||||
song := data.Song{}
|
||||
var err error
|
||||
|
||||
@@ -213,7 +215,7 @@ func parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
|
||||
song.Artists = append(song.Artists, data.Artist{
|
||||
Name: strings.TrimSpace(artistAnchor.Text()),
|
||||
Sources: []data.Source{
|
||||
{Url: musifyHost + url, ObjectType: data.ArtistSource},
|
||||
{Url: musifyHost + url, ObjectType: data.ArtistSource, SourceType: &m.sourceType},
|
||||
},
|
||||
})
|
||||
}
|
||||
@@ -226,6 +228,7 @@ func parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
|
||||
song.Sources = append(song.Sources, data.Source{
|
||||
Url: musifyHost + href,
|
||||
ObjectType: data.SongSource,
|
||||
SourceType: &m.sourceType,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -237,11 +240,11 @@ func parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
|
||||
return song, err
|
||||
}
|
||||
|
||||
func parsePlaylist(playlist *goquery.Selection) []data.MusicObject {
|
||||
func (m Musify) parsePlaylist(playlist *goquery.Selection) []data.MusicObject {
|
||||
res := []data.MusicObject{}
|
||||
|
||||
playlist.Find("div.playlist__item").Each(func(i int, playlistItem *goquery.Selection) {
|
||||
if song, err := parsePlaylistItem(playlistItem); err == nil {
|
||||
if song, err := m.parsePlaylistItem(playlistItem); err == nil {
|
||||
res = append(res, song)
|
||||
}
|
||||
})
|
||||
@@ -265,11 +268,11 @@ func (m *Musify) Search(query common.Query) ([]data.MusicObject, error) {
|
||||
}
|
||||
|
||||
doc.Find("div.contacts").Each(func(i int, contactContainer *goquery.Selection) {
|
||||
musicObjects = append(musicObjects, parseContactContainer(contactContainer)...)
|
||||
musicObjects = append(musicObjects, m.parseContactContainer(contactContainer)...)
|
||||
})
|
||||
|
||||
doc.Find("div.playlist").Each(func(i int, playlist *goquery.Selection) {
|
||||
musicObjects = append(musicObjects, parsePlaylist(playlist)...)
|
||||
musicObjects = append(musicObjects, m.parsePlaylist(playlist)...)
|
||||
})
|
||||
|
||||
return musicObjects, nil
|
||||
@@ -314,7 +317,7 @@ func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
|
||||
song.Artists = append(song.Artists, data.Artist{
|
||||
Name: strings.TrimSpace(artistAnchor.Text()),
|
||||
Sources: []data.Source{
|
||||
{Url: musifyHost + href},
|
||||
{Url: musifyHost + href, ObjectType: data.ArtistSource, SourceType: &m.sourceType},
|
||||
},
|
||||
})
|
||||
}
|
||||
@@ -339,7 +342,7 @@ func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
|
||||
useArtist = false
|
||||
}
|
||||
|
||||
artist.Sources = append(artist.Sources, data.Source{Url: musifyHost + href})
|
||||
artist.Sources = append(artist.Sources, data.Source{Url: musifyHost + href, ObjectType: data.ArtistSource, SourceType: &m.sourceType})
|
||||
} else {
|
||||
useArtist = false
|
||||
}
|
||||
@@ -358,7 +361,9 @@ func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
|
||||
if albumAnchor := listPoints.Eq(3).Find("a"); albumAnchor != nil && albumAnchor.Length() > 0 {
|
||||
if href, exists := albumAnchor.Attr("href"); exists {
|
||||
song.Album.Sources = append(song.Album.Sources, data.Source{
|
||||
Url: musifyHost + href,
|
||||
Url: musifyHost + href,
|
||||
ObjectType: data.AlbumSource,
|
||||
SourceType: &m.sourceType,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -373,7 +378,7 @@ func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
|
||||
return song, nil
|
||||
}
|
||||
|
||||
func parseSongCard(songCard *goquery.Selection) data.Song {
|
||||
func (m Musify) parseSongCard(songCard *goquery.Selection) data.Song {
|
||||
song := data.Song{
|
||||
Artists: []data.Artist{},
|
||||
Sources: []data.Source{},
|
||||
@@ -402,7 +407,7 @@ func parseSongCard(songCard *goquery.Selection) data.Song {
|
||||
if anchorList.Length() > 1 {
|
||||
trackAnchor := anchorList.Last()
|
||||
if href, exists := trackAnchor.Attr("href"); exists {
|
||||
song.Sources = append(song.Sources, data.Source{Url: musifyHost + href})
|
||||
song.Sources = append(song.Sources, data.Source{Url: musifyHost + href, ObjectType: data.SongSource, SourceType: &m.sourceType})
|
||||
}
|
||||
song.Name = strings.TrimSpace(trackAnchor.Text())
|
||||
}
|
||||
@@ -418,7 +423,9 @@ func parseSongCard(songCard *goquery.Selection) data.Song {
|
||||
if metaArtistSrc.Length() > 0 {
|
||||
if content, exists := metaArtistSrc.Attr("content"); exists && content != "" {
|
||||
artist.Sources = append(artist.Sources, data.Source{
|
||||
Url: musifyHost + content,
|
||||
Url: musifyHost + content,
|
||||
ObjectType: data.ArtistSource,
|
||||
SourceType: &m.sourceType,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -456,7 +463,7 @@ func parseSongCard(songCard *goquery.Selection) data.Song {
|
||||
return song
|
||||
}
|
||||
|
||||
func parseAlbum(doc *goquery.Document) data.Album {
|
||||
func (m Musify) parseAlbum(doc *goquery.Document) data.Album {
|
||||
album := data.Album{
|
||||
Artists: []data.Artist{},
|
||||
Sources: []data.Source{},
|
||||
@@ -478,12 +485,12 @@ func parseAlbum(doc *goquery.Document) data.Album {
|
||||
if href, exists := artistAnchor.Attr("href"); exists {
|
||||
hrefParts := strings.Split(href, "/")
|
||||
if len(hrefParts) > 1 && hrefParts[len(hrefParts)-2] == "artist" {
|
||||
artist := data.Artist{
|
||||
Sources: []data.Source{},
|
||||
}
|
||||
artist := data.Artist{}
|
||||
|
||||
artist.Sources = append(artist.Sources, data.Source{
|
||||
Url: musifyHost + strings.TrimSpace(href),
|
||||
Url: musifyHost + strings.TrimSpace(href),
|
||||
ObjectType: data.ArtistSource,
|
||||
SourceType: &m.sourceType,
|
||||
})
|
||||
|
||||
// Artist name from span
|
||||
@@ -506,7 +513,9 @@ func parseAlbum(doc *goquery.Document) data.Album {
|
||||
if metaURL.Length() > 0 {
|
||||
if content, exists := metaURL.Attr("content"); exists {
|
||||
album.Sources = append(album.Sources, data.Source{
|
||||
Url: musifyHost + content,
|
||||
Url: musifyHost + content,
|
||||
ObjectType: data.AlbumSource,
|
||||
SourceType: &m.sourceType,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -532,7 +541,9 @@ func parseAlbum(doc *goquery.Document) data.Album {
|
||||
if artistURLMeta.Length() > 0 {
|
||||
if content, exists := artistURLMeta.Attr("content"); exists {
|
||||
artist.Sources = append(artist.Sources, data.Source{
|
||||
Url: musifyHost + content,
|
||||
Url: musifyHost + content,
|
||||
ObjectType: data.ArtistSource,
|
||||
SourceType: &m.sourceType,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -586,7 +597,7 @@ func (m Musify) FetchAlbum(source data.Source) (data.Album, error) {
|
||||
}
|
||||
|
||||
// Parse album metadata
|
||||
parsedAlbum := parseAlbum(doc)
|
||||
parsedAlbum := m.parseAlbum(doc)
|
||||
album.Name = parsedAlbum.Name
|
||||
album.Artists = parsedAlbum.Artists
|
||||
album.Sources = append(album.Sources, parsedAlbum.Sources...)
|
||||
@@ -595,7 +606,7 @@ func (m Musify) FetchAlbum(source data.Source) (data.Album, error) {
|
||||
cardBody := doc.Find("div.card-body")
|
||||
if cardBody.Length() > 0 {
|
||||
cardBody.Find("div.playlist__item").Each(func(i int, songCard *goquery.Selection) {
|
||||
song := parseSongCard(songCard)
|
||||
song := m.parseSongCard(songCard)
|
||||
album.Songs = append(album.Songs, song)
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user